diff --git a/DDPG_study18_best_test_varianz.pkl b/DDPG_study18_best_test_varianz.pkl new file mode 100644 index 00000000..c87e3e11 Binary files /dev/null and b/DDPG_study18_best_test_varianz.pkl differ diff --git a/OMG_Integrator_Actor/3/model.zip b/OMG_Integrator_Actor/3/model.zip new file mode 100644 index 00000000..351a7617 Binary files /dev/null and b/OMG_Integrator_Actor/3/model.zip differ diff --git a/OMG_Integrator_Actor/32/model.zip b/OMG_Integrator_Actor/32/model.zip new file mode 100644 index 00000000..f7b22eb7 Binary files /dev/null and b/OMG_Integrator_Actor/32/model.zip differ diff --git a/OMG_Integrator_Actor_i_load_feature/0/model.zip b/OMG_Integrator_Actor_i_load_feature/0/model.zip new file mode 100644 index 00000000..393106b1 Binary files /dev/null and b/OMG_Integrator_Actor_i_load_feature/0/model.zip differ diff --git a/OMG_Integrator_Actor_i_load_feature/1/model.zip b/OMG_Integrator_Actor_i_load_feature/1/model.zip new file mode 100644 index 00000000..bdfdac14 Binary files /dev/null and b/OMG_Integrator_Actor_i_load_feature/1/model.zip differ diff --git a/OMG_Integrator_Actor_i_load_feature_2/1/model.zip b/OMG_Integrator_Actor_i_load_feature_2/1/model.zip new file mode 100644 index 00000000..21ec1154 Binary files /dev/null and b/OMG_Integrator_Actor_i_load_feature_2/1/model.zip differ diff --git a/Pipi.pkl b/Pipi.pkl new file mode 100644 index 00000000..4fcaf4dc Binary files /dev/null and b/Pipi.pkl differ diff --git a/experiments/DQN/env/Custom_Cartpole.py b/experiments/DQN/env/Custom_Cartpole.py new file mode 100644 index 00000000..fb627916 --- /dev/null +++ b/experiments/DQN/env/Custom_Cartpole.py @@ -0,0 +1,226 @@ +""" +Classic cart-pole system implemented by Rich Sutton et al. +Copied from http://incompleteideas.net/sutton/book/code/pole.c +permalink: https://perma.cc/C9ZM-652R +""" + +import math +import gym +from gym import spaces, logger +from gym.utils import seeding +import numpy as np + + +class CartPoleEnv(gym.Env): + """ + Description: + A pole is attached by an un-actuated joint to a cart, which moves along + a frictionless track. The pendulum starts upright, and the goal is to + prevent it from falling over by increasing and reducing the cart's + velocity. + + Source: + This environment corresponds to the version of the cart-pole problem + described by Barto, Sutton, and Anderson + + Observation: + Type: Box(4) + Num Observation Min Max + 0 Cart Position -4.8 4.8 + 1 Cart Velocity -Inf Inf + 2 Pole Angle -0.418 rad (-24 deg) 0.418 rad (24 deg) + 3 Pole Angular Velocity -Inf Inf + + + Actions: + Type: Discrete(2) + Num Action + 0 Push cart to the left + 1 Push cart to the right + + Note: The amount the velocity that is reduced or increased is not + fixed; it depends on the angle the pole is pointing. This is because + the center of gravity of the pole increases the amount of energy needed + to move the cart underneath it + + Reward: + Reward is 1 for every step taken, including the termination step + + Starting State: + All observations are assigned a uniform random value in [-0.05..0.05] + + Episode Termination: + Pole Angle is more than 12 degrees. + Cart Position is more than 2.4 (center of the cart reaches the edge of + the display). + Episode length is greater than 200. + Solved Requirements: + Considered solved when the average return is greater than or equal to + 195.0 over 100 consecutive trials. 
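+
+    Note (modifications in this copy):
+        Compared to the original Gym implementation, the pole angle is wrapped
+        into [-pi, pi] after every step, exceeding the angle threshold does not
+        terminate the episode (only leaving the track does), and the per-step
+        reward is shaped as 1 - |theta| / pi instead of a constant 1.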
+ """ + + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second': 50 + } + + def __init__(self): + self.gravity = 9.8 + self.masscart = 1.0 + self.masspole = 0.1 + self.total_mass = (self.masspole + self.masscart) + self.length = 0.5 # actually half the pole's length + self.polemass_length = (self.masspole * self.length) + self.force_mag = 10.0 + self.tau = 0.02 # seconds between state updates + self.kinematics_integrator = 'euler' + + # Angle at which to fail the episode + self.theta_threshold_radians = 12 * 2 * math.pi / 360 + self.x_threshold = 2.4 + + # Angle limit set to 2 * theta_threshold_radians so failing observation + # is still within bounds. + high = np.array([self.x_threshold * 2, + np.finfo(np.float32).max, + self.theta_threshold_radians * 2, + np.finfo(np.float32).max], + dtype=np.float32) + + self.action_space = spaces.Discrete(2) + self.observation_space = spaces.Box(-high, high, dtype=np.float32) + + self.seed() + self.viewer = None + self.state = None + + self.steps_beyond_done = None + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def step(self, action): + err_msg = "%r (%s) invalid" % (action, type(action)) + assert self.action_space.contains(action), err_msg + + x, x_dot, theta, theta_dot = self.state + force = self.force_mag if action == 1 else -self.force_mag + costheta = math.cos(theta) + sintheta = math.sin(theta) + + # For the interested reader: + # https://coneural.org/florian/papers/05_cart_pole.pdf + temp = (force + self.polemass_length * theta_dot ** 2 * sintheta) / self.total_mass + thetaacc = (self.gravity * sintheta - costheta * temp) / ( + self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass)) + xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass + + if self.kinematics_integrator == 'euler': + x = x + self.tau * x_dot + x_dot = x_dot + self.tau * xacc + theta = theta + self.tau * theta_dot + theta_dot = theta_dot + self.tau * thetaacc + else: # semi-implicit euler + x_dot = x_dot + self.tau * xacc + x = x + self.tau * x_dot + theta_dot = theta_dot + self.tau * thetaacc + theta = theta + self.tau * theta_dot + + if theta >= np.pi: + theta -= 2 * np.pi + elif theta <= -np.pi: + theta += 2 * np.pi + + self.state = (x, x_dot, theta, theta_dot) + + done = bool( + x < -self.x_threshold + or x > self.x_threshold + # or theta < -self.theta_threshold_radians + # or theta > self.theta_threshold_radians + ) + + if not done: + reward = 1 - (abs(theta) / np.pi) + # reward = 1.0 + elif self.steps_beyond_done is None: + # Pole just fell! + self.steps_beyond_done = 0 + reward = 0.0 + else: + if self.steps_beyond_done == 0: + logger.warn( + "You are calling 'step()' even though this " + "environment has already returned done = True. You " + "should always call 'reset()' once you receive 'done = " + "True' -- any further steps are undefined behavior." 
+ ) + self.steps_beyond_done += 1 + reward = 0.0 + + return np.array(self.state), reward, done, {} + + def reset(self): + self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) + self.steps_beyond_done = None + return np.array(self.state) + + def render(self, mode='human'): + screen_width = 600 + screen_height = 400 + + world_width = self.x_threshold * 2 + scale = screen_width / world_width + carty = 100 # TOP OF CART + polewidth = 10.0 + polelen = scale * (2 * self.length) + cartwidth = 50.0 + cartheight = 30.0 + + if self.viewer is None: + from gym.envs.classic_control import rendering + self.viewer = rendering.Viewer(screen_width, screen_height) + l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2 + axleoffset = cartheight / 4.0 + cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) + self.carttrans = rendering.Transform() + cart.add_attr(self.carttrans) + self.viewer.add_geom(cart) + l, r, t, b = -polewidth / 2, polewidth / 2, polelen - polewidth / 2, -polewidth / 2 + pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) + pole.set_color(.8, .6, .4) + self.poletrans = rendering.Transform(translation=(0, axleoffset)) + pole.add_attr(self.poletrans) + pole.add_attr(self.carttrans) + self.viewer.add_geom(pole) + self.axle = rendering.make_circle(polewidth / 2) + self.axle.add_attr(self.poletrans) + self.axle.add_attr(self.carttrans) + self.axle.set_color(.5, .5, .8) + self.viewer.add_geom(self.axle) + self.track = rendering.Line((0, carty), (screen_width, carty)) + self.track.set_color(0, 0, 0) + self.viewer.add_geom(self.track) + + self._pole_geom = pole + + if self.state is None: + return None + + # Edit the pole polygon vertex + pole = self._pole_geom + l, r, t, b = -polewidth / 2, polewidth / 2, polelen - polewidth / 2, -polewidth / 2 + pole.v = [(l, b), (l, t), (r, t), (r, b)] + + x = self.state + cartx = x[0] * scale + screen_width / 2.0 # MIDDLE OF CART + self.carttrans.set_translation(cartx, carty) + self.poletrans.set_rotation(-x[2]) + + return self.viewer.render(return_rgb_array=mode == 'rgb_array') + + def close(self): + if self.viewer: + self.viewer.close() + self.viewer = None diff --git a/experiments/DQN/train_dqn_cont_env_test.py b/experiments/DQN/train_dqn_cont_env_test.py new file mode 100644 index 00000000..84c85bc8 --- /dev/null +++ b/experiments/DQN/train_dqn_cont_env_test.py @@ -0,0 +1,176 @@ +from multiprocessing import Pool +from typing import Union + +import gym +import numpy as np +import pandas as pd +import torch as th +from stable_baselines3 import DQN + +import matplotlib.pyplot as plt + +# env = gym.make("CartPole-v0") +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.type_aliases import GymStepReturn, GymObs + +from experiments.DQN.env.Custom_Cartpole import CartPoleEnv + + +# return_all_agents = [] + +class FeatureWrapper(Monitor): + + def __init__(self, env, training_episode_length): + """ + : + """ + state_constraints = [[- 2.4, 2.4], + [-7, 7], + [-np.pi, +np.pi], + [-10, 10]] + self.state_low = np.array(state_constraints)[:, 0] + self.state_high = np.array(state_constraints)[:, 1] + self.delta_v = 0.15 + + super().__init__(env) + self.training_episode_length = training_episode_length + self._n_training_steps = 0 + self.episode_return = [] + self.observation_space = gym.spaces.Box(low=np.full(env.observation_space.shape[0] + 1, -np.inf), + high=np.full(env.observation_space.shape[0] + 1, np.inf)) + + def step(self, action: Union[np.ndarray, int]) -> 
GymStepReturn: + """ + """ + + obs, reward, done, info = super().step(action) + + done = False + if np.any(np.abs(self.env.state) > self.state_high): + reward = -1 + done = True + + self._n_training_steps += 1 + + if self._n_training_steps == self.training_episode_length: + done = True + #info["timelimit_reached"] = True + + if self._n_training_steps == self.training_episode_length or done: + self.episode_return.append(sum(self.rewards)) + + norm_state = [(obs[0] - self.state_low[0]) / (self.state_high[0] - self.state_low[0]) * 2 - 1, + (obs[1] - self.state_low[1]) / (self.state_high[1] - self.state_low[1]) * 2 - 1, + np.cos(obs[2]), + np.sin(obs[2]), + (obs[3] - self.state_low[3]) / (self.state_high[3] - self.state_low[3]) * 2 - 1, + ] + + return norm_state, reward, done, info + + def reset(self, **kwargs) -> GymObs: + """ + + """ + obs = super().reset() + + # self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) + # self.state[0] = self.np_random.uniform(low=-2.4, high=2.4, size=(1,)) + # self.state[1] = self.np_random.uniform(low=-7, high=7, size=(1,)) + # self.state[2] = self.np_random.uniform(low=-np.pi, high=np.pi, size=(1,)) + # self.state[3] = self.np_random.uniform(low=-10, high=10, size=(1,)) + + while True: + self.env.state = np.random.uniform(low=self.state_low, + high=self.state_high) + if np.abs(self.env.state[3]) < 1: + if self.env.state[1] > 0: + if np.sqrt((2.4 - self.env.state[0]) / self.env.tau * self.delta_v) > self.env.state[1]: + break + if self.env.state[1] < 0: + if -np.sqrt((2.4 + self.env.state[0]) / self.env.tau * self.delta_v) < self.env.state[1]: + break + + self.steps_beyond_done = None + + self._n_training_steps = 0 + + norm_state = [(self.env.state[0] - self.state_low[0]) / (self.state_high[0] - self.state_low[0]) * 2 - 1, + (self.env.state[1] - self.state_low[1]) / (self.state_high[1] - self.state_low[1]) * 2 - 1, + np.cos(self.env.state[2]), + np.sin(self.env.state[2]), + (self.env.state[3] - self.state_low[3]) / (self.state_high[3] - self.state_low[3]) * 2 - 1, + ] + + return norm_state #np.array(self.state) + + +# for i in range(2): +def bla(idx): + env = FeatureWrapper(CartPoleEnv(), training_episode_length=200) + # env = FeatureWrapper(gym.make("CartPole-v1")) + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=[200, 200, 200]) + + model = DQN("MlpPolicy", env, learning_rate=1e-3, buffer_size=10000, learning_starts=1000, batch_size=32, + tau=0.001, + gamma=0.99, train_freq=(1, "step"), gradient_steps=-1, optimize_memory_usage=False, + target_update_interval=1, + exploration_fraction=0.4, exploration_initial_eps=0.5, exploration_final_eps=0.01, max_grad_norm=1000, + tensorboard_log='TB_log/', create_eval_env=False, policy_kwargs=policy_kwargs, verbose=1, seed=None, + device='auto', + _init_setup_model=True) + + model.q_net.q_net._modules['1'].negative_slope = 0.1 + model.q_net.q_net._modules['3'].negative_slope = 0.1 + model.q_net.q_net._modules['5'].negative_slope = 0.1 + model.q_net_target.q_net._modules['1'].negative_slope = 0.1 + model.q_net_target.q_net._modules['3'].negative_slope = 0.1 + model.q_net_target.q_net._modules['5'].negative_slope = 0.1 + + # model = DQN("MlpPolicy", env, verbose=1) + + # learn(total_timesteps, callback=None, log_interval=4, eval_env=None, eval_freq=- 1, n_eval_episodes=5, + # tb_log_name='DQN', eval_log_path=None, reset_n um_timesteps=True) + model.learn(total_timesteps=100000, log_interval=4) + return np.array(env.episode_return) + + +#with Pool(2) as p: +return_all_agents = map(bla, 
range(20)) + + + +# return_all_agents.append(np.array(env.episode_return)) + + +# asd = min(map(len,return_all_agents)) +# np_return_all_agents = np.array([l[:asd] for l in return_all_agents]) + +df = pd.DataFrame(return_all_agents) + +df.to_pickle("DQN_original20Agents_sb3_original_lowBuffer") + +m = df.mean() +s = df.std() + +episode = pd.Series(range(0, df.shape[1])) + +plt.plot(episode, m) +plt.fill_between(episode, m - s, m + s, facecolor='r') +plt.ylabel('Average return') +plt.xlabel('Episode') +plt.ylim([0, 200]) +plt.grid() +plt.title('20 Agent Original Code_original_lowBuffer') +plt.show() + +""" +obs = env.reset() +while True: + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = env.step(action) + env.render() + if done: + obs = env.reset() +""" diff --git a/experiments/DQN/viz.py b/experiments/DQN/viz.py new file mode 100644 index 00000000..26300eeb --- /dev/null +++ b/experiments/DQN/viz.py @@ -0,0 +1,33 @@ +import pandas as pd +import matplotlib.pyplot as plt + +agents25_original = pd.read_pickle("DQN_ORIGINAL_5Agents") +agents50_fix = pd.read_pickle("DQN_WITH_fix5Agents") + +m = agents25_original.mean() +s = agents25_original.std() + +episode = pd.Series(range(0, agents25_original.shape[1])) + +plt.plot(episode, m) +plt.fill_between(episode, m - s, m + s, facecolor='r') +plt.ylabel('Average return') +plt.xlabel('Episode') +plt.ylim([0, 200]) +plt.grid() +plt.title('25 Agent Original') +plt.show() + +m_fix = agents50_fix.mean() +s_fix = agents50_fix.std() + +episode_fix = pd.Series(range(0, agents50_fix.shape[1])) + +plt.plot(episode_fix, m_fix) +plt.fill_between(episode_fix, m_fix - s_fix, m_fix + s_fix, facecolor='r') +plt.ylabel('Average return') +plt.xlabel('Episode') +plt.ylim([0, 200]) +plt.grid() +plt.title('50 Agent Fixed Code') +plt.show() diff --git a/experiments/GEM/env/env_wrapper_GEM.py b/experiments/GEM/env/env_wrapper_GEM.py new file mode 100644 index 00000000..2af43069 --- /dev/null +++ b/experiments/GEM/env/env_wrapper_GEM.py @@ -0,0 +1,480 @@ +import platform +from typing import Union + +import gym +import numpy as np +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.type_aliases import GymStepReturn + +from experiments.GEM.util.config import cfg +from experiments.hp_tune.env.vctrl_single_inv import net +from openmodelica_microgrid_gym.util import Fastqueue + + +class BaseWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", gamma=0, + number_learing_steps=500000, number_past_vals=0): + """ + Base Env Wrapper to add features to the env-observations and adds information to env.step output which can be + used in case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) 
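+        :param recorder: Recorder instance used to dump the logged episode data (locally as json)
+        :param n_trail: Trial number (as string) used to tag the logged data
+        :param gamma: Discount factor; the step reward is scaled by (1 - gamma)
+        :param number_learing_steps: Total number of learning steps of the training run
+        :param number_past_vals: Number of past i_dq measurements appended to the observation (delay queues initialized with zeros)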
+ + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features - 2, -np.inf), # -2 because v_dq is removed + high=np.full(env.observation_space.shape[0] + number_of_features - 2, np.inf)) + + # increase action-space for PI-seperation + # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1)) + + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self.n_episode = 0 + self.reward_episode_mean = [] + self.i_d_mess = [] + self.i_q_mess = [] + self.i_d_ref = [] + self.i_q_ref = [] + self.action_d = [] + self.action_q = [] + self.n_trail = n_trail + self.used_P = np.zeros(self.action_space.shape) + self.gamma = gamma + self.number_learing_steps = number_learing_steps + self.delay_queues = [Fastqueue(1, 2) for _ in range(number_past_vals)] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + + obs, reward, done, info = super().step(action) + reward = reward * (1 - self.gamma) + # super().render() + + self._n_training_steps += 1 + + if cfg['loglevel'] == 'train': + self.i_d_mess.append(np.float64(obs[0])) + self.i_q_mess.append(np.float64(obs[1])) + self.i_d_ref.append(np.float64(obs[2])) + self.i_q_ref.append(np.float64(obs[3])) + self.action_d.append(np.float64(action[0])) + self.action_q.append(np.float64(action[1])) + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + super().close() + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "i_d_mess": self.i_d_mess, + "i_q_mess": self.i_q_mess, + "v_d_mess": self.env.env.v_d_mess, + "v_q_mess": self.env.env.v_q_mess, + "i_d_ref": self.i_d_ref, + "i_q_ref": self.i_q_ref, + 'action_d': self.action_d, + 'action_q': self.action_q, + "Rewards": self.rewards, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + "Reward function": 'MRE' + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + self.recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.i_d_mess = [] + self.i_q_mess = [] + self.i_d_ref = [] + self.i_q_ref = [] + self.action_d = [] + self.action_q = [] + """ + Features + """ + error = (obs[2:4] - obs[0:2]) / 2 # control error: v_setpoint - v_mess + obs = np.append(obs, error) + obs = np.append(obs, self.used_P) + obs_delay_array = self.shift_and_append(obs[0:2]) + obs = np.append(obs, obs_delay_array) + + # todo efficiency? 
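+        # remember the action applied in this step; it is appended to the next observation as a feature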
+ self.used_P = np.copy(action) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + [x.clear() for x in self.delay_queues] + obs = super().reset() + + if cfg['loglevel'] == 'train': + self.i_d_mess.append(np.float64(obs[0])) + self.i_q_mess.append(np.float64(obs[1])) + self.i_d_ref.append(np.float64(obs[2])) + self.i_q_ref.append(np.float64(obs[3])) + self.action_d.append(np.float64(0)) + self.action_q.append(np.float64(0)) + + self._n_training_steps = 0 + self.used_P = np.zeros(self.action_space.shape) + + # if cfg['loglevel'] == 'train': + # print("Log Data for Taining in Basewrapper L2xx?") + + """ + Features + """ + # SP wir an den State gehangen! + error = (obs[2:4] - obs[0:2]) / 2 # control error: v_setpoint - v_mess + obs = np.append(obs, error) + obs = np.append(obs, self.used_P) + obs_delay_array = self.shift_and_append(obs[0:2]) + obs = np.append(obs, obs_delay_array) + + return obs + + def shift_and_append(self, obs): + """ + Takes the observation and shifts throught the queue + every queue output is added to total obs + """ + obs_delay_array = np.array([]) + obs_temp = obs + for queue in self.delay_queues: + obs_temp = queue.shift(obs_temp) + obs_delay_array = np.append(obs_delay_array, obs_temp) + + return obs_delay_array + + +class FeatureWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000): # , use_past_vals=False, number_past_vals=0): + """ + Env Wrapper to add features to the env-observations and adds information to env.step output which can be used in + case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) 
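+        :param integrator_weight: Gain that scales the integral part of the action before it is accumulated
+        :param antiwindup_weight: Gain used to wind the integrator sum back whenever the combined PI action is clipped
+        :param gamma: Discount factor; the step reward is scaled by (1 - gamma)
+        :param penalty_I_weight: Weight of the penalty on the integral action added to the reward
+        :param penalty_P_weight: Weight of the penalty on the proportional action added to the reward
+        :param t_start_penalty_I: Training step from which the integral penalty is annealed linearly to zero
+        :param t_start_penalty_P: Training step from which the proportional penalty is annealed linearly to zero
+        :param number_learing_steps: Total number of learning steps; end point of the penalty annealing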
+ + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features - 2, -np.inf), + # -2 because, v_dq is removed + high=np.full(env.observation_space.shape[0] + number_of_features - 2, np.inf)) + + # increase action-space for PI-seperation + # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1)) + + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_d_mess = [] + self.i_q_mess = [] + self.i_d_ref = [] + self.i_q_ref = [] + self.action_d = [] + self.action_q = [] + self.n_episode = 0 + self.reward_episode_mean = [] + self.n_trail = n_trail + self.integrator_sum = np.zeros(self.action_space.shape) + self.integrator_weight = integrator_weight + self.antiwindup_weight = antiwindup_weight + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + self.gamma = gamma + self.penalty_I_weight = penalty_I_weight + self.penalty_P_weight = penalty_P_weight + self.t_start_penalty_I = t_start_penalty_I + self.t_start_penalty_P = t_start_penalty_P + self.number_learing_steps = number_learing_steps + self.integrator_sum_list0 = [] + self.integrator_sum_list1 = [] + self.action_P0 = [] + self.action_P1 = [] + self.action_I0 = [] + self.action_I1 = [] + + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + action_P = action[0:2] + action_I = action[2:4] + + self.integrator_sum += action_I * self.integrator_weight + + action_PI = action_P + self.integrator_sum + + # check if m_abc will be clipped + if np.any(abs(action_PI) > 1): + + clipped_action = np.clip(action_PI, -1, 1) + + delta_action = clipped_action - action_PI + # if, reduce integrator by clipped delta + # action_delta = abc_to_dq0(delta_action, self.env.net.components[0].phase) + self.integrator_sum += delta_action * self.antiwindup_weight + + """ + clip_reward = np.clip(np.sum(np.abs(delta_action) * \ + (-1 / (self.env.net.components[0].v_lim / self.env.net.components[ + 0].v_DC))) / 3 * (1 - self.gamma), + -1, 0) + """ + clip_reward = 0 + + action_PI = clipped_action + + else: + clip_reward = 0 + + obs, reward, done, info = super().step(action_PI) + reward = reward + clip_reward + reward = reward * (1 - self.gamma) + + #super().render() + + integrator_penalty = np.sum(-((np.abs(action_I)) ** 0.5)) * (1 - self.gamma) / 3 + # action_P_penalty = - np.sum((np.abs(action_P - self.used_P)) ** 0.5) * (1 - self.gamma) / 3 + action_P_penalty = np.sum(-((np.abs(action_P)) ** 0.5)) * (1 - self.gamma) / 3 + + # reward_weight is = 1 + + if self.total_steps > self.t_start_penalty_I: + penalty_I_weight_scale = 1 / (self.t_start_penalty_I - self.number_learing_steps) * self.total_steps - \ + self.number_learing_steps / (self.t_start_penalty_I - self.number_learing_steps) + + else: + penalty_I_weight_scale = 1 + + if self.total_steps > self.t_start_penalty_P: + penalty_P_weight_scale = 1 / (self.t_start_penalty_P - self.number_learing_steps) * self.total_steps - \ + self.number_learing_steps / (self.t_start_penalty_P - self.number_learing_steps) + + else: + + penalty_P_weight_scale = 1 + + reward = (reward + (self.penalty_I_weight * penalty_I_weight_scale) * integrator_penalty + + self.penalty_P_weight * penalty_P_weight_scale * 
action_P_penalty) \ + / (1 + self.penalty_I_weight * penalty_I_weight_scale + self.penalty_P_weight * penalty_P_weight_scale) + + self._n_training_steps += 1 + + if cfg['loglevel'] == 'train': + self.i_d_mess.append(np.float64(obs[0])) + self.i_q_mess.append(np.float64(obs[1])) + self.i_d_ref.append(np.float64(obs[2])) + self.i_q_ref.append(np.float64(obs[3])) + self.action_d.append(np.float64(action[0])) + self.action_q.append(np.float64(action[1])) + self.integrator_sum_list0.append(self.integrator_sum[0]) + self.integrator_sum_list1.append(self.integrator_sum[1]) + self.action_P0.append(np.float64(action_P[0])) + self.action_P1.append(np.float64(action_P[1])) + self.action_I0.append(np.float64(action_I[0])) + self.action_I1.append(np.float64(action_I[1])) + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + super().close() + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "i_d_mess": self.i_d_mess, + "i_q_mess": self.i_q_mess, + "v_d_mess": self.env.env.v_d_mess, + "v_q_mess": self.env.env.v_q_mess, + "i_d_ref": self.i_d_ref, + "i_q_ref": self.i_q_ref, + 'action_d': self.action_d, + 'action_q': self.action_q, + "Rewards": self.rewards, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + "Reward function": 'rew.rew_fun_dq0', + 'Integrator0': self.integrator_sum_list0, + 'Integrator1': self.integrator_sum_list1, + 'actionP0': self.action_P0, + 'actionP1': self.action_P1, + 'actionI0': self.action_I0, + 'actionI1': self.action_I1, + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + self.recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.i_d_mess = [] + self.i_q_mess = [] + self.i_d_ref = [] + self.i_q_ref = [] + self.action_d = [] + self.action_q = [] + + # if self._n_training_steps > 500: + # super().close() + + """ + Features + """ + error = (obs[2:4] - obs[0:2]) / 2 # control error: v_setpoint - v_mess + obs = np.append(obs, error) + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + + self.used_P = np.copy(action_P) + self.used_I = np.copy(self.integrator_sum) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + obs = super().reset() + + if cfg['loglevel'] == 'train': + self.i_d_mess.append(np.float64(obs[0])) + self.i_q_mess.append(np.float64(obs[1])) + self.i_d_ref.append(np.float64(obs[2])) + self.i_q_ref.append(np.float64(obs[3])) + self.action_d.append(np.float64(0)) + self.action_q.append(np.float64(0)) + + self._n_training_steps = 0 + self.integrator_sum = np.zeros(self.action_space.shape) + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + + """" + Features + """ + # SP wir an den State gehangen! 
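+        # (i.e., the setpoint is appended to the state: the control error, the last P-action and the
+        #  last integrator sum are added to the observation as features)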
+ error = (obs[2:4] - obs[0:2]) / 2 # control error: v_setpoint - v_mess + obs = np.append(obs, error) + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, self.used_action) + + return obs + + + + +class FeatureWrapper_pastVals(FeatureWrapper): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 500000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000, number_past_vals=10): + """ + Env Wrapper which adds the number_past_vals voltage ([3:6]!!!) observations to the observations. + Initialized with zeros! + """ + super().__init__(env, number_of_features, training_episode_length, + recorder, n_trail, integrator_weight, antiwindup_weight, gamma, + penalty_I_weight, penalty_P_weight, t_start_penalty_I, t_start_penalty_P, + number_learing_steps) + + # self.observation_space = gym.spaces.Box( + # low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + # high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + self.delay_queues = [Fastqueue(1, 2) for _ in range(number_past_vals)] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + obs, reward, done, info = super().step(action) + obs_delay_array = self.shift_and_append(obs[0:2]) + obs = np.append(obs, obs_delay_array) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + [x.clear() for x in self.delay_queues] + obs = super().reset() + obs_delay_array = self.shift_and_append(obs[0:2]) + obs = np.append(obs, obs_delay_array) + + return obs + + def shift_and_append(self, obs): + """ + Takes the observation and shifts throught the queue + every queue output is added to total obs + """ + obs_delay_array = np.array([]) + obs_temp = obs + for queue in self.delay_queues: + obs_temp = queue.shift(obs_temp) + obs_delay_array = np.append(obs_delay_array, obs_temp) + + return obs_delay_array + + + + diff --git a/experiments/GEM/experiment_GEM.py b/experiments/GEM/experiment_GEM.py new file mode 100644 index 00000000..3c322ce5 --- /dev/null +++ b/experiments/GEM/experiment_GEM.py @@ -0,0 +1,490 @@ +import platform +import time +import gym_electric_motor as gem +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +from experiments.GEM.env.env_wrapper_GEM import FeatureWrapper, FeatureWrapper_pastVals, BaseWrapper +# from experiments.GEM.env.GEM_env import AppendLastActionWrapper +from experiments.GEM.util.config import cfg +from experiments.GEM.util.recorder_GEM import Recorder + +from gym.wrappers import FlattenObservation +import gym_electric_motor as gem +from gym_electric_motor.reference_generators import MultipleReferenceGenerator, ConstReferenceGenerator, \ + WienerProcessReferenceGenerator +from gym_electric_motor.visualization import MotorDashboard +from gym_electric_motor.visualization.motor_dashboard_plots import MeanEpisodeRewardPlot +from gym_electric_motor.physical_systems.mechanical_loads import ConstantSpeedLoad +from gym.core import Wrapper +from gym.spaces import 
Box, Tuple +from gym_electric_motor.constraints import SquaredConstraint + +test_length = 10000 +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] +Ki_ddpg_combi = 182 + + +class AppendLastActionWrapper(Wrapper): + """ + The following environment considers the dead time in the real-world motor control systems. + The real-world system changes its state, while the agent simultaneously calculates the next action based on a + previously measured observation. + Therefore, for the agents it seems as if the applied action affects the environment with one step delay + (with a dead time of one time step). + As a measure of feature engineering we append the last selected action to the observation of each time step, + because this action will be the one that is active while the agent has to make the next decision. + """ + + def __init__(self, environment): + super().__init__(environment) + # append the action space dimensions to the observation space dimensions + self.observation_space = Tuple((Box( + np.concatenate((environment.observation_space[0].low, environment.action_space.low)), + np.concatenate((environment.observation_space[0].high, environment.action_space.high)) + ), environment.observation_space[1])) + + self.v_d_mess = [] + self.v_q_mess = [] + + def step(self, action): + (state, ref), rew, term, info = self.env.step(action) + + self.v_d_mess.append(np.float64(state[2])) + self.v_q_mess.append(np.float64(state[3])) + state = np.delete(state, [2, 3]) + # extend the output state by the selected action + # state = np.concatenate((state, action)) + + return (state, ref), rew, term, info + + def reset(self, **kwargs): + # extend the output state by zeros after reset + # no action can be appended yet, but the dimension must fit + # state = np.concatenate((state, np.zeros(self.env.action_space.shape))) + + self.v_d_mess = [] + self.v_q_mess = [] + + # set random reference values + self.env.reference_generator._sub_generators[0]._reference_value = np.random.uniform(-1, 0) + self.env.reference_generator._sub_generators[1]._reference_value = np.random.uniform(-1, 1) + + state, ref = self.env.reset() + self.v_d_mess.append(np.float64(state[2])) + self.v_q_mess.append(np.float64(state[3])) + state = np.delete(state, [2, 3]) # remove vdq from state + + return state, ref + + +class AppendLastActionWrapper_testsetting(AppendLastActionWrapper): + + def __init__(self, environment, new_ref_d, new_ref_q, ref_change): + """ + new_ref_d/q mus be list of length test_steps/1000! 
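+        (one reference value is consumed every ref_change steps, so the lists need at least
+        test_length / ref_change + 1 entries for a full test run)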
+ """ + super().__init__(environment) + self.step_number = 0 + self.ref_count = 0 + self.new_ref_d = new_ref_d + self.new_ref_q = new_ref_q + self.ref_change = ref_change + + def step(self, action): + self.step_number += 1 + + if self.step_number % self.ref_change == 0: + self.ref_count += 1 + self.env.reference_generator._sub_generators[0]._reference_value = self.new_ref_d[ + self.ref_count] # np.random.uniform(-1, 0) + self.env.reference_generator._sub_generators[1]._reference_value = self.new_ref_q[ + self.ref_count] # np.random.uniform(-1, 1) + + (state, ref), rew, term, info = super().step(action) + + return (state, ref), rew, term, info + + def reset(self, **kwargs): + self.v_d_mess = [] + self.v_q_mess = [] + + self.env.reference_generator._sub_generators[0]._reference_value = self.new_ref_d[ + self.ref_count] # np.random.uniform(-1, 0) + self.env.reference_generator._sub_generators[1]._reference_value = self.new_ref_q[ + self.ref_count] # np.random.uniform(-1, 1) + + state, ref = self.env.reset() + self.v_d_mess.append(np.float64(state[2])) + self.v_q_mess.append(np.float64(state[3])) + state = np.delete(state, [2, 3]) # remove vdq from state + + return state, ref + + +def experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, antiwindup_weight, + penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, n_trail, + number_past_vals=0): + if node not in cfg['lea_vpn_nodes']: + # assume we are on pc2 + log_path = f'/scratch/hpc-prf-reinfl/weber/OMG/{folder_name}/{n_trail}/' + else: + log_path = f'{folder_name}/{n_trail}/' + + #################################################################################################################### + # GEM + # Define reference generators for both currents of the flux oriented dq frame + # d current reference is chosen to be constantly at zero to simplify this showcase scenario + d_generator = ConstReferenceGenerator('i_sd', 0) + # q current changes dynamically + q_generator = ConstReferenceGenerator('i_sq', 0) + + # The MultipleReferenceGenerator allows to apply these references simultaneously + rg = MultipleReferenceGenerator([d_generator, q_generator]) + + # Set the electric parameters of the motor + motor_parameter = dict( + r_s=15e-3, l_d=0.37e-3, l_q=1.2e-3, psi_p=65.6e-3, p=3, j_rotor=0.06 + ) + + # Change the motor operational limits (important when limit violations can terminate and reset the environment) + limit_values = dict( + i=160 * 1.41, + omega=12000 * np.pi / 30, + u=450 + ) + + # Change the motor nominal values + nominal_values = {key: 0.7 * limit for key, limit in limit_values.items()} + + # Create the environment + env_row = gem.make( + # Choose the permanent magnet synchronous motor with continuous-control-set + 'DqCont-CC-PMSM-v0', + # Pass a class with extra parameters + visualization=MotorDashboard( + state_plots=['i_sq', 'i_sd'], + action_plots='all', + reward_plot=True, + additional_plots=[MeanEpisodeRewardPlot()] + ), + # Set the mechanical load to have constant speed + load=ConstantSpeedLoad(omega_fixed=1000 * np.pi / 30), + + # Define which numerical solver is to be used for the simulation + 
ode_solver='scipy.solve_ivp', + + # Pass the previously defined reference generator + reference_generator=rg, + + reward_function=dict( + # Set weighting of different addends of the reward function + reward_weights={'i_sq': 1, 'i_sd': 1}, + # Exponent of the reward function + # Here we use a square root function + reward_power=0.5, + ), + + # Define which state variables are to be monitored concerning limit violations + # Here, only overcurrent will lead to termination + constraints=(), + + # Consider converter dead time within the simulation + # This means that a given action will show effect only with one step delay + # This is realistic behavior of drive applications + converter=dict( + dead_time=True, + ), + # Set the DC-link supply voltage + supply=dict( + u_nominal=400 + ), + + motor=dict( + # Pass the previously defined motor parameters + motor_parameter=motor_parameter, + + # Pass the updated motor limits and nominal values + limit_values=limit_values, + nominal_values=nominal_values, + ), + # Define which states will be shown in the state observation (what we can "measure") + state_filter=['i_sd', 'i_sq', 'u_sd', 'u_sq'], # , 'epsilon'], + ) + + # Now we apply the wrapper defined at the beginning of this script + env_train = AppendLastActionWrapper(env_row) + + # We flatten the observation (append the reference vector to the state vector such that + # the environment will output just a single vector with both information) + # This is necessary for compatibility with kerasRL2 + env_train = FlattenObservation(env_train) + + #################################################################################################################### + + if cfg['env_wrapper'] == 'past': + env = FeatureWrapper_pastVals(env_train, number_of_features=4 + number_past_vals * 2, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + + elif cfg['env_wrapper'] == 'no-I-term': + env = BaseWrapper(env_train, number_of_features=2 + number_past_vals * 2, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, gamma=gamma, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + + else: + env = FeatureWrapper(env_train, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps) # , use_past_vals=True, number_past_vals=30) + + # todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... 
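+    # The wrapped env's action space is temporarily widened from 2 to 4 dimensions so that the DDPG
+    # actor outputs separate proportional (action[0:2]) and integral (action[2:4]) parts; the
+    # FeatureWrapper recombines them into the 2-dimensional PI action that is applied to the GEM env.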
+ if cfg['env_wrapper'] not in ['no-I-term', 'I-controller']: + env.action_space = gym.spaces.Box(low=np.full(4, -1), high=np.full(4, 1)) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=1e-4) + + print('SCHRITTWEITE DES ACTIONNOISE?!?!?!?Passt laut standard 1e-4') + + # action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, + # sigma_min=noise_var * np.ones(n_actions) * noise_var_min, + # mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + # sigma=noise_var * np.ones(n_actions), dt=net.ts) + print(optimizer) + if optimizer == 'SGD': + used_optimzer = th.optim.SGD + elif optimizer == 'RMSprop': + used_optimzer = th.optim.RMSprop + # elif optimizer == 'LBFGS': + # needs in step additional argument + # used_optimzer = th.optim.LBFGS + else: + used_optimzer = th.optim.Adam + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers), + optimizer_class=used_optimzer) + + model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=log_path, + # model = myDDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name}/{n_trail}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=buffer_size, + # learning_starts=int(learning_starts * training_episode_length), + batch_size=batch_size, tau=tau, gamma=gamma, action_noise=action_noise, + train_freq=(train_freq, train_freq_type), gradient_steps=- 1, + optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + # Adjust network -> maybe change to Costume net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html + # adn scale weights and biases + count = 0 + for kk in range(actor_number_layers + 1): + + model.actor.mu._modules[str(count)].weight.data = model.actor.mu._modules[str(count)].weight.data * weight_scale + model.actor_target.mu._modules[str(count)].weight.data = model.actor_target.mu._modules[ + str(count)].weight.data * weight_scale + + model.actor.mu._modules[str(count)].bias.data = model.actor.mu._modules[str(count)].bias.data * bias_scale + model.actor_target.mu._modules[str(count)].bias.data = model.actor.mu._modules[ + str(count)].bias.data * bias_scale + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + if cfg['env_wrapper'] not in ['no-I-term', 'I-controller']: + env.action_space = gym.spaces.Box(low=np.full(2, -1), high=np.full(2, 1)) + + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Trial number": n_trail, + "Database name": folder_name, + "Sum_eps_reward": env.get_episode_rewards() + } + mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + + model.save(log_path 
+ f'model.zip') + + ####### Run Test ######### + return_sum = 0.0 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + # Refs created with https://github.com/max-schenke/DESSCA + i_d_refs = [-0.5718831392706399, -0.11155989917458595, -0.8444233463864655, -0.19260596846844558, + -0.48986342384598824, + -0.08540375784816023, -0.6983532259844449, -0.3409346664209051, -0.9852563901175903, + -0.019589794863040133, + -0.3057052318511703, -0.010759738176742362, -0.7264074671265837, -0.7003086456948622, + -0.5205127876117279, + -0.0035883351279332454, -0.24656126983332566, -0.7385108721382044, -0.8711444379999949, + -0.5322348905850738, + -0.16443631057073907, -0.26335305001172343, -0.8339056052207534, -0.9840272325710973, + -0.00099042967089491, + -0.4276376345373605, -0.4392085789117308, -0.29885945214798054, -0.3526213053117569, + -0.15544590095444902, + -0.38133627476871246, -0.0007362814213280888, -0.13766159578201825, -0.6998437778149555, + -0.02941718441323049, + -0.14911600490992516, -0.8711008909873345, -0.5803207691231205, -0.3908087722441505, + -0.30424273624679143, + -0.6032911651567467, -0.6097285170523984, -0.23000688296189783, -0.009050042083058152, + -0.13450601442490417, + -0.8117883556545268, -0.7542685229940803, -0.4627233964160423, -0.23713451030767801, -0.580302276033946] + i_q_refs = [-0.3392001552090831, 0.9601935188371409, -0.3536698661685236, -0.7470423329656373, 0.7498405690613185, + 0.02118430489789434, 0.2733946954263321, 0.2919040855524663, 0.16184776106212195, 0.5033515631986878, + -0.3472813053105329, -0.3978931436350608, 0.6856579757847681, -0.7061719805667996, 0.05173569323125849, + -0.9859275339077078, 0.6511009114276964, -0.07964009848269302, 0.4872958851075428, 0.4244964715390715, + 0.3348234680253275, -0.02175414797059596, 0.1689424266837956, -0.15367806515850901, -0.6890239130635769, + -0.5235888504056838, -0.18887320564466648, -0.9243752447874265, 0.9223611469482904, + -0.47288531380037824, + 0.5419042725157753, 0.21808910731016923, -0.2114136814114341, -0.43862800579799827, 0.7610593015542114, + -0.9580202514125911, -0.058327843098379906, -0.6351863815461574, 0.06422483040085132, + -0.6157429182475818, + 0.6283510657507491, -0.1007305747146939, 0.9225787627793309, -0.15228745162185686, 0.6513516638638627, + -0.5835510703463308, 0.46458552243856405, 0.25269729661377704, 0.1814216788492872, 0.2111335623928367] + + ref_change = 500 + + env_test = env_row + env_test = AppendLastActionWrapper_testsetting(env_test, i_d_refs, i_q_refs, ref_change) + env_test = FlattenObservation(env_test) + + if cfg['env_wrapper'] == 'past': + env_test = FeatureWrapper_pastVals(env_test, number_of_features=4 + number_past_vals * 2, + integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=0, penalty_I_weight=0, + penalty_P_weight=0, number_past_vals=number_past_vals, + training_episode_length=training_episode_length, ) + + + elif cfg['env_wrapper'] == 'no-I-term': + env_test = BaseWrapper(env_test, number_of_features=2 + number_past_vals * 2, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, gamma=0, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + + else: + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, + penalty_P_weight=0, + training_episode_length=training_episode_length, ) # , 
use_past_vals=True, number_past_vals=30) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + + rew_list = [] + + aP0 = [] + aP1 = [] + aI0 = [] + aI1 = [] + integrator_sum0 = [] + integrator_sum1 = [] + i_d_mess = [] + i_q_mess = [] + i_d_ref = [] + i_q_ref = [] + action_d = [] + action_q = [] + env_test.training_episode_length = test_length + 1 # that env is not reset + + for step in range(test_length): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + if action.shape[0] > 2: + aI0.append(np.float64(action[2])) + aI1.append(np.float64(action[3])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + + # env_test.render() + return_sum += rewards + rew_list.append(rewards) + i_d_mess.append(np.float64(obs[0])) + i_q_mess.append(np.float64(obs[1])) + i_d_ref.append(np.float64(obs[2])) + i_q_ref.append(np.float64(obs[3])) + action_d.append(np.float64(action[0])) + action_q.append(np.float64(action[1])) + + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "i_d_mess": i_d_mess, + "i_q_mess": i_q_mess, + "v_d_mess": env_test.env.env.v_d_mess, + "v_q_mess": env_test.env.env.v_q_mess, + "i_d_ref": i_d_ref, + "i_q_ref": i_q_ref, + 'action_d': action_d, + 'action_q': action_q, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionI0": aI0, + "ActionI1": aI1, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "GEM; features: error, past_vals, used_action"} + + mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + return (return_sum / test_length) diff --git a/experiments/GEM/hp_tune_ddpg_objective_GEM.py b/experiments/GEM/hp_tune_ddpg_objective_GEM.py new file mode 100644 index 00000000..e42613f9 --- /dev/null +++ b/experiments/GEM/hp_tune_ddpg_objective_GEM.py @@ -0,0 +1,471 @@ +import json +import os +import time + +import sqlalchemy +from optuna.samplers import TPESampler + +os.environ['PGOPTIONS'] = '-c statement_timeout=1000' + +import optuna +import platform +import argparse +import sshtunnel +import numpy as np +# np.random.seed(0) +from experiments.GEM.util.config import cfg + +from experiments.GEM.experiment_GEM import mongo_recorder, experiment_fit_DDPG +from experiments.hp_tune.util.scheduler import linear_schedule + +model_path = 'experiments/hp_tune/trained_models/study_22_run_11534/' + +PC2_LOCAL_PORT2PSQL = 11999 +SERVER_LOCAL_PORT2PSQL = 6432 +DB_NAME = 'optuna' +PC2_LOCAL_PORT2MYSQL = 11998 +SERVER_LOCAL_PORT2MYSQL = 3306 +STUDY_NAME = cfg['STUDY_NAME'] # 'DDPG_MRE_sqlite_PC2' + +node = platform.uname().node + + +def ddpg_objective_fix_params(trial): + file_congfig = open(model_path + + 'PC2_DDPG_Vctrl_single_inv_22_newTestcase_Trial_number_11534_0.json', ) + trial_config = json.load(file_congfig) + + number_learning_steps = 500000 # trial.suggest_int("number_learning_steps", 100000, 1000000) + # rew_weigth = trial.suggest_float("rew_weigth", 0.1, 5) + # rew_penalty_distribution = trial.suggest_float("antiwindup_weight", 0.1, 5) + 
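+    # All following hyperparameters are loaded from the stored config of study-22 trial 11534 instead of
+    # being sampled by Optuna; the commented-out suggest_* calls document the original search spaces.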
penalty_I_weight = trial_config["penalty_I_weight"] # trial.suggest_float("penalty_I_weight", 100e-6, 2) + penalty_P_weight = trial_config["penalty_P_weight"] # trial.suggest_float("penalty_P_weight", 100e-6, 2) + + penalty_I_decay_start = trial_config[ + "penalty_I_decay_start"] # trial.suggest_float("penalty_I_decay_start", 0.00001, 1) + penalty_P_decay_start = trial_config[ + "penalty_P_decay_start"] # trial.suggest_float("penalty_P_decay_start", 0.00001, 1) + + t_start_penalty_I = int(penalty_I_decay_start * number_learning_steps) + t_start_penalty_P = int(penalty_P_decay_start * number_learning_steps) + + integrator_weight = trial_config["integrator_weight"] # trial.suggest_float("integrator_weight", 1 / 200, 2) + # integrator_weight = trial.suggest_loguniform("integrator_weight", 1e-6, 1e-0) + # antiwindup_weight = trial.suggest_loguniform("antiwindup_weight", 50e-6, 50e-3) + antiwindup_weight = trial_config["antiwindup_weight"] # trial.suggest_float("antiwindup_weight", 0.00001, 1) + + learning_rate = trial_config["learning_rate"] # trial.suggest_loguniform("learning_rate", 1e-6, 1e-1) # 0.0002# + + lr_decay_start = trial_config[ + "lr_decay_start"] # trial.suggest_float("lr_decay_start", 0.00001, 1) # 3000 # 0.2 * number_learning_steps? + lr_decay_duration = trial_config["lr_decay_duration"] # trial.suggest_float("lr_decay_duration", 0.00001, + # 1) # 3000 # 0.2 * number_learning_steps? + t_start = int(lr_decay_start * number_learning_steps) + t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps, + number_learning_steps)) + final_lr = trial_config["final_lr"] # trial.suggest_float("final_lr", 0.00001, 1) + + gamma = trial_config["gamma"] # trial.suggest_float("gamma", 0.5, 0.9999) + weight_scale = trial_config["weight_scale"] # trial.suggest_loguniform("weight_scale", 5e-5, 0.2) # 0.005 + + bias_scale = trial_config["bias_scale"] # trial.suggest_loguniform("bias_scale", 5e-4, 0.1) # 0.005 + alpha_relu_actor = trial_config[ + "alpha_relu_actor"] # trial.suggest_loguniform("alpha_relu_actor", 0.0001, 0.5) # 0.005 + alpha_relu_critic = trial_config[ + "alpha_relu_critic"] # trial.suggest_loguniform("alpha_relu_critic", 0.0001, 0.5) # 0.005 + + batch_size = trial_config["batch_size"] # trial.suggest_int("batch_size", 16, 1024) # 128 + buffer_size = trial_config[ + "buffer_size"] # trial.suggest_int("buffer_size", int(1e4), number_learning_steps) # 128 + + actor_hidden_size = trial_config[ + "actor_hidden_size"] # trial.suggest_int("actor_hidden_size", 10, 200) # 100 # Using LeakyReLU + actor_number_layers = trial_config["actor_number_layers"] # trial.suggest_int("actor_number_layers", 1, 4) + + critic_hidden_size = trial_config["critic_hidden_size"] # trial.suggest_int("critic_hidden_size", 10, 300) # 100 + critic_number_layers = trial_config["critic_number_layers"] # trial.suggest_int("critic_number_layers", 1, 4) + + n_trail = str(trial.number) + use_gamma_in_rew = 1 + noise_var = trial_config["noise_var"] # trial.suggest_loguniform("noise_var", 0.01, 1) # 2 + # min var, action noise is reduced to (depends on noise_var) + noise_var_min = 0.0013 # trial.suggest_loguniform("noise_var_min", 0.0000001, 2) + # min var, action noise is reduced to (depends on training_episode_length) + noise_steps_annealing = int( + 0.25 * number_learning_steps) # trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), + # number_learning_steps) + noise_theta = trial_config["noise_theta"] # trial.suggest_loguniform("noise_theta", 1, 
50) # 25 # stiffness of OU + error_exponent = 0.5 # trial.suggest_loguniform("error_exponent", 0.001, 4) + + training_episode_length = trial_config[ + "training_episode_length"] # trial.suggest_int("training_episode_length", 500, 5000) # 128 + # learning_starts = 0.32 # trial.suggest_loguniform("learning_starts", 0.1, 2) # 128 + tau = trial_config["tau"] # trial.suggest_loguniform("tau", 0.0001, 0.3) # 2 + + train_freq_type = "step" # trial.suggest_categorical("train_freq_type", ["episode", "step"]) + train_freq = trial_config["train_freq"] # trial.suggest_int("train_freq", 1, 15000) + + optimizer = trial_config[ + "optimizer"] # trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"]) # , "LBFGS"]) + + number_past_vals = 5 # trial.suggest_int("number_past_vals", 0, 15) + + learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr, + t_start=t_start, + t_end=t_end, + total_timesteps=number_learning_steps) + + trail_config_mongo = {"Name": "Config", + "Node": node, + "Agent": "DDPG", + "Number_learning_Steps": number_learning_steps, + "Trial number": n_trail, + "Database name": cfg['STUDY_NAME'], + "Start time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Info": "P10 setting, EU grid, HPs von Stuy 22 + 5 pastvals" + "Reward design setzt sich aus MRE [0,1] und clipp-punishment [0,-1] zusammen", + } + trail_config_mongo.update(trial.params) + # mongo_recorder.save_to_mongodb('Trial_number_' + n_trail, trail_config_mongo) + mongo_recorder.save_to_json('Trial_number_' + n_trail, trail_config_mongo) + + loss = experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + # loss = experiment_fit_DDPG_custom(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, + integrator_weight * antiwindup_weight, penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, + n_trail, number_past_vals) + + return loss + + +def ddpg_objective(trial): + number_learning_steps = 500000 # trial.suggest_int("number_learning_steps", 100000, 1000000) + # rew_weigth = trial.suggest_float("rew_weigth", 0.1, 5) + # rew_penalty_distribution = trial.suggest_float("antiwindup_weight", 0.1, 5) + penalty_I_weight = 1 # trial.suggest_float("penalty_I_weight", 100e-6, 2) + penalty_P_weight = 1 # trial.suggest_float("penalty_P_weight", 100e-6, 2) + + penalty_I_decay_start = 0.5 # trial.suggest_float("penalty_I_decay_start", 0.00001, 1) + penalty_P_decay_start = 0.5 # trial.suggest_float("penalty_P_decay_start", 0.00001, 1) + + t_start_penalty_I = int(penalty_I_decay_start * number_learning_steps) + t_start_penalty_P = int(penalty_P_decay_start * number_learning_steps) + + integrator_weight = 0.1 # trial.suggest_float("integrator_weight", 1 / 200, 0.5) + # integrator_weight = trial.suggest_loguniform("integrator_weight", 1e-6, 1e-0) + # antiwindup_weight = trial.suggest_loguniform("antiwindup_weight", 50e-6, 50e-3) + antiwindup_weight = 0.1 # trial.suggest_float("antiwindup_weight", 0.00001, 1) + + learning_rate = trial.suggest_loguniform("learning_rate", 1e-7, 1e-2) # 0.0002# + + lr_decay_start = trial.suggest_float("lr_decay_start", 0.00001, 1) # 3000 # 
0.2 * number_learning_steps? + lr_decay_duration = trial.suggest_float("lr_decay_duration", 0.00001, + 1) # 3000 # 0.2 * number_learning_steps? + t_start = int(lr_decay_start * number_learning_steps) + t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps, + number_learning_steps)) + final_lr = trial.suggest_float("final_lr", 0.00001, 1) + + gamma = trial.suggest_float("gamma", 0.8, 0.9999) + weight_scale = trial.suggest_loguniform("weight_scale", 5e-5, 0.2) # 0.005 + + bias_scale = trial.suggest_loguniform("bias_scale", 0.01, 0.1) # 0.005 + alpha_relu_actor = trial.suggest_loguniform("alpha_relu_actor", 0.001, 0.5) # 0.005 + alpha_relu_critic = trial.suggest_loguniform("alpha_relu_critic", 0.001, 0.5) # 0.005 + + batch_size = trial.suggest_int("batch_size", 16, 512) # 128 + buffer_size = trial.suggest_int("buffer_size", int(20e4), number_learning_steps) # 128 + + actor_hidden_size = trial.suggest_int("actor_hidden_size", 10, 75) # 100 # Using LeakyReLU + actor_number_layers = trial.suggest_int("actor_number_layers", 1, 3) + + critic_hidden_size = trial.suggest_int("critic_hidden_size", 10, 300) # 100 + critic_number_layers = trial.suggest_int("critic_number_layers", 1, 4) + + n_trail = str(trial.number) + use_gamma_in_rew = 1 + noise_var = trial.suggest_loguniform("noise_var", 0.01, 1) # 2 + # min var, action noise is reduced to (depends on noise_var) + noise_var_min = 0.0013 # trial.suggest_loguniform("noise_var_min", 0.0000001, 2) + # min var, action noise is reduced to (depends on training_episode_length) + noise_steps_annealing = int( + 0.25 * number_learning_steps) # trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), + # number_learning_steps) + noise_theta = trial.suggest_loguniform("noise_theta", 1, 50) # 25 # stiffness of OU + error_exponent = 0.5 # 0.5 # trial.suggest_loguniform("error_exponent", 0.001, 4) + + training_episode_length = trial.suggest_int("training_episode_length", 1000, 4000) # 128 + # learning_starts = 0.32 # trial.suggest_loguniform("learning_starts", 0.1, 2) # 128 + tau = trial.suggest_loguniform("tau", 0.0001, 0.3) # 2 + + train_freq_type = "step" # trial.suggest_categorical("train_freq_type", ["episode", "step"]) + train_freq = trial.suggest_int("train_freq", 1, 5000) + + optimizer = trial.suggest_categorical("optimizer", ["Adam"]) # ["Adam", "SGD", "RMSprop"]) # , "LBFGS"]) + + learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr, + t_start=t_start, + t_end=t_end, + total_timesteps=number_learning_steps) + number_past_vals = trial.suggest_int("number_past_vals", 0, 15) + + trail_config_mongo = {"Name": "Config", + "Node": node, + "Agent": "DDPG", + "Number_learning_Steps": number_learning_steps, + "Trial number": n_trail, + "Database name": cfg['STUDY_NAME'], + "Start time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Optimierer/ Setting stuff": "DDPG HPO ohne Integrator, alle HPs fuer den I-Anteil " + "wurden daher fix gesetzt. Vgl. 
zu DDPG+I-Anteil" + } + trail_config_mongo.update(trial.params) + # mongo_recorder.save_to_mongodb('Trial_number_' + n_trail, trail_config_mongo) + mongo_recorder.save_to_json('Trial_number_' + n_trail, trail_config_mongo) + + loss = experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + # loss = experiment_fit_DDPG_custom(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, + integrator_weight * antiwindup_weight, penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, + n_trail, number_past_vals) + + return loss + + +def get_storage(url, storage_kws): + successfull = False + retry_counter = 0 + + while not successfull: + try: + storage = optuna.storages.RDBStorage( + url=url, **storage_kws) + successfull = True + except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.DatabaseError) as e: + wait_time = np.random.randint(60, 300) + retry_counter += 1 + if retry_counter > 10: + print('Stopped after 10 connection attempts!') + raise e + print(f'Could not connect, retry in {wait_time} s') + time.sleep(wait_time) + + return storage + + +def optuna_optimize_mysql_lea35(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + if node in ('LEA-WORK35', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2MYSQL + else: + port = SERVER_LOCAL_PORT2MYSQL + + storage = get_storage(f'mysql://{optuna_creds}@localhost:{port}/{DB_NAME}') + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2MYSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2MYSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + study = optuna.create_study( + storage=f"mysql+pymysql://{optuna_creds}@127.0.0.1:{tun.local_bind_port}/{DB_NAME}", + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_mysql(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f"mysql://{optuna_creds}@localhost/{DB_NAME}", + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_sqlite(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 100 + + print(n_trials) + print('Local optimization is run but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in cfg['lea_vpn_nodes']: + optuna_path = './optuna/' + else: + # assume we are on not of pc2 -> store to project folder + optuna_path = '/scratch/hpc-prf-reinfl/weber/OMG/optuna/' + + os.makedirs(optuna_path, exist_ok=True) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f'sqlite:///{optuna_path}optuna.sqlite', + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_psql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_psql' + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + # set trial to failed if it seems dead for 20 minutes + storage_kws = dict(engine_kwargs={"pool_timeout": 600}) + if node in ('lea-cyberdyne', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2PSQL + else: + port = SERVER_LOCAL_PORT2PSQL + + storage = get_storage(f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', storage_kws=storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2PSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2PSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + storage = get_storage(url=f'postgresql://{optuna_creds}' + f'@localhost:{tun.local_bind_port}/{DB_NAME}', storage_kws=storage_kws) + + # storage = optuna.storages.RDBStorage( + # url=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + # **storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +if __name__ == "__main__": + # learning_rate = list(itertools.chain(*[[1e-9] * 1])) + # search_space = {'learning_rate': learning_rate} # , 'number_learning_steps': number_learning_steps} + + TPE_sampler = TPESampler(n_startup_trials=400) # , constant_liar=True) + # TPE_sampler = TPESampler(n_startup_trials=2500) # , constant_liar=True) + + # optuna_optimize_mysql_lea35(ddpg_objective, study_name=STUDY_NAME, sampler=TPE_sampler) + + optuna_optimize_mysql_lea35(ddpg_objective_fix_params, study_name=STUDY_NAME, sampler=TPE_sampler) + # 
optuna_optimize_sqlite(ddpg_objective_fix_params, study_name=STUDY_NAME, sampler=TPE_sampler) + + # optuna_optimize(ddpg_objective, study_name=STUDY_NAME, + # sampler=TPE_sampler) #, sampler=optuna.samplers.GridSampler(search_space)) diff --git a/experiments/GEM/pc2_schedule_ddpg_GEM.py b/experiments/GEM/pc2_schedule_ddpg_GEM.py new file mode 100644 index 00000000..522acf32 --- /dev/null +++ b/experiments/GEM/pc2_schedule_ddpg_GEM.py @@ -0,0 +1,105 @@ +"""Allocate jobs executing a certain cmd endlessly. Make sure not to surpass +the allowed cpu core limit""" + +import os +import pathlib +import uuid +import time + +import optuna +from optuna.samplers import TPESampler + +from experiments.hp_tune.util import pc2 +from experiments.GEM.util.config import cfg + +# config +USER = os.getenv('USER') +ALLOWED_MAX_CPU_CORES = 500 # 512 +STUDY_NAME = cfg['STUDY_NAME'] +DB_NAME = 'optuna' +# resources request +job_resource_plan = { + 'duration': 24, # in hours + 'ncpus': 2, + 'memory': 12, + 'vmemory': 16, +} + +MAX_WORKERS = ALLOWED_MAX_CPU_CORES // job_resource_plan['ncpus'] + +PC2_LOCAL_PORT2MYSQL = 11998 +SERVER_LOCAL_PORT2MYSQL = 3306 + + +def main(): + started_workers = 0 + print('Start slavedriving loop..') + old_ccsinfo_counts = None + while True: + + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study( + storage=f'mysql+pymysql://{optuna_creds}@localhost:{PC2_LOCAL_PORT2MYSQL}/{DB_NAME}', + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=TPESampler(n_startup_trials=2500), study_name=STUDY_NAME, + load_if_exists=True, + direction='maximize') + + complete_trials = len([t for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]) + print(f'Completed trials in study: {complete_trials}') + if complete_trials > 1000: + print('Maximal completed trials reached - STOPPING') + break + + job_files_path = pathlib.Path( + f"/scratch/hpc-prf-reinfl/weber/OMG/ccs_job_files/{STUDY_NAME}") # SCRATCH = $PC2PFS/hpc_....re/OMG_prjecet + job_files_path.mkdir(parents=False, exist_ok=True) + + # read ccsinfo + ccsinfo = pc2.get_ccsinfo(USER) + ccsinfo_state_counts = ccsinfo.state.value_counts() + ccs_running = ccsinfo_state_counts.get('ALLOCATED', 0) + ccs_planned = ccsinfo_state_counts.get('PLANNED', 0) + total_busy = ccs_running + ccs_planned + if not ccsinfo_state_counts.equals(old_ccsinfo_counts): + print("\n## ccs summary ##") + print(f"Running: {ccs_running}") + print(f"Planned : {ccs_planned}") + print(f"Total busy workers (ccs): {total_busy}") + + if total_busy < MAX_WORKERS: + # call workers to work + n_workers = MAX_WORKERS - total_busy + print(f'Start {n_workers} workers:') + for w in range(n_workers): + started_workers += 1 + jobid = str(uuid.uuid4()).split('-')[0] + cluster = "oculus" + job_name = job_files_path / f"pc2_job_{jobid}.sh" + res_plan = pc2.calculate_resources(**job_resource_plan) + + execution_line = "PYTHONPATH=$HOME/openmodelica-microgrid-gym/ " \ + "python $HOME/openmodelica-microgrid-gym/experiments/GEM/hp_tune_ddpg_objective_GEM.py -n 1" + + print(f'Start job {jobid} ..') + pc2.create_n_run_script( + job_name, + pc2.build_shell_script_lines(job_files_path, cluster, + job_name, res_plan, + execution_line), + dry=False) + print('sleep 10s for better DB interaction', end='\r') + time.sleep(10) + + old_ccsinfo_counts = ccsinfo_state_counts + + print('sleep..', end='\r') + time.sleep(300) + + +if __name__ == 
'__main__': + main() diff --git a/experiments/GEM/util/config.py b/experiments/GEM/util/config.py new file mode 100644 index 00000000..ae3d02db --- /dev/null +++ b/experiments/GEM/util/config.py @@ -0,0 +1,12 @@ +cfg = dict(lea_vpn_nodes=['lea-skynet', 'lea-picard', 'lea-barclay', + 'lea-cyberdyne', 'webbah-ThinkPad-L380', 'LEA_WORK35'], + STUDY_NAME='GEM_I_term_4', + meas_data_folder='Json_to_MonogDB_GEM_I_term_4/', + MONGODB_PORT=12001, + loglevel='train', + is_dq0=False, + train_episode_length=2881, # defines when in training the env is reset e.g. for exploring starts, + # nothing -> Standard FeatureWrapper; past -> FeatureWrapper_pastVals; future -> FeatureWrapper_futureVals + # I-controller -> DDPG as P-term + standard I-controller; no-I-term -> Pure DDPG without integrator + env_wrapper='past' + ) diff --git a/experiments/GEM/util/recorder_GEM.py b/experiments/GEM/util/recorder_GEM.py new file mode 100644 index 00000000..5ed3bc3b --- /dev/null +++ b/experiments/GEM/util/recorder_GEM.py @@ -0,0 +1,69 @@ +import json +from os import makedirs + +import sshtunnel +from pymongo import MongoClient + +from experiments.GEM.util.config import cfg + +MONGODB_PORT = cfg['MONGODB_PORT'] # 12001 + + +class Recorder: + + def __init__(self, node, database_name): + """ + Class to record measured data to mongo database using pymongo + Depending on the node we are operating at it connects via ssh to + - in lea_vpn: to cyberdyne port 12001 + - else: assume pc2 node -> connect to frontend + and stores data to mongoDB at port MONGODB_PORT ( =12001). + HINT: From pc2 frontend permanent tunnel from cyberdyne port 12001 to frontend 12001 + is needed (assuming Mongod-Process running on cyberdyne + :params node: platform.uname().node + :params database_name: string for the database name to store data in + """ + self.node = node + self.save_count = 0 + + if self.node in cfg['lea_vpn_nodes']: + self.server_name = 'lea38' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT)} + self.save_folder = cfg['meas_data_folder'] + else: + # assume we are on a node of pc2 -> connect to frontend and put data on prt 12001 + # from there they can be grep via permanent tunnel from cyberdyne + self.server_name = 'fe.pc2.uni-paderborn.de' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT), + 'ssh_username': 'webbah'} + + self.save_folder = '/scratch/hpc-prf-reinfl/weber/OMG/' + cfg['meas_data_folder'] + + self.database_name = database_name + makedirs(self.save_folder, exist_ok=True) + # pathlib.Path(self.save_folder.mkdir(exist_ok=True)) + + def save_to_mongodb(self, col: str = ' trails', data=None): + """ + Stores data to database in document col + """ + with sshtunnel.open_tunnel(self.server_name, **self.tun_cfg) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[self.database_name] + trial_coll = db[col] # get collection named col + trial_coll.insert_one(data) + + def save_to_json(self, col: str = ' trails', data=None): + """ + Stores data to json file in specified directory. 
From there the data can be grept by another process + and can be stored to a DB via ssh + To distinguish the files of one trail a save_count is incremented and added to the filename + """ + + with open(self.save_folder + self.database_name + '_' + col + '_' + str(self.save_count) + '.json', + 'w') as outfile: + json.dump(data, outfile) + + self.save_count += 1 diff --git a/experiments/GEM/util/reporter_GEM.py b/experiments/GEM/util/reporter_GEM.py new file mode 100644 index 00000000..f01a2eff --- /dev/null +++ b/experiments/GEM/util/reporter_GEM.py @@ -0,0 +1,149 @@ +import json +import os +import platform +import re +import time + +import numpy as np + +import sshtunnel +from pymongo import MongoClient +# from experiments.hp_tune.util.config import cfg +from experiments.GEM.util.config import cfg + +print('Log Config: GEM!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') + + +class Reporter: + + def __init__(self): + """ + Greps json data which is stored in the cfg[meas_data_folder] and sends it to mongoDB + on cyberdyne (lea38) via sshtunnel on port MONGODB_PORT + """ + + MONGODB_PORT = cfg['MONGODB_PORT'] + + node = platform.uname().node + + if node in cfg['lea_vpn_nodes']: + self.server_name = 'lea38' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT)} + self.save_folder = './' + cfg['meas_data_folder'] + else: + # assume we are on a node of pc2 -> connect to frontend and put data on prt 12001 + # from there they can be grep via permanent tunnel from cyberdyne + self.server_name = 'fe.pc2.uni-paderborn.de' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT), + 'ssh_username': 'webbah'} + + self.save_folder = '/scratch/hpc-prf-reinfl/weber/OMG/' + cfg['meas_data_folder'] + + def save_to_mongodb(self, database_name: str, col: str = ' trails', data=None): + """ + Stores data to database in document col + """ + with sshtunnel.open_tunnel(self.server_name, **self.tun_cfg) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[database_name] + trial_coll = db[col] # get collection named col + trial_coll.insert_one(data) + + def oldest_file_in_tree(self, extension=".json"): + """ + Returns the oldest file-path string + """ + print(os.getcwd()) + return min( + (os.path.join(dirname, filename) + for dirname, dirnames, filenames in os.walk(self.save_folder) + for filename in filenames + if filename.endswith(extension)), + key=lambda fn: os.stat(fn).st_mtime) + + def oldest_file_with_name_in_tree(self, count_number_to_find, extension=".json"): + """ + Returns the oldest file-path string + + :param count_number_to_find: List of count_numbers to find and store instead of storing all + """ + print(os.getcwd()) + return min( + (os.path.join(dirname, filename) + for dirname, dirnames, filenames in os.walk(self.save_folder) + for filename in filenames + if filename.endswith(str(count_number_to_find) + extension)), + key=lambda fn: os.stat(fn).st_mtime) + + def json_to_mongo_via_sshtunnel(self, file_name_to_store=None): + + if not len(os.listdir(self.save_folder)) == 0: + + if file_name_to_store is None: + try: + oldest_file_path = self.oldest_file_in_tree() + except(ValueError) as e: + print('Folder seems empty or no matching data found!') + print(f'ValueError{e}') + print('Empty directory! 
Go to sleep for 5 minutes!') + time.sleep(5 * 60) + return + else: + oldest_file_path = file_name_to_store + + with open(oldest_file_path) as json_file: + data = json.load(json_file) + + successfull = False + retry_counter = 0 + + while not successfull: + try: + now = time.time() + if os.stat(oldest_file_path).st_mtime < now - 60: + self.save_to_mongodb(database_name=data['Database name'], + col='Trial_number_' + data['Trial number'], data=data) + print('Reporter: Data stored successfully to MongoDB and will be removed locally!') + os.remove(oldest_file_path) + successfull = True + except (sshtunnel.BaseSSHTunnelForwarderError) as e: + wait_time = np.random.randint(1, 60) + retry_counter += 1 + if retry_counter > 10: + print('Stopped after 10 connection attempts!') + raise e + print(f'Reporter: Could not connect via ssh to frontend, retry in {wait_time} s') + time.sleep(wait_time) + + else: + print('Empty directory! Go to sleep for 5 minutes!') + time.sleep(5 * 60) + + +if __name__ == "__main__": + + reporter = Reporter() + print("Starting Reporter for logging from local savefolder to mongoDB") + + file_ending_number = [178, 179] + + print(f"Searching for files in directory with number ending on {file_ending_number}") + + # print(reporter.oldest_file_in_tree()) + while True: + # reporter.json_to_mongo_via_sshtunnel() + + for number in file_ending_number: + try: + oldest_named_file_path = reporter.oldest_file_with_name_in_tree(number) + print(oldest_named_file_path) + + reporter.json_to_mongo_via_sshtunnel(oldest_named_file_path) + + except(ValueError) as e: + print(f'No file with number {number} ending') + print(f'ValueError{e}') + print('Go to sleep for 5 seconds and go on with next number!') + time.sleep(5) diff --git a/experiments/GEM/viz/get_learningCurve.py b/experiments/GEM/viz/get_learningCurve.py new file mode 100644 index 00000000..daca650a --- /dev/null +++ b/experiments/GEM/viz/get_learningCurve.py @@ -0,0 +1,46 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from bson import ObjectId +from plotly import tools +from pymongo import MongoClient + +from openmodelica_microgrid_gym.util import dq0_to_abc, abc_to_dq0 + +plt_train = True +plotly = False + +# db_name = 'PC2_DDGP_Vctrl_single_inv_18_penalties' +# db_name = 'DDPG_SplitActor_Best_study18_6462' +db_name = 'GEM_no_I_term_4' # 15 +# db_name = 'GEM_past' # 17 +trial = '0' +show_episode_number = 19 + +reward_df = pd.DataFrame() + +with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + idx = 0 + for coll_name in db.list_collection_names(): + trial = db[coll_name] + # trial = db.Trial_number_23 + + train_data = trial.find_one({"Name": "After_Training"}) + # trial_test = trial.find_one({"Name": "Test"}) + + if train_data is not None: # if trial not finished (was in actor_Ddpg > 550) + + if idx == 0: + reward_df = pd.DataFrame({str(idx): train_data['Mean_eps_reward']}) + else: + + df_tmp = pd.DataFrame({str(idx): train_data['Mean_eps_reward']}) + reward_df = reward_df.join(df_tmp) + idx += 1 + +reward_df.to_pickle(db_name + "_1250_agents_train_data.pkl") diff --git a/experiments/GEM/viz/mongoDB_get_test_data.py b/experiments/GEM/viz/mongoDB_get_test_data.py new file mode 100644 index 00000000..20194db6 --- /dev/null +++ b/experiments/GEM/viz/mongoDB_get_test_data.py @@ -0,0 +1,122 @@ +import matplotlib.pyplot as plt 
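+# Note (added for clarity): this script walks every trial collection in the study database, reads the "Test" document, and pickles the mean/std of the test reward and of the d-/q-current tracking errors into a DataFrame.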
+import numpy as np +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from bson import ObjectId +from plotly import tools +from pymongo import MongoClient + +from openmodelica_microgrid_gym.util import dq0_to_abc, abc_to_dq0 + +plt_train = True +plotly = False + +# db_name = 'PC2_DDGP_Vctrl_single_inv_18_penalties' +# db_name = 'DDPG_SplitActor_Best_study18_6462' +db_name = 'GEM_I_term_4' # 15 +# db_name = 'GEM_past' # 17 +trial = '0' +show_episode_number = 19 + +ret_mean_list_test = [] +ret_std_list_test = [] +i_q_delta_mean_list_test = [] +i_q_delta_std_list_test = [] +i_d_delta_mean_list_test = [] +i_d_delta_std_list_test = [] + +with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + for coll_name in db.list_collection_names(): + trial = db[coll_name] + # trial = db.Trial_number_23 + + # train_data = trial.find_one({"Name": "After_Training"}) + trial_test = trial.find_one({"Name": "Test"}) + + """ + ts = 1e-4 # if ts stored: take from db + t_test = np.arange(0, len(trial_test['i_d_mess']) * ts, ts).tolist() + + plt.plot(t_test, trial_test['i_d_mess']) + plt.plot(t_test, trial_test['i_d_ref'], 'r') + plt.plot(t_test, trial_test['i_d_ref'], 'r') + plt.grid() + plt.xlabel("t") + plt.ylabel("i_d") + plt.title(f"Test{db_name}") + plt.show() + + plt.plot(t_test, trial_test['i_q_mess']) + plt.plot(t_test, trial_test['i_q_ref'], 'r') + plt.grid() + plt.xlabel("t") + plt.ylabel("i_q") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['action_d']) + if len(trial_test['integrator_sum0']) > 0: + plt.plot(t_test, trial_test['integrator_sum0'], 'g') + plt.grid() + plt.xlabel("t") + plt.ylabel("action_d (integrator_sum-g)") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['action_q']) + if len(trial_test['integrator_sum1']) > 0: + plt.plot(t_test, trial_test['integrator_sum1'], 'g') + plt.grid() + plt.xlabel("t") + plt.ylabel("action_q (integrator_sum-g)") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['Reward']) + plt.grid() + plt.xlabel("t") + plt.ylabel("reward") + plt.title(f"Test {db_name}") + plt.show() + + print(np.mean(trial_test['Reward'])) + print(np.std(trial_test['Reward'])) + """ + + ret_mean_list_test.append(np.mean(trial_test['Reward'])) + ret_std_list_test.append(np.std(trial_test['Reward'])) + i_q_delta_mean_list_test.append(np.mean(np.array(trial_test['i_q_ref']) - np.array(trial_test['i_q_mess']))) + i_q_delta_std_list_test.append(np.std(np.array(trial_test['i_q_ref']) - np.array(trial_test['i_q_mess']))) + i_d_delta_mean_list_test.append(np.mean(np.array(trial_test['i_d_ref']) - np.array(trial_test['i_d_mess']))) + i_d_delta_std_list_test.append(np.std(np.array(trial_test['i_d_ref']) - np.array(trial_test['i_d_mess']))) + +print(ret_mean_list_test) +print(ret_std_list_test) +asd = 1 +results = { + 'return_Mean': ret_mean_list_test, + 'return_Std': ret_std_list_test, + 'i_q_delta_Mean': i_q_delta_mean_list_test, + 'i_q_delta_Std': i_q_delta_std_list_test, + 'i_d_delta_Mean': i_d_delta_mean_list_test, + 'i_d_delta_Std': i_d_delta_std_list_test, + 'study_name': db_name} + +df = pd.DataFrame(results) +df.to_pickle(db_name + "mean_over_1250_agents.pkl") + +m = np.array(ret_mean_list_test) +s = np.array(ret_std_list_test) + +plt.plot(m) +plt.fill_between(m - s, m + s, facecolor='r') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode 
steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title(db_name) +plt.show() diff --git a/experiments/GEM/viz/mongoDB_plt_GEM.py b/experiments/GEM/viz/mongoDB_plt_GEM.py new file mode 100644 index 00000000..c72253dc --- /dev/null +++ b/experiments/GEM/viz/mongoDB_plt_GEM.py @@ -0,0 +1,138 @@ +import time + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from pymongo import MongoClient + +plt_train = True +plotly = False + +folder_name = 'saves/compare_I_noI_4/' + +# db_name = 'PC2_DDGP_Vctrl_single_inv_18_penalties' +# db_name = 'DDPG_SplitActor_Best_study18_6462' +db_name = 'GEM_I_term_4' # 770 +db_name = 'GEM_no_I_term_4' # 1192 +trial = '0' +show_episode_number = 19 + +with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + # trial = db.Trial_number_770 + trial = db.Trial_number_1192 + + train_data = trial.find_one({"Name": "After_Training"}) + train_episode_data = trial.find_one({"Episode_number": show_episode_number}) + trial_config = trial.find_one({"Name": "Config"}) + trial_test = trial.find_one({"Name": "Test"}) + + train_reward_per_episode = train_data['Mean_eps_reward'] + + ax = plt.plot(train_reward_per_episode) # [::2]) + plt.grid() + plt.xlabel("Episodes") + plt.ylabel("Mean episode Reward") + plt.title(f"Test {db_name}") + plt.show() + + ts = 1e-4 # if ts stored: take from db + t_test = np.arange(0, len(trial_test['i_d_mess']) * ts, ts).tolist() + + plt.plot(t_test, trial_test['i_d_mess']) + plt.plot(t_test, trial_test['i_d_ref'], 'r') + # plt.plot(t_test, trial_test['i_d_ref'], 'r') + plt.grid() + plt.xlabel("t") + plt.ylabel("i_d") + plt.title(f"Test{db_name}") + plt.show() + + plt.plot(t_test, trial_test['i_q_mess']) + plt.plot(t_test, trial_test['i_q_ref'], 'r') + plt.grid() + plt.xlabel("t") + plt.ylabel("i_q") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['v_d_mess'][:-1]) + plt.grid() + plt.xlabel("t") + plt.ylabel("v_d") + plt.title(f"Test{db_name}") + plt.show() + + plt.plot(t_test, trial_test['v_q_mess'][:-1]) + plt.grid() + plt.xlabel("t") + plt.ylabel("v_q") + plt.title(f"Test{db_name}") + plt.show() + + plt.plot(t_test, trial_test['action_d']) + if len(trial_test['integrator_sum0']) > 0: + plt.plot(t_test, trial_test['integrator_sum0'], 'g') + plt.grid() + plt.xlabel("t") + plt.ylabel("action_d (integrator_sum-g)") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['action_q']) + if len(trial_test['integrator_sum1']) > 0: + plt.plot(t_test, trial_test['integrator_sum1'], 'g') + plt.grid() + plt.xlabel("t") + plt.ylabel("action_q (integrator_sum-g)") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['Reward']) + plt.grid() + plt.xlabel("t") + plt.ylabel("reward") + plt.title(f"Test {db_name}") + plt.show() + + ts = time.gmtime() + compare_result = {"Name": db_name, + "time": ts, + "i_q_mess": trial_test['i_q_mess'], + "i_q_ref": trial_test['i_q_ref'], + "i_d_mess": trial_test['i_d_mess'], + "i_d_ref": trial_test['i_d_ref'], + "v_d_mess": trial_test['v_d_mess'], + "v_q_mess": trial_test['v_q_mess'], + "Reward_test": trial_test['Reward'], + "train_reward_per_episode": train_reward_per_episode, + "info": "GEM results from testcase", + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/_{db_name}_trial770') + + if plotly: + plot = px.Figure() + 
plot.add_trace( + px.Scatter(y=train_reward_per_episode)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() diff --git a/experiments/GEM/viz/mongoDB_plus_local_plt_GEM.py b/experiments/GEM/viz/mongoDB_plus_local_plt_GEM.py new file mode 100644 index 00000000..bae435c5 --- /dev/null +++ b/experiments/GEM/viz/mongoDB_plus_local_plt_GEM.py @@ -0,0 +1,251 @@ +import json + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from bson import ObjectId +from plotly import tools +from pymongo import MongoClient + +from openmodelica_microgrid_gym.util import dq0_to_abc, abc_to_dq0 + +plt_train = True +plotly = False +local = True + +# db_name = 'PC2_DDGP_Vctrl_single_inv_18_penalties' +# db_name = 'DDPG_SplitActor_Best_study18_6462' +db_name = 'GEM_I_term_3' # 15 +# db_name = 'GEM_past' # 17 +trial = '0' +show_episode_number = 19 + +with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + trial = db.Trial_number_23 + + if local: + file_train = open('I_term_3_23/GEM_I_term_3_Trial_number_23_178.json', ) + train_data = json.load(file_train) + file_test = open('I_term_3_23/GEM_I_term_3_Trial_number_23_179.json', ) + trial_test = json.load(file_test) + else: + train_data = trial.find_one({"Name": "After_Training"}) + train_episode_data = trial.find_one({"Episode_number": show_episode_number}) + trial_test = trial.find_one({"Name": "Test"}) + trial_config = trial.find_one({"Name": "Config"}) + + train_reward_per_episode = train_data['Mean_eps_reward'] + + ax = plt.plot(train_reward_per_episode) # [::2]) + plt.grid() + plt.xlabel("Episodes") + plt.ylabel("Mean episode Reward") + plt.title(f"Test {db_name}") + plt.show() + + ts = 1e-4 # if ts stored: take from db + t_test = np.arange(0, len(trial_test['i_d_mess']) * ts, ts).tolist() + + plt.plot(t_test, trial_test['i_d_mess']) + plt.plot(t_test, trial_test['i_d_ref'], 'r') + plt.plot(t_test, trial_test['i_d_ref'], 'r') + plt.grid() + plt.xlabel("t") + plt.ylabel("i_d") + plt.title(f"Test{db_name}") + plt.show() + + plt.plot(t_test, trial_test['i_q_mess']) + plt.plot(t_test, trial_test['i_q_ref'], 'r') + plt.grid() + plt.xlabel("t") + plt.ylabel("i_q") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['action_d']) + if len(trial_test['integrator_sum0']) > 0: + plt.plot(t_test, trial_test['integrator_sum0'], 'g') + plt.grid() + plt.xlabel("t") + plt.ylabel("action_d (integrator_sum-g)") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['action_q']) + if len(trial_test['integrator_sum1']) > 0: + plt.plot(t_test, trial_test['integrator_sum1'], 'g') + plt.grid() + plt.xlabel("t") + plt.ylabel("action_q (integrator_sum-g)") + plt.title(f"Test {db_name}") + plt.show() + + if train_episode_data is not None: + # only available if loglevel == 'train' + ############################################################## + # Plot example Training Episode + R_load = train_episode_data['R_load_training'] + i_a = train_episode_data['i_a_training'] + i_b = train_episode_data['i_b_training'] + i_c = train_episode_data['i_c_training'] + v_a = train_episode_data['v_a_training'] + v_b = train_episode_data['v_b_training'] + v_c = train_episode_data['v_c_training'] + reward 
= train_episode_data['Rewards'] + phase = train_episode_data['Phase'] + + plt.plot(R_load) + plt.grid() + plt.xlabel("steps") + plt.ylabel("R_load") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(i_a) + plt.plot(i_b) + plt.plot(i_c) + plt.grid() + plt.xlabel("steps") + plt.ylabel("i_abc") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(v_a) + plt.plot(v_b) + plt.plot(v_c) + plt.grid() + plt.xlabel("steps") + plt.ylabel("v_abc") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(reward) + plt.grid() + plt.xlabel("steps") + plt.ylabel("Reward") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(train_episode_data['Integrator0']) + plt.plot(train_episode_data['Integrator1']) + plt.plot(train_episode_data['Integrator2']) + plt.grid() + plt.xlabel("steps") + plt.ylabel("Int Zustand") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(train_episode_data['actionP0']) + plt.plot(train_episode_data['actionP1']) + plt.plot(train_episode_data['actionP2']) + plt.grid() + plt.xlabel("steps") + plt.ylabel("actionP") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(train_episode_data['actionI0']) + plt.plot(train_episode_data['actionI1']) + plt.plot(train_episode_data['actionI2']) + plt.grid() + plt.xlabel("steps") + plt.ylabel("actionI") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + df = pd.DataFrame() + df['R_load'] = R_load + + hist = df['R_load'].hist(bins=50) + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + """ + plot = px.Figure() + plot.add_trace( + px.Scatter(y=R_load) + """ + # df2['v_0_SP'] = pd.DataFrame(test_data['inverter1_v_ref_0']) + # df2['v_1_SP'] = pd.DataFrame(test_data['inverter1_v_ref_1']) + # df2['v_2_SP'] = pd.DataFrame(test_data['inverter1_v_ref_2']) + + # df2['phase'] = pd.DataFrame(test_data['Phase']) + + # v_sp_abc = dq0_to_abc(np.array([df2['v_0_SP'], df2['v_1_SP'], df2['v_2_SP']]), np.array(df2['phase'])) + + v_mess_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), np.array(phase)) + + # x = df2['t'] + v_d = v_mess_dq0[0][:] # df2['v_a'] + v_q = v_mess_dq0[1][:] # df2['v_b'] + v_0 = v_mess_dq0[2][:] # df2['v_c'] + + plt.plot(v_d) + plt.plot(v_q) + plt.plot(v_0) + plt.grid() + plt.xlabel("steps") + plt.ylabel("v_dq0") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + # v_a_SP = df2['v_0_SP']#v_sp_abc[0,:] + # v_b_SP = df2['v_1_SP']#v_sp_abc[1,:] + # v_c_SP = df2['v_2_SP']#v_sp_abc[2,:] + + plot = px.Figure() + plot.add_trace( + px.Scatter(y=v_a)) + + plot.add_trace( + px.Scatter(y=v_b)) + + plot.add_trace( + px.Scatter(y=v_c)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() + + ############################################################## + # After Training + + if plotly: + plot = px.Figure() + plot.add_trace( + px.Scatter(y=train_reward_per_episode)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() diff --git a/experiments/GEM/viz/plt_learningCurve.py b/experiments/GEM/viz/plt_learningCurve.py new file mode 100644 index 00000000..6dbb6aa1 --- /dev/null +++ b/experiments/GEM/viz/plt_learningCurve.py @@ -0,0 +1,143 @@ +import 
matplotlib +import matplotlib.pyplot as plt +import pandas as pd +import numpy as np + +save_results = True +folder_name = 'errorbar_plots/' + +# Plot setting +params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 12, # fontsize for x and y labels (was 10) + 'axes.titlesize': 12, + 'font.size': 12, # was 10 + 'legend.fontsize': 12, # was 10 + 'xtick.labelsize': 12, + 'ytick.labelsize': 12, + 'text.usetex': True, + 'figure.figsize': [5.5, 3.7], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + +SEC = pd.read_pickle('GEM_I_term_4_1250_agents_data_with_rewards.pkl') +DDPG = pd.read_pickle('GEM_no_I_term_4_1250_agents_data_with_rewards.pkl') + +m_sec = np.array(SEC['return_Mean']) +s_sec = np.array(SEC['return_Std']) + +idx_SEC_sort = np.argsort(m_sec) + +agents = np.arange(0, 1250) + +# take the best 50 and the worst 50 and and 450 random + +idxs = np.random.randint(low=50, high=1200, size=450) +m_sort = np.sort(m_sec) +m_sec_550 = np.concatenate([m_sort[0:50], m_sort[1200:1250], np.take(m_sort, idxs)]) + +m_ddgp = np.array(DDPG['return_Mean']) +s_ddgp = np.array(DDPG['return_Std']) + +idx_DDPG_sort = np.argsort(m_ddgp) + +# take the best 50 and the worst 50 and and 450 random +m_sort = np.sort(m_ddgp) +m_ddpg_550 = np.concatenate([m_sort[0:50], m_sort[1200:1250], np.take(m_sort, idxs)]) + +if save_results: + matplotlib.rcParams.update(params) + +fig = plt.figure() +plt.boxplot((m_sec_550, m_ddpg_550)) +plt.grid() +plt.ylim([-1, 0]) +plt.xticks([1, 2], ['$\mathrm{SEC}$', '$\mathrm{DDPG}$']) +plt.ylabel('$\overline{r}_{k,i}$') +# plt.ylabel('$\overline{\sum{r_k}}$') +plt.tick_params(direction='in') +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/GEM_Errorbar_lim.pgf') + fig.savefig(f'{folder_name}/GEM_Errorbar_lim.png') + fig.savefig(f'{folder_name}/GEM_Errorbar_lim.pdf') + +##########################LearningCurve############### + +params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 12, # fontsize for x and y labels (was 10) + 'axes.titlesize': 12, + 'font.size': 12, # was 10 + 'legend.fontsize': 12, # was 10 + 'xtick.labelsize': 12, + 'ytick.labelsize': 12, + 'text.usetex': True, + 'figure.figsize': [5.5, 3.7], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + +matplotlib.rcParams.update(params) + +SEC_train_data = pd.read_pickle('GEM_I_term_4_1250_agents_train_data.pkl') +DDPG_train_data = pd.read_pickle('GEM_no_I_term_4_1250_agents_train_data.pkl') + +# sort df based on test return top down +# sort df by idx (return of test case from above) - not needed, just for doublecheck +df_sort_sec = SEC_train_data.iloc[:, idx_SEC_sort] +df_sort_ddpg = SEC_train_data.iloc[:, idx_SEC_sort] + +# get the best/worst idx2 out ouf sort_idx and snip the df to 550 based on that idx2 +idx2_ddpg = np.concatenate([idx_DDPG_sort[0:50], idx_DDPG_sort[idxs], idx_DDPG_sort[747:798]]) +ddpg550 = DDPG_train_data.iloc[:, idx2_ddpg] + +idx2_sec = np.concatenate([idx_SEC_sort[0:50], idx_SEC_sort[idxs], idx_SEC_sort[747:798]]) +sec550 = SEC_train_data.iloc[:, idx2_sec] + +DDPG_mean_learningCurve_550 = ddpg550.mean(axis=1) +DDPG_std_learningCurve_550 = 
ddpg550.std(axis=1) + +SEC_mean_learningCurve_550 = sec550.mean(axis=1) +SEC_std_learningCurve_550 = sec550.std(axis=1) + +low = (SEC_mean_learningCurve_550 - SEC_std_learningCurve_550).to_numpy() +up = (SEC_mean_learningCurve_550 + SEC_std_learningCurve_550).to_numpy() +SEC = SEC_mean_learningCurve_550.to_numpy() +DDPG = DDPG_mean_learningCurve_550.to_numpy() +episode = np.array([list(range(0, 177))]).squeeze() + +fig, ax = plt.subplots() +plt.fill_between(episode, up, low, facecolor='b', alpha=0.25) +plt.fill_between(episode, (DDPG_mean_learningCurve_550 + DDPG_std_learningCurve_550).to_numpy(), + (DDPG_mean_learningCurve_550 - DDPG_std_learningCurve_550).to_numpy(), facecolor='r', alpha=0.25) +plt.plot(episode, SEC, 'b', label='$\mathrm{SEC}$', linewidth=2) +plt.plot(episode, low, '--b', linewidth=0.5) +plt.plot(episode, up, '--b', linewidth=0.5) +plt.plot(episode, DDPG, 'r', label='$\mathrm{DDPG}$', linewidth=2) +plt.plot(episode, (DDPG_mean_learningCurve_550 + DDPG_std_learningCurve_550).to_numpy(), '--r', linewidth=0.5) +plt.plot(episode, (DDPG_mean_learningCurve_550 - DDPG_std_learningCurve_550).to_numpy(), '--r', linewidth=0.5) +plt.grid() +plt.tick_params(direction='in') +plt.legend() +plt.xlim([0, 176]) +# plt.set_xlim([0, 10]) +plt.ylabel('$\overline{r}_{k,i}$') +# plt.ylabel('$\overline{{r}}$') +plt.xlabel(r'$\mathrm{Episode}$') +plt.show() + +if save_results: + matplotlib.rcParams.update(params) + + fig.savefig(f'{folder_name}/GEM_learning_curve.pgf') + fig.savefig(f'{folder_name}/GEM_learning_curve.png') + fig.savefig(f'{folder_name}/GEM_learning_curve.pdf') diff --git a/experiments/GEM/viz/plt_pkl.py b/experiments/GEM/viz/plt_pkl.py new file mode 100644 index 00000000..5cb5d459 --- /dev/null +++ b/experiments/GEM/viz/plt_pkl.py @@ -0,0 +1,96 @@ +import matplotlib +import matplotlib.pyplot as plt +import pandas as pd +import numpy as np + +save_results = False +folder_name = 'errorbar_plots/' + +# Plot setting +params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 8, # fontsize for x and y labels (was 10) + 'axes.titlesize': 8, + 'font.size': 10, # was 10 + 'legend.fontsize': 10, # was 10 + 'xtick.labelsize': 10, + 'ytick.labelsize': 10, + 'text.usetex': True, + 'figure.figsize': [5.8, 3.8], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + +I_term = pd.read_pickle('GEM_I_term_4mean_over_1250_agents.pkl') +no_I_term = pd.read_pickle('GEM_no_I_term_4mean_over_1250_agents.pkl') + +asd = 1 + +m = np.array(I_term['return_Mean']) +s = np.array(I_term['return_Std']) +agents = np.arange(0, 1250) + +plt.plot(agents, m) +plt.fill_between(agents, m - s, m + s, facecolor='r') +plt.ylabel('Average return ') +plt.xlabel('Agents') +plt.ylim([-0.6, 0.2]) +plt.grid() +plt.title('I_term') +plt.show() + +plt.plot(s) +# plt.fill_between( m - s, m + s, facecolor='r') +plt.ylabel('Average return sdt') +plt.xlabel('agents') +# plt.ylim([-0.4, 0]) +plt.grid() +plt.title('I_term') +plt.show() + +m_no_I = np.array(no_I_term['return_Mean']) +s_no_I = np.array(no_I_term['return_Std']) +agents = np.arange(0, 1250) + +plt.plot(agents, m_no_I) +plt.fill_between(agents, m_no_I - s_no_I, m_no_I + s_no_I, facecolor='r') +plt.ylabel('Average return +- sdt') +plt.xlabel('Agents') +plt.ylim([-0.6, 0.2]) +plt.grid() +plt.title('no_I_term') +plt.show() + +plt.plot(s_no_I) +# 
plt.fill_between( m - s, m + s, facecolor='r') +plt.ylabel('Average return sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([-0.4, 0]) +plt.grid() +plt.title('no_I_term') +plt.show() + +if save_results: + matplotlib.rcParams.update(params) + +fig = plt.figure() +plt.boxplot((m_no_I, m)) +plt.grid() +plt.ylim([-1, 0]) +plt.xticks([1, 2], ['$\mathrm{DDPG}$', '$\mathrm{SEC}$']) +plt.ylabel('$\overline{\sum{r_k}}$') +plt.tick_params(direction='in') +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/GEM_Errorbar_lim.pgf') + fig.savefig(f'{folder_name}/GEM_Errorbar_lim.png') + fig.savefig(f'{folder_name}/GEM_Errorbar_lim.pdf') + +plt.boxplot((m, m_no_I)) +plt.grid() +plt.xticks([1, 2], ['I', 'no-I']) +plt.show() diff --git a/experiments/GEM/viz/saves/compare_I_noI_4/Qualitativ_comparison.py b/experiments/GEM/viz/saves/compare_I_noI_4/Qualitativ_comparison.py new file mode 100644 index 00000000..59552a11 --- /dev/null +++ b/experiments/GEM/viz/saves/compare_I_noI_4/Qualitativ_comparison.py @@ -0,0 +1,140 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +make_pyplot = False +show_load = True +interval_plt = True + +# Fuer den Detzerministc case +interval_list_x = [[0.197, 0.206], [0.795, 0.81], [0.8025, 0.81], [0.849, 0.855]] +interval_list_x = [[0.699, 0.705], [0.499, 0.505]] # ,[0.8025, 0.81], [0.849, 0.855]] +interval_list_y_q = [[-0.9, 0.9], [-1.2, 0.7], [0.55, 0.725], [-0.2, 0.67]] +interval_list_y_d = [[-1.5, -0.1], [-1.2, 0.2], [-0.3, -0.1], [-1.1, -0.21]] +interval_list_y_q = [[-1, 1], [-1, 1]] # , [-1, 1], [-1, 1]] +interval_list_y_d = [[-1, 1], [-1, 1]] # , [-1, 1], [-1, 1]] + +file_names = ['_GEM_I_term_4_trial770', '_GEM_no_I_term_4_trial1192'] +ylabels = ['I', 'no-I'] + +reward_list_DDPG = [] + +# fig, axs = plt.subplots(len(model_names)+2, len(interval_list_y), figsize=(16, 12)) # , sharex=True) # a new figure window +fig, axs = plt.subplots(len(file_names) * 2 + 1, len(interval_list_y_q), + figsize=(14, 10)) # , sharex=True) # a new figure window + +for i in range(len(interval_list_y_q)): + plt_count = 1 + ############## Subplots + # fig = plt.figure(figsize=(10,12)) # a new figure window + + for file_name, ylabel_use in zip(file_names, ylabels): + + df_DDPG = pd.read_pickle(file_name) + # df_DDPG = pd.read_pickle(folder_name + '/' 'model_5_pastVals.zip_100000steps_NoPhaseFeature_1427') + + ts = 1e-4 # if ts stored: take from db + t_test = np.arange(0, len(df_DDPG['i_d_mess'][0]) * ts, ts).tolist() + + Name = df_DDPG['Name'].tolist()[0] + reward = df_DDPG['Reward_test'].tolist()[0] + + if plt_count == 1: + axs[0, i].plot(t_test, reward, 'b', label=f' {Name}: ' + f'{round(sum(reward[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + else: + axs[0, i].plot(t_test, reward, 'r', label=f'{Name}: ' + f'{round(sum(reward[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + axs[0, i].grid() + axs[0, i].set_xlim(interval_list_x[i]) + # axs[0, i].set_ylim(interval_list_y[i]) + axs[0, i].legend() + if 0 == 0: + axs[0, i].set_ylabel("Reward") + + axs[plt_count, i].plot(t_test, df_DDPG['i_q_mess'].tolist()[0], 'b', label='i_q') + axs[plt_count, i].plot(t_test, df_DDPG['i_q_ref'].tolist()[0], 'r', label='i_q_ref') + axs[plt_count, i].grid() + axs[plt_count, i].legend() + axs[plt_count, 
i].set_xlim(interval_list_x[i]) + axs[plt_count, i].set_ylim(interval_list_y_q[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + # axs[plt_count, i].set_ylabel(pV) + axs[plt_count, i].set_ylabel(Name) + # axs[plt_count, i].set_ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") + # else: + # axs[plt_count, i].set_ylabel("$v_{\mathrm{q0, DDPG}}\,/\,\mathrm{V}$") + plt_count += 1 + + axs[plt_count, i].plot(t_test, df_DDPG['i_d_mess'].tolist()[0], 'b', label='i_d') + axs[plt_count, i].plot(t_test, df_DDPG['i_d_ref'].tolist()[0], 'r', label='i_d_ref') + axs[plt_count, i].grid() + axs[plt_count, i].legend() + axs[plt_count, i].set_xlim(interval_list_x[i]) + axs[plt_count, i].set_ylim(interval_list_y_d[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + # axs[plt_count, i].set_ylabel(pV) + axs[plt_count, i].set_ylabel(Name) + # axs[plt_count, i].set_ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") + # else: + # axs[plt_count, i].set_ylabel("$v_{\mathrm{q0, DDPG}}\,/\,\mathrm{V}$") + plt_count += 1 + """ + axs[plt_count, i].plot(t_test, df_DDPG['v_d_mess'].tolist()[0][:-1], 'b', label='v_d') + axs[plt_count, i].plot(t_test, df_DDPG['v_q_mess'].tolist()[0][:-1], 'r', label='v_q') + axs[plt_count, i].grid() + axs[plt_count, i].legend() + axs[plt_count, i].set_xlim(interval_list_x[i]) + #axs[plt_count, i].set_ylim(interval_list_y_d[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + # axs[plt_count, i].set_ylabel(pV) + axs[plt_count, i].set_ylabel(Name) + # axs[plt_count, i].set_ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") + # else: + # axs[plt_count, i].set_ylabel("$v_{\mathrm{q0, DDPG}}\,/\,\mathrm{V}$") + plt_count += 1 + """ + +""" +fig.suptitle(f'Model using pastVals:' + str(pastVals) + ' \n ' + f'Model-return(MRE)' + str(return_list_DDPG) + ' \n' + f' PI-return(MRE): {round(return_PI, 7)} \n ' + f'PI: Kp_i = {kp_c}, Ki_i = {ki_c}, Kp_v = {kp_v}, Ki_v = {ki_v}', + fontsize=14) +""" +fig.subplots_adjust(wspace=0.4, hspace=0.4) +plt.show() + +fig.savefig(f'/Ausschnitt.pdf') + +""" +plt.plot(t_test, trial_test['i_d_mess']) +plt.plot(t_test, trial_test['i_d_ref'], 'r') +plt.plot(t_test, trial_test['i_d_ref'], 'r') +plt.grid() +plt.xlabel("t") +plt.ylabel("i_d") +plt.title(f"Test{db_name}") +plt.show() + +plt.plot(t_test, df_DDPG['i_q_mess'].tolist()) +plt.plot(t_test, df_DDPG['i_q_ref'].tolist(), 'r') +plt.grid() +plt.xlabel("t") +plt.ylabel("i_q") +plt.title(f"Test {Name}") +plt.show() + +plt.plot(t_test, trial_test['Reward']) +plt.grid() +plt.xlabel("t") +plt.ylabel("reward") +plt.title(f"Test {db_name}") +plt.show() +""" diff --git a/experiments/GEM/viz/saves/compare_I_noI_4/paper_lpt_single.py b/experiments/GEM/viz/saves/compare_I_noI_4/paper_lpt_single.py new file mode 100644 index 00000000..1c33d86b --- /dev/null +++ b/experiments/GEM/viz/saves/compare_I_noI_4/paper_lpt_single.py @@ -0,0 +1,78 @@ +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +save_results = False + +# Fuer den 10s Fall +interval_list_x = [0.498, 0.505] +interval_list_y = [80, 345] + +if save_results: + # Plot setting + params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 10, # fontsize for x and y labels (was 10) + 'axes.titlesize': 10, + 
'font.size': 10, # was 10 + 'legend.fontsize': 10, # was 10 + 'xtick.labelsize': 10, + 'ytick.labelsize': 10, + 'text.usetex': True, + 'figure.figsize': [4.5, 4.6], # [5.4, 6],#[3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + matplotlib.rcParams.update(params) + +folder_name = 'saves/' # _deterministic' + +df_DDPG = pd.read_pickle('_GEM_no_I_term_4_trial1192') +df_DDPG_I = pd.read_pickle('_GEM_I_term_4_trial770') + +ts = 1e-4 +t_test = np.arange(0, len(df_DDPG['i_d_mess'][0]) * ts, ts).tolist() + +# fig, axs = plt.subplots(len(model_names) + 4, len(interval_list_y), +fig = plt.figure() + +iq_I = df_DDPG_I['i_q_mess'] + +fig, axs = plt.subplots(2, 1) +axs[1].plot(t_test, [i * 160 * 1.41 for i in df_DDPG_I['i_q_mess'].tolist()[0]], 'r', label='$\mathrm{SEC}$') +axs[1].plot(t_test, [i * 160 * 1.41 for i in df_DDPG['i_q_mess'].tolist()[0]], '--r', label='$\mathrm{DDPG}_\mathrm{}$') +axs[1].plot(t_test, [i * 160 * 1.41 for i in df_DDPG_I['i_q_ref'].tolist()[0]], ':', color='gray', + label='$\mathrm{i}_\mathrm{q}^*$') +axs[1].grid() +# axs[1].legend() +axs[1].set_xlim(interval_list_x) +axs[1].set_ylim([-0.5 * 160 * 1.41, 0.55 * 160 * 1.41]) +# axs[0].set_xlabel(r'$t\,/\,\mathrm{s}$') +axs[1].set_xlabel(r'$t\,/\,\mathrm{s}$') +axs[1].set_ylabel("$i_{\mathrm{q}}\,/\,{\mathrm{A}}$") +axs[1].tick_params(direction='in') + +axs[0].plot(t_test, [i * 160 * 1.41 for i in df_DDPG_I['i_d_mess'].tolist()[0]], 'b', label='$\mathrm{SEC}_\mathrm{}$') +axs[0].plot(t_test, [i * 160 * 1.41 for i in df_DDPG['i_d_mess'].tolist()[0]], '--b', label='$\mathrm{DDPG}_\mathrm{}$') +axs[0].plot(t_test, [i * 160 * 1.41 for i in df_DDPG_I['i_d_ref'].tolist()[0]], ':', color='gray', + label='$i_\mathrm{}^*$') +axs[0].grid() +axs[0].legend() +axs[0].set_xlim(interval_list_x) +axs[0].set_ylim([-0.78 * 160 * 1.41, 0.05 * 160 * 1.41]) +axs[0].tick_params(axis='x', colors='w') +axs[0].set_ylabel("$i_{\mathrm{d}}\,/\,{\mathrm{A}}$") +axs[0].tick_params(direction='in') +fig.subplots_adjust(wspace=0, hspace=0.05) +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/GEM_DDPG_I_noI_idq.pgf') + fig.savefig(f'{folder_name}/GEM_DDPG_I_noI_idq.png') + fig.savefig(f'{folder_name}/GEM_DDPG_I_noI_idq.pdf') diff --git a/experiments/P10/env/env_wrapper_P10.py b/experiments/P10/env/env_wrapper_P10.py new file mode 100644 index 00000000..e70569ca --- /dev/null +++ b/experiments/P10/env/env_wrapper_P10.py @@ -0,0 +1,688 @@ +import platform +from functools import partial +from typing import Union + +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.type_aliases import GymStepReturn +from stochastic.processes import VasicekProcess + +from experiments.P10.env.random_load_P10 import RandomLoad +from experiments.P10.env.vctrl_single_inv_P10 import net +from experiments.P10.util.config import cfg +from openmodelica_microgrid_gym.util import abc_to_alpha_beta, dq0_to_abc, abc_to_dq0, Fastqueue, RandProcess + + +class BaseWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", gamma=0, + number_learing_steps=500000, number_past_vals=0): + """ + Base Env Wrapper to add features to the env-observations and adds information to env.step output which can be + used in case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the 
action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) + + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + # increase action-space for PI-seperation + # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1)) + + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self._v_pahsor = 0.0 + self.n_episode = 0 + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.reward_episode_mean = [] + self.n_trail = n_trail + self.phase = [] + self.used_P = np.zeros(self.action_space.shape) + self.gamma = gamma + self.number_learing_steps = number_learing_steps + self.delay_queues = [Fastqueue(1, 3) for _ in range(number_past_vals)] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + + if cfg['is_dq0']: + # Action: dq0 -> abc + action_abc = dq0_to_abc(action, self.env.net.components[0].phase) + else: + action_abc = action + + # check if m_abc will be clipped + if np.any(abs(action_abc) > 1): + + clipped_action = np.clip(action_abc, -1, 1) + + delta_action = clipped_action - action_abc + # if, reduce integrator by clipped delta + action_delta = abc_to_dq0(delta_action, self.env.net.components[0].phase) + self.integrator_sum += action_delta * self.antiwindup_weight + + clip_reward = np.clip(np.sum(np.abs(delta_action) * \ + (-1 / (self.env.net.components[0].v_lim / self.env.net.components[ + 0].v_DC))) / 3 * (1 - self.gamma), + -1, 0) + + # clip_reward = 0 + + action_abc = clipped_action + + else: + clip_reward = 0 + + obs, reward, done, info = super().step(action_abc) + + reward = reward + clip_reward + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! 
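+ # Assumed observation layout (cf. obs_output in the experiment script): obs[0:3] inductor currents,
+ # obs[3:6] capacitor voltages, obs[6:9] voltage set points, obs[9:12] load currents (if provided).
+ # The load currents are scaled by the inverter current limit so that all observation channels
+ # share roughly the same per-unit range.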
+ obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + super().render() + + self._n_training_steps += 1 + + # if self._n_training_steps % round(self.training_episode_length / 10) == 0: + # self.env.on_episode_reset_callback() + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + super().close() + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + "Reward function": 'rew.rew_fun_dq0', + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + self.recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + obs = np.append(obs, error) + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + # todo efficiency? + self.used_P = np.copy(action) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + [x.clear() for x in self.delay_queues] + obs = super().reset() + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! 
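+ # Apply the same per-unit scaling of the load currents as in step(), so the first observation
+ # after a reset already matches the scaling seen during training.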
+ obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + self._n_training_steps = 0 + self.used_P = np.zeros(self.action_space.shape) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + obs = np.append(obs, error) + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + return obs + + def shift_and_append(self, obs): + """ + Takes the observation and shifts throught the queue + every queue output is added to total obs + """ + obs_delay_array = np.array([]) + obs_temp = obs + for queue in self.delay_queues: + obs_temp = queue.shift(obs_temp) + obs_delay_array = np.append(obs_delay_array, obs_temp) + + return obs_delay_array + + +class FeatureWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000): # , use_past_vals=False, number_past_vals=0): + """ + Env Wrapper to add features to the env-observations and adds information to env.step output which can be used in + case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) 
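+ :param integrator_weight: Gain applied to the integral part of the action (action[3:6]) before it is
+ accumulated in the internal integrator state (roughly a Ki*Ts term, defaults to net.ts)
+ :param antiwindup_weight: Back-calculation gain; if the combined P+I modulation index is clipped to
+ [-1, 1], the clipped difference (transformed back to dq0) times this weight is fed back into the
+ integrator state to prevent wind-up
+ :param penalty_I_weight: Weight of the reward penalty on the magnitude of the integral action
+ :param penalty_P_weight: Weight of the reward penalty on the magnitude of the proportional action
+ :param t_start_penalty_I: Training step after which the integral-action penalty is annealed linearly
+ towards zero at number_learing_steps
+ :param t_start_penalty_P: Training step after which the proportional-action penalty is annealed linearly
+ towards zero at number_learing_steps
+ :param number_learing_steps: Total number of learning steps, used for the penalty annealing above
+
+ Note: the first three action entries are interpreted as the proportional part and the last three as the
+ integral part of a PI-like control law, see step() below.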
+ + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + # increase action-space for PI-seperation + # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1)) + + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self._v_pahsor = 0.0 + self.n_episode = 0 + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.reward_episode_mean = [] + self.reward_plus_addon_episode_mean = [] + self.n_trail = n_trail + self.phase = [] + self.integrator_sum = np.zeros(self.action_space.shape) + self.integrator_weight = integrator_weight + self.antiwindup_weight = antiwindup_weight + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + self.gamma = gamma + self.penalty_I_weight = penalty_I_weight + self.penalty_P_weight = penalty_P_weight + self.t_start_penalty_I = t_start_penalty_I + self.t_start_penalty_P = t_start_penalty_P + self.number_learing_steps = number_learing_steps + self.integrator_sum_list0 = [] + self.integrator_sum_list1 = [] + self.integrator_sum_list2 = [] + self.action_P0 = [] + self.action_P1 = [] + self.action_P2 = [] + self.action_I0 = [] + self.action_I1 = [] + self.action_I2 = [] + self.rew = [] + self.rew_sum = [] + self.penaltyP = [] + self.penaltyI = [] + self.clipped_rew = [] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + action_P = action[0:3] + action_I = action[3:6] + + self.integrator_sum += action_I * self.integrator_weight + + action_PI = action_P + self.integrator_sum + + if cfg['is_dq0']: + # Action: dq0 -> abc + action_abc = dq0_to_abc(action_PI, self.env.net.components[0].phase) + + # check if m_abc will be clipped + if np.any(abs(action_abc) > 1): + + clipped_action = np.clip(action_abc, -1, 1) + + delta_action = clipped_action - action_abc + # if, reduce integrator by clipped delta + action_delta = abc_to_dq0(delta_action, self.env.net.components[0].phase) + self.integrator_sum += action_delta * self.antiwindup_weight + + clip_reward = np.clip(np.sum(np.abs(delta_action) * \ + (-1 / (self.env.net.components[0].v_lim / self.env.net.components[ + 0].v_DC / 2))) / 3, + -1, 0) * (1 - self.gamma) + + # clip_reward = 0 + + action_abc = clipped_action + + else: + clip_reward = 0 + + obs, reward, done, info = super().step(action_abc) + + # reward = reward + clip_reward shifted to reward sum + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! 
+ obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + super().render() + + integrator_penalty = np.sum(-((np.abs(action_I)) ** 0.5)) * (1 - self.gamma) / 3 + # action_P_penalty = - np.sum((np.abs(action_P - self.used_P)) ** 0.5) * (1 - self.gamma) / 3 + action_P_penalty = np.sum(-((np.abs(action_P)) ** 0.5)) * (1 - self.gamma) / 3 + + # reward_weight is = 1 + + if self.total_steps > self.t_start_penalty_I: + penalty_I_weight_scale = 1 / (self.t_start_penalty_I - self.number_learing_steps) * self.total_steps - \ + self.number_learing_steps / (self.t_start_penalty_I - self.number_learing_steps) + + else: + penalty_I_weight_scale = 1 + + if self.total_steps > self.t_start_penalty_P: + penalty_P_weight_scale = 1 / (self.t_start_penalty_P - self.number_learing_steps) * self.total_steps - \ + self.number_learing_steps / (self.t_start_penalty_P - self.number_learing_steps) + + else: + + penalty_P_weight_scale = 1 + + lam_P = self.penalty_P_weight * penalty_P_weight_scale + lam_I = self.penalty_I_weight * penalty_I_weight_scale + + if cfg['loglevel'] == 'debug_reward': + self.rew.append(reward) + self.penaltyP.append(lam_P * action_P_penalty) + self.penaltyI.append(lam_I * integrator_penalty) + self.clipped_rew.append(clip_reward) + + if reward > -1: + # if reward = -1, env is abort, worst reward = -1, if not, sum up components: + reward_sum = (reward + clip_reward + lam_I * integrator_penalty + lam_P * action_P_penalty) + + # normalize r_sum between [-1, 1] from [-1-lam_P-lam_I, 1] using min-max normalization from + # https://en.wikipedia.org/wiki/Feature_scaling + + reward = 2 * (reward_sum + 1 + lam_P + lam_I) / (1 + 1 + lam_P + lam_I) - 1 + + self._n_training_steps += 1 + + # if self._n_training_steps % round(self.training_episode_length / 10) == 0: + # self.env.on_episode_reset_callback() + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + super().close() + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + self.integrator_sum_list0.append(self.integrator_sum[0]) + self.integrator_sum_list1.append(self.integrator_sum[1]) + self.integrator_sum_list2.append(self.integrator_sum[2]) + self.action_P0.append(np.float64(action_P[0])) + self.action_P1.append(np.float64(action_P[1])) + self.action_P2.append(np.float64(action_P[2])) + self.action_I0.append(np.float64(action_I[0])) + self.action_I1.append(np.float64(action_I[1])) + self.action_I2.append(np.float64(action_I[2])) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 
0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, self.used_action) + + # todo efficiency? + self.used_P = np.copy(action_P) + self.used_I = np.copy(self.integrator_sum) + # self.used_P = action_P + # self.used_I = self.integrator_sum + + self.rew_sum.append(reward) + + if done: + # log train curve with additional rewards: + self.reward_plus_addon_episode_mean.append(np.mean(self.rew_sum)) + # log train curve with raw env-reward: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + reward_Data = {'Reward_env': self.rew, + 'penaltyP': self.penaltyP, + 'penaltyI': self.penaltyI, + 'clipped_rew': self.clipped_rew, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + } + + self.recorder.save_to_json('Trial_number_' + self.n_trail, reward_Data) + + self.rew = [] + self.penaltyP = [] + self.penaltyI = [] + self.clipped_rew = [] + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + "Reward function": 'rew.rew_fun_dq0', + 'Integrator0': self.integrator_sum_list0, + 'Integrator1': self.integrator_sum_list1, + 'Integrator2': self.integrator_sum_list2, + 'actionP0': self.action_P0, + 'actionP1': self.action_P1, + 'actionP2': self.action_P2, + 'actionI0': self.action_I0, + 'actionI1': self.action_I1, + 'actionI2': self.action_I2 + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + self.recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + + # if self._n_training_steps > 500: + super().close() + # plt.plot(self.integrator_sum_list0) + # plt.plot(self.integrator_sum_list1) + # plt.plot(self.integrator_sum_list2) + # plt.ylabel('intergratorzustand') + # plt.show() + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + obs = super().reset() + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! 
+ obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + self._n_training_steps = 0 + self.integrator_sum = np.zeros(self.action_space.shape) + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + # obs = np.append(obs, delta_i_lim_i_phasor) + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, self.used_action) + + return obs + + +class FeatureWrapper_pastVals(FeatureWrapper): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 500000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000, number_past_vals=10): + """ + Env Wrapper which adds the number_past_vals voltage ([3:6]!!!) observations to the observations. + Initialized with zeros! 
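+ The past values are realised via number_past_vals chained Fastqueue(1, 3) delay lines: in
+ shift_and_append() the measured voltages obs[3:6] are pushed through the queues, so the observation
+ is extended by the voltage measurements delayed by 1 ... number_past_vals control steps. The queues
+ are cleared (i.e. filled with zeros) on every reset().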
+ """ + super().__init__(env, number_of_features, training_episode_length, + recorder, n_trail, integrator_weight, antiwindup_weight, gamma, + penalty_I_weight, penalty_P_weight, t_start_penalty_I, t_start_penalty_P, + number_learing_steps) + + # self.observation_space = gym.spaces.Box( + # low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + # high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + self.delay_queues = [Fastqueue(1, 3) for _ in range(number_past_vals)] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + obs, reward, done, info = super().step(action) + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + [x.clear() for x in self.delay_queues] + obs = super().reset() + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + return obs + + def shift_and_append(self, obs): + """ + Takes the observation and shifts throught the queue + every queue output is added to total obs + """ + obs_delay_array = np.array([]) + obs_temp = obs + for queue in self.delay_queues: + obs_temp = queue.shift(obs_temp) + obs_delay_array = np.append(obs_delay_array, obs_temp) + + return obs_delay_array diff --git a/experiments/P10/env/random_load_P10.py b/experiments/P10/env/random_load_P10.py new file mode 100644 index 00000000..7300e489 --- /dev/null +++ b/experiments/P10/env/random_load_P10.py @@ -0,0 +1,122 @@ +import numpy as np +import pandas as pd + +from openmodelica_microgrid_gym.util import RandProcess + + +class RandomLoad: + def __init__(self, train_episode_length: int, ts: float, rand_process: RandProcess, loadstep_time: int = None, + load_curve: pd.DataFrame = None, bounds=None, bounds_std=None): + """ + + :param max_episode_steps: number of steps per training episode (can differ from env.max_episode_steps) + :param ts: sampletime of env + :param rand_pocess: Instance of random process defines noise added to load + :param loadstep_time: number of env step where load step should happen + :param load_curve: Stored load data to sample from instead of smaple from distribution + :param bounds: Bounds to clip the sampled load data + :param bounds_std: Chosen bounds are sampled from a distribution with std=bounds_std and mean=bounds + + """ + self.train_episode_length = train_episode_length + self.ts = ts + self.rand_process = rand_process + if loadstep_time is None: + self.loadstep_time = np.random.randint(0, self.train_episode_length) + else: + self.loadstep_time = loadstep_time + self.load_curve = load_curve + if bounds is None: + self.bounds = (-np.inf, np.inf) + else: + self.bounds = bounds + if bounds_std is None: + self.bounds_std = (0, 0) + else: + self.bounds_std = bounds_std + + self.lowerbound_std = 0 + self.upperbound_std = 0 + + def reset(self, loadstep_time=None): + if loadstep_time is None: + self.loadstep_time = np.random.randint(0, self.train_episode_length) + else: + self.loadstep_time = loadstep_time + + def clipped_step(self, t): + return np.clip(self.rand_process.sample(t), + self.bounds[0] + self.lowerbound_std, + self.bounds[1] + self.upperbound_std + ) + + def give_dataframe_value(self, t, col): + """ + Gives load values from a stored dataframe (self.load_curve) + :parma t: time - represents here the 
row of the dataframe + :param col: colon name of the dataframe (typically str) + """ + if t < 0: + # return None + return self.load_curve[col][0] + if self.load_curve is None: + raise ValueError('No dataframe given! Please feed load class (.load_curve) with data') + return self.load_curve[col][int(t / self.ts)] + + def random_load_step(self, t, event_prob: int = 2, step_prob: int = 50): + """ + Changes the load parameters applying a loadstep with 0.2% probability which is a pure step with 50 % + probability otherwise a drift. In every event the random process variance is drawn randomly [1, 150]. + :param t: time + :param event_prob: probability (in pre mill) that the step event is triggered in the current step + :param step_prob: probability (in pre cent) that event is a abrupt step (drift otherwise!, random process speed + not adjustable yet + :return: Sample from SP + """ + # Changes rand process data with probability of 5% and sets new value randomly + if np.random.randint(0, 1001) < 2: + + gain = np.random.randint(self.rand_process.bounds[0], self.rand_process.bounds[1]) + + self.rand_process.proc.mean = gain + self.rand_process.proc.vol = np.random.randint(1, 150) + self.rand_process.proc.speed = np.random.randint(10, 1200) + # define sdt for clipping once every event + # np.maximum to not allow negative values + self.lowerbound_std = np.maximum(np.random.normal(scale=self.bounds_std[0]), 0.0001) + self.upperbound_std = np.random.normal(scale=self.bounds_std[1]) + + # With 50% probability do a step or a drift + if np.random.randint(0, 101) < 50: + # step + self.rand_process.reserve = gain + + else: + # drift -> Lower speed to allow + self.rand_process.proc.speed = np.random.randint(10, 100) + + return np.clip(self.rand_process.sample(t), + self.bounds[0] + self.lowerbound_std, + self.bounds[1] + self.upperbound_std + ) + + def do_change(self, event_prob_permill=2, step_prob_percent=50): + if np.random.randint(0, 1001) < event_prob_permill: + + gain = np.random.randint(self.rand_process.bounds[0], self.rand_process.bounds[1]) + + self.rand_process.proc.mean = gain + self.rand_process.proc.vol = np.random.randint(1, 150) + self.rand_process.proc.speed = np.random.randint(10, 1200) + # define sdt for clipping once every event + self.lowerbound_std = np.random.normal(scale=self.bounds_std[0]) + self.upperbound_std = np.random.normal(scale=self.bounds_std[1]) + + # With 50% probability do a step or a drift + if np.random.randint(0, 101) < step_prob_percent: + # step + self.rand_process.reserve = gain + + else: + # drift -> Lower speed to allow + self.rand_process.proc.speed = np.random.randint(10, 100) diff --git a/experiments/P10/env/rewards_P10.py b/experiments/P10/env/rewards_P10.py new file mode 100644 index 00000000..bca03916 --- /dev/null +++ b/experiments/P10/env/rewards_P10.py @@ -0,0 +1,132 @@ +import numpy as np +from openmodelica_microgrid_gym.util import nested_map, abc_to_dq0, dq0_to_abc +from typing import List + + +class Reward: + def __init__(self, nom, lim, v_DC, gamma, det_run=False, nom_region: float = 1.1, use_gamma_normalization=1, + error_exponent: float = 1.0, i_lim: float = np.inf, i_nom: float = np.inf, i_exponent: float = 1.0): + """ + + :param nom: Nominal value for the voltage + :param lim: Limit value for the voltage + :param v_DC: DC-Link voltage + :param gamma: Discount factor to map critic values -> [-1, 1] + :param use_gamma_normalization: if 0 normalization depending on gamma is not used + :param nom_region: Defines cliff in the reward landscape where the 
reward is pulled down because the nominal + value is exceeded. nom_region defines how much the nominal value can be exceeded before + the cliff (e.g. 1.1 -> cliff @ 1.1*self.nom + :param error_exponent: defines the used error-function: E.g.: 1 -> Mean absolute error + 2 -> Mean squared error + 0.5 -> Mean root error + :param i_lim: Limit value for the current + :param i_nom: Nominal value for the current + """ + self._idx = None + self.nom = nom + self.lim = lim + self.v_DC = v_DC + self.use_gamma_normalization = use_gamma_normalization + if self.use_gamma_normalization == 1: + self.gamma = gamma + else: + self.gamma = 0 + self.det_run = det_run + self.nom_region = nom_region + self.exponent = error_exponent + self.i_lim = i_lim + self.i_nom = i_nom + self.i_exponent = i_exponent + + def set_idx(self, obs): + if self._idx is None: + self._idx = nested_map( + lambda n: obs.index(n), + [[f'lc.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012'], + [f'lc.capacitor{k}.v' for k in '123'], [f'inverter1.v_ref.{k}' for k in '012'], + 'inverter1.phase.0']) + + def rew_fun_dq0(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + uses the same reward for voltage like defined above + + If v_lim is exceeded, episode abort -> env.abort_reward (should be -1) is given back + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + phase = data[idx[4]] + + vdq0_master = abc_to_dq0(data[idx[2]], phase) # 3 phase currents at LC inductors + + # set points (sp) + vsp_dq0_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + # SP = vsp_dq0_master * self.lim + # mess = vdq0_master * self.lim + + if any(np.abs(data[idx[2]]) > 1): + if self.det_run: + return -(1 - self.gamma) + else: + return + else: + # rew = np.sum(1 - (2 * (np.abs(vsp_dq0_master - vdq0_master)) ** self.exponent)) * (1 - self.gamma) / 3 # /2 + + # / 2 da normiert auf v_lim = 1, max abweichung 2*vlim=2, damit worst case bei 0 + rew = np.sum(1 - ((np.abs(vsp_dq0_master - vdq0_master) / 2) ** self.exponent)) * (1 - self.gamma) / 3 # /2 + """ + rew = np.sum(-((np.abs(vsp_dq0_master - vdq0_master)) ** self.exponent)) * (1 - self.gamma) / 3 # /2 + """ + return rew + + def rew_fun_PIPI_MRE(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + uses the same reward for voltage like defined above but also includes reward depending on the current + If i_nom is exceeded r_current: f(i_mess) -> [0, 1] is multiplied to the r_voltage + Before r_voltage is scaled to the region [0,1]: + - r_voltage = (r_voltage+1)/2 + - r = r_voltage * r_current + - r = r-1 + + If v_lim or i_lim are exceeded, episode abort -> env.abort_reward (should be -1) is given back + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. 
currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + i_mess = data[idx[0]] # 3 phase currents at LC inductors + mess = data[idx[2]] # 3 phase currents at LC inductors + + # set points (sp) + isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + SP = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + # SP = vsp_dq0_master * self.lim + # mess = vdq0_master * self.lim + + # rew = np.sum(-((np.abs(SP - mess)) ** 0.5)) * (1 - self.gamma) / 3 + + phase = data[idx[4]] + + vdq0_master = abc_to_dq0(data[idx[2]], phase) / self.lim # 3 phase currents at LC inductors + + # set points (sp) + vsp_dq0_master = abc_to_dq0(data[idx[3]], + phase) / self.lim # convert dq set-points into three-phase abc coordinates + + # SP = vsp_dq0_master * self.lim + # mess = vdq0_master * self.lim + + # rew = np.sum(-((np.abs(vsp_dq0_master - vdq0_master)) ** self.exponent)) * (1 - self.gamma) / 3 + + rew = np.sum(1 - ((np.abs(vsp_dq0_master - vdq0_master) / 2) ** self.exponent)) * (1 - self.gamma) / 3 + + return rew diff --git a/experiments/P10/env/vctrl_single_inv_P10.py b/experiments/P10/env/vctrl_single_inv_P10.py new file mode 100644 index 00000000..96c36ffc --- /dev/null +++ b/experiments/P10/env/vctrl_single_inv_P10.py @@ -0,0 +1,297 @@ +from datetime import datetime, time +from functools import partial +from itertools import accumulate +from os import makedirs + +import time + +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from stochastic.processes import VasicekProcess + +from experiments.P10.env.random_load_P10 import RandomLoad + +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network + +from openmodelica_microgrid_gym.util import RandProcess +from gym.envs.registration import register +from experiments.P10.util.config import cfg + +folder_name = cfg['STUDY_NAME'] # 'DDPG_MRE_sqlite_PC2' +# experiment_name = 'DDPG_VC_Reward_MRE_reward_NOT_NORMED' +experiment_name = 'plots' +timestamp = datetime.now().strftime(f'_%Y.%b.%d_%X') + +makedirs(folder_name, exist_ok=True) +# makedirs(folder_name + experiment_name, exist_ok=True) + + +# Simulation definitions +if not cfg['is_dq0']: + # load net using abc reference values + print('abc not implemented yet! dq0 is used!!!!') + net = Network.load('net/net_p10.yaml') +else: + # load net using dq0 reference values + + net = Network.load('net/net_p10.yaml') + +# set high to not terminate env! Termination should be done in wrapper by env after episode-length-HP +max_episode_steps = 1500000 # net.max_episode_steps # number of simulation steps per episode + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC + +# plant + +print('Using P10 setting') +L_filter = 70e-6 # / H +R_filter = 1.1e-3 # / Ohm +R_filter_C = 7e-3 # / Ohm +C_filter = 250e-6 # / F +lower_bound_load_clip = 1 # to allow maximal load that draws i_limit (let exceed?) +lower_bound_load_clip_std = 1 + +""" +old TB: +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F +# R = 40 # nomVoltPeak / 7.5 # / Ohm +#lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) 
+#lower_bound_load_clip_std = 2 +""" + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +upper_bound_load_clip = 200 # to apply symmetrical load bounds +upper_bound_load_clip_std = 0 +R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + +gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +# if save needed in dependence of trial ( -> foldername) shift to executive file? +def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + plt.close() + + +def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + +def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + # ts = time.gmtime() + # fig.savefig(f'{folder_name + experiment_name}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + +# toDo: train_episode_length not needed in used methods, reset not as well? +rand_load_train = RandomLoad(5000, net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + +cb = CallbackList() +# set initial = None to reset load random in range of bounds +cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) +cb.append(rand_load_train.reset) + +register(id='vctrl_single_inv_train-v0', + entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': R_filter_C, + 'lc.resistor5.R': R_filter_C, + 'lc.resistor6.R': R_filter_C, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.random_load_step, + 'r_load.resistor2.R': rand_load_train.clipped_step, + 
'r_load.resistor3.R': rand_load_train.clipped_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) + +register(id='vctrl_single_inv_test-v0', + entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=20000, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor1.R'), + # 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor2.R'), + # 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor3.R') + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) + +register(id='vctrl_single_inv_test-v1', + entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=100001, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': R_filter_C, + 'lc.resistor5.R': R_filter_C, + 'lc.resistor6.R': R_filter_C, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': 
partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.random_load_step, + # 'r_load.resistor1.R': rand_load_train.one_random_loadstep_per_episode, + 'r_load.resistor2.R': rand_load_train.clipped_step, + 'r_load.resistor3.R': rand_load_train.clipped_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) diff --git a/experiments/P10/experiment_vctrl_single_inv_P10.py b/experiments/P10/experiment_vctrl_single_inv_P10.py new file mode 100644 index 00000000..b4aad907 --- /dev/null +++ b/experiments/P10/experiment_vctrl_single_inv_P10.py @@ -0,0 +1,353 @@ +import platform +import time + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +from experiments.P10.env.env_wrapper_P10 import FeatureWrapper, FeatureWrapper_pastVals, BaseWrapper +from experiments.P10.env.rewards_P10 import Reward +from experiments.P10.env.vctrl_single_inv_P10 import net # , folder_name +from experiments.P10.util.config import cfg +from experiments.P10.util.recorder_P10 import Recorder + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] + + +def experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, antiwindup_weight, + penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, n_trail, + number_past_vals=0): + if node not in cfg['lea_vpn_nodes']: + # assume we are on pc2 + pc2_log_path = cfg['pc2_logpath'] + log_path = f'{pc2_log_path}/{folder_name}/{n_trail}/' + else: + log_path = f'{folder_name}/{n_trail}/' + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + env = gym.make('experiments.P10.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 
'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + # , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'] + ) + + if cfg['env_wrapper'] == 'past': + env = FeatureWrapper_pastVals(env, number_of_features=9 + number_past_vals * 3, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + + + elif cfg['env_wrapper'] == 'no-I-term': + env = BaseWrapper(env, number_of_features=6 + number_past_vals * 3, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, gamma=gamma, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + + else: + env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps) # , use_past_vals=True, number_past_vals=30) + + # todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... + if cfg['env_wrapper'] not in ['no-I-term', 'I-controller']: + env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + # action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, + # sigma_min=noise_var * np.ones(n_actions) * noise_var_min, + # mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + # sigma=noise_var * np.ones(n_actions), dt=net.ts) + print(optimizer) + if optimizer == 'SGD': + used_optimzer = th.optim.SGD + elif optimizer == 'RMSprop': + used_optimzer = th.optim.RMSprop + # elif optimizer == 'LBFGS': + # needs in step additional argument + # used_optimzer = th.optim.LBFGS + else: + used_optimzer = th.optim.Adam + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers), + optimizer_class=used_optimzer) + + model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=log_path, + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=buffer_size, + # learning_starts=int(learning_starts * training_episode_length), + batch_size=batch_size, tau=tau, gamma=gamma, action_noise=action_noise, + train_freq=(train_freq, train_freq_type), gradient_steps=- 1, + optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + # Adjust network -> maybe change to Costume net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html + # adn scale weights and biases + count = 0 + for kk in range(actor_number_layers + 1): + + model.actor.mu._modules[str(count)].weight.data = model.actor.mu._modules[str(count)].weight.data * weight_scale + 
model.actor_target.mu._modules[str(count)].weight.data = model.actor_target.mu._modules[ + str(count)].weight.data * weight_scale + + model.actor.mu._modules[str(count)].bias.data = model.actor.mu._modules[str(count)].bias.data * bias_scale + model.actor_target.mu._modules[str(count)].bias.data = model.actor.mu._modules[ + str(count)].bias.data * bias_scale + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + # todo: Downscale actionspace - lessulgy possible? Interaction pytorch... + if cfg['env_wrapper'] not in ['no-I-term', 'I-controller']: + env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {#"Name": "After_Training", + "Mean_eps_env_reward_raw": env.reward_episode_mean, + "Mean_eps_reward_sum": env.reward_plus_addon_episode_mean, + #"Trial number": n_trail, + #"Database name": folder_name, + #"Sum_eps_reward": env.get_episode_rewards() + } + #mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + mongo_recorder.save_local_to_pkl('traing_rewards_Trial_number_' + n_trail, train_data, n_trail) + + model.save(log_path + f'model.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + + env_test = gym.make('experiments.P10.env:vctrl_single_inv_test-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? 
+ obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + # , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'] + ) + + if cfg['env_wrapper'] == 'past': + env_test = FeatureWrapper_pastVals(env_test, number_of_features=9 + number_past_vals * 3, + integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, + penalty_P_weight=0, number_past_vals=number_past_vals, + training_episode_length=training_episode_length, ) + + elif cfg['env_wrapper'] == 'no-I-term': + env_test = BaseWrapper(env_test, number_of_features=6 + number_past_vals * 3, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, gamma=gamma, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + + else: + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, + penalty_P_weight=0, + training_episode_length=training_episode_length, ) # , use_past_vals=True, number_past_vals=30) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + va = [] + vb = [] + vc = [] + v_ref0 = [] + v_ref1 = [] + v_ref2 = [] + ia = [] + ib = [] + ic = [] + R_load = [] + + for step in range(env_test.max_episode_steps): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + if cfg['loglevel'] in ['train', 'test']: + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + if cfg['env_wrapper'] not in ['no-I-term']: + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test:# and env_test.rew[-1]: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + + if limit_exceeded_in_test: + # if limit was exceeded once, reward will be kept to -1 till the end of the episode, + # nevertheless what the agent does + rewards = -1 + + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + + if step % 1000 == 0 and step != 0: + # if step % cfg['train_episode_length'] == 0 and step != 0: + if cfg['loglevel'] in ['train', 'test']: + va.extend(env_test.history[env_test.viz_col_tmpls[0].vars[0]].copy().values.tolist()) + vb.extend(env_test.history[env_test.viz_col_tmpls[0].vars[1]].copy().values.tolist()) + vc.extend(env_test.history[env_test.viz_col_tmpls[0].vars[2]].copy().values.tolist()) + v_ref0.extend(env_test.history[env_test.viz_col_tmpls[0].vars[3]].copy().values.tolist()) + v_ref1.extend(env_test.history[env_test.viz_col_tmpls[0].vars[4]].copy().values.tolist()) + 
v_ref2.extend(env_test.history[env_test.viz_col_tmpls[0].vars[5]].copy().values.tolist()) + ia.extend(env_test.history[env_test.viz_col_tmpls[1].vars[0]].copy().values.tolist()) + ib.extend(env_test.history[env_test.viz_col_tmpls[1].vars[1]].copy().values.tolist()) + ic.extend(env_test.history[env_test.viz_col_tmpls[1].vars[2]].copy().values.tolist()) + R_load.extend(env_test.history[env_test.viz_col_tmpls[2].vars[1]].copy().values.tolist()) + + env_test.close() + obs = env_test.reset() + if cfg['loglevel'] in ['train', 'test']: + phase_list.append(env_test.env.net.components[0].phase) + + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + """ + test_after_training_config = {"Name": "Test_Reward", + "time": ts, + #"Reward": rew_list, + "Return": (return_sum / env_test.max_episode_steps), + "Trial number": n_trail, + "Database name": folder_name, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime())} + """ + + reward_test_after_training = {"Reward": rew_list} + + if cfg['loglevel'] in ['train', 'test', 'setting']: + #mongo_recorder.save_to_json('Test_setting_Trial_number_' + n_trail, test_after_training_config, n_trail) + mongo_recorder.save_local_to_pkl('Test_reward_Trial_number_' + n_trail, reward_test_after_training, n_trail) + + if cfg['loglevel'] in ['train', 'test']: + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "lc_capacitor1_v": va, + "lc_capacitor2_v": vb, + "lc_capacitor3_v": vc, + "inverter1_v_ref_0": v_ref0, + "inverter1_v_ref_1": v_ref1, + "inverter1_v_ref_2": v_ref2, + "lc_inductor1_i": ia, + "lc_inductor2_i": ib, + "lc_inductor3_i": ic, + "r_load_resistor1_R": R_load, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! 
(risk=0 manullay in env); only voltage taken into account in reward!"} + + """ + In new testenv not used, because then only the last episode is stored + """ + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training, n_trail) + + return (return_sum / env_test.max_episode_steps) diff --git a/experiments/P10/hp_tune_ddpg_objective_P10.py b/experiments/P10/hp_tune_ddpg_objective_P10.py new file mode 100644 index 00000000..25c920b4 --- /dev/null +++ b/experiments/P10/hp_tune_ddpg_objective_P10.py @@ -0,0 +1,444 @@ +import json +import os +import time + +import sqlalchemy +from optuna.samplers import TPESampler + +os.environ['PGOPTIONS'] = '-c statement_timeout=1000' + +import optuna +import platform +import argparse +import sshtunnel +import numpy as np +# np.random.seed(0) +from experiments.P10.util.config import cfg + +from experiments.P10.experiment_vctrl_single_inv_P10 import mongo_recorder, experiment_fit_DDPG +from experiments.hp_tune.util.scheduler import linear_schedule + +model_path = 'experiments/hp_tune/trained_models/study_22_run_11534/' + +PC2_LOCAL_PORT2PSQL = 11999 +SERVER_LOCAL_PORT2PSQL = 6432 +DB_NAME = 'optuna' +PC2_LOCAL_PORT2MYSQL = 11998 +SERVER_LOCAL_PORT2MYSQL = 3306 +STUDY_NAME = cfg['STUDY_NAME'] # 'DDPG_MRE_sqlite_PC2' + +node = platform.uname().node + + +def ddpg_objective_fix_params(trial): + file_congfig = open(model_path + + 'PC2_DDPG_Vctrl_single_inv_22_newTestcase_Trial_number_11534_0.json', ) + trial_config = json.load(file_congfig) + + number_learning_steps = 500000 # trial.suggest_int("number_learning_steps", 100000, 1000000) + # rew_weigth = trial.suggest_float("rew_weigth", 0.1, 5) + # rew_penalty_distribution = trial.suggest_float("antiwindup_weight", 0.1, 5) + penalty_I_weight = trial_config["penalty_I_weight"] # trial.suggest_float("penalty_I_weight", 100e-6, 2) + penalty_P_weight = trial_config["penalty_P_weight"] # trial.suggest_float("penalty_P_weight", 100e-6, 2) + + penalty_I_decay_start = trial_config[ + "penalty_I_decay_start"] # trial.suggest_float("penalty_I_decay_start", 0.00001, 1) + penalty_P_decay_start = trial_config[ + "penalty_P_decay_start"] # trial.suggest_float("penalty_P_decay_start", 0.00001, 1) + + t_start_penalty_I = int(penalty_I_decay_start * number_learning_steps) + t_start_penalty_P = int(penalty_P_decay_start * number_learning_steps) + + integrator_weight = trial_config["integrator_weight"] # trial.suggest_float("integrator_weight", 1 / 200, 2) + # integrator_weight = trial.suggest_loguniform("integrator_weight", 1e-6, 1e-0) + # antiwindup_weight = trial.suggest_loguniform("antiwindup_weight", 50e-6, 50e-3) + antiwindup_weight = trial_config["antiwindup_weight"] # trial.suggest_float("antiwindup_weight", 0.00001, 1) + + learning_rate = trial_config["learning_rate"] # trial.suggest_loguniform("learning_rate", 1e-6, 1e-1) # 0.0002# + + lr_decay_start = trial_config[ + "lr_decay_start"] # trial.suggest_float("lr_decay_start", 0.00001, 1) # 3000 # 0.2 * number_learning_steps? 
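+ # lr_decay_start and lr_decay_duration are fractions of number_learning_steps; together they define
+ # the window [t_start, t_end] over which linear_schedule (called below) anneals the learning rate
+ # from learning_rate down to learning_rate * final_lr.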
+ lr_decay_duration = trial_config["lr_decay_duration"] # trial.suggest_float("lr_decay_duration", 0.00001, + # 1) # 3000 # 0.2 * number_learning_steps? + t_start = int(lr_decay_start * number_learning_steps) + t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps, + number_learning_steps)) + final_lr = trial_config["final_lr"] # trial.suggest_float("final_lr", 0.00001, 1) + + gamma = trial_config["gamma"] # trial.suggest_float("gamma", 0.5, 0.9999) + weight_scale = trial_config["weight_scale"] # trial.suggest_loguniform("weight_scale", 5e-5, 0.2) # 0.005 + + bias_scale = trial_config["bias_scale"] # trial.suggest_loguniform("bias_scale", 5e-4, 0.1) # 0.005 + alpha_relu_actor = trial_config[ + "alpha_relu_actor"] # trial.suggest_loguniform("alpha_relu_actor", 0.0001, 0.5) # 0.005 + alpha_relu_critic = trial_config[ + "alpha_relu_critic"] # trial.suggest_loguniform("alpha_relu_critic", 0.0001, 0.5) # 0.005 + + batch_size = trial_config["batch_size"] # trial.suggest_int("batch_size", 16, 1024) # 128 + buffer_size = trial_config[ + "buffer_size"] # trial.suggest_int("buffer_size", int(1e4), number_learning_steps) # 128 + + actor_hidden_size = trial_config[ + "actor_hidden_size"] # trial.suggest_int("actor_hidden_size", 10, 200) # 100 # Using LeakyReLU + actor_number_layers = trial_config["actor_number_layers"] # trial.suggest_int("actor_number_layers", 1, 4) + + critic_hidden_size = trial_config["critic_hidden_size"] # trial.suggest_int("critic_hidden_size", 10, 300) # 100 + critic_number_layers = trial_config["critic_number_layers"] # trial.suggest_int("critic_number_layers", 1, 4) + + n_trail = str(trial.number) + use_gamma_in_rew = 1 + noise_var = trial_config["noise_var"] # trial.suggest_loguniform("noise_var", 0.01, 1) # 2 + # min var, action noise is reduced to (depends on noise_var) + noise_var_min = 0.0013 # trial.suggest_loguniform("noise_var_min", 0.0000001, 2) + # min var, action noise is reduced to (depends on training_episode_length) + noise_steps_annealing = int( + 0.25 * number_learning_steps) # trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), + # number_learning_steps) + noise_theta = trial_config["noise_theta"] # trial.suggest_loguniform("noise_theta", 1, 50) # 25 # stiffness of OU + error_exponent = 0.5 # trial.suggest_loguniform("error_exponent", 0.001, 4) + + training_episode_length = trial_config[ + "training_episode_length"] # trial.suggest_int("training_episode_length", 500, 5000) # 128 + # learning_starts = 0.32 # trial.suggest_loguniform("learning_starts", 0.1, 2) # 128 + tau = trial_config["tau"] # trial.suggest_loguniform("tau", 0.0001, 0.3) # 2 + + train_freq_type = "step" # trial.suggest_categorical("train_freq_type", ["episode", "step"]) + train_freq = trial_config["train_freq"] # trial.suggest_int("train_freq", 1, 15000) + + optimizer = trial_config[ + "optimizer"] # trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"]) # , "LBFGS"]) + + number_past_vals = 5 # trial.suggest_int("number_past_vals", 0, 15) + + learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr, + t_start=t_start, + t_end=t_end, + total_timesteps=number_learning_steps) + + trail_config_mongo = {"Name": "Config", + "Node": node, + "Agent": "DDPG", + "Number_learning_Steps": number_learning_steps, + "Trial number": n_trail, + "Database name": cfg['STUDY_NAME'], + "Start time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Info": "AltesTestcase setting 
mit Integrator-Actor; 50 runs mit bestem HP-setting", + } + trail_config_mongo.update(trial.params) + # mongo_recorder.save_to_mongodb('Trial_number_' + n_trail, trail_config_mongo) + mongo_recorder.save_to_json('Trial_number_' + n_trail, trail_config_mongo) + + loss = experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + # loss = experiment_fit_DDPG_custom(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, + integrator_weight * antiwindup_weight, penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, + n_trail, number_past_vals) + + return loss + + +def ddpg_objective(trial): + number_learning_steps = 500000 # trial.suggest_int("number_learning_steps", 100000, 1000000) + actor_hidden_size = trial.suggest_int("actor_hidden_size", 10, 100) # Using LeakyReLU + actor_number_layers = trial.suggest_int("actor_number_layers", 1, 4) + alpha_relu_actor = trial.suggest_loguniform("alpha_relu_actor", 0.001, 0.5) + alpha_relu_critic = trial.suggest_loguniform("alpha_relu_critic", 0.001, 0.5) + antiwindup_weight = trial.suggest_float("antiwindup_weight", 1e-4, 1) + batch_size = trial.suggest_int("batch_size", 16, 1024) + bias_scale = trial.suggest_loguniform("bias_scale", 5e-5, 0.2) + buffer_size = trial.suggest_int("buffer_size", int(20e4), number_learning_steps) # 128 + critic_hidden_size = trial.suggest_int("critic_hidden_size", 10, 300) + critic_number_layers = trial.suggest_int("critic_number_layers", 1, 4) + error_exponent = 0.5 # 0.5 # trial.suggest_loguniform("error_exponent", 0.001, 4) + final_lr = trial.suggest_float("final_lr", 0.00001, 1) + gamma = trial.suggest_float("gamma", 0.6, 0.99999) + integrator_weight = trial.suggest_float("integrator_weight", 1e-4, 0.5) + learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1e-2) + lr_decay_start = trial.suggest_float("lr_decay_start", 0.00001, 1) + lr_decay_duration = trial.suggest_float("lr_decay_duration", 0.00001, 1) + n_trail = str(trial.number) + noise_steps_annealing = int( + 0.25 * number_learning_steps) # trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), + # number_learning_steps) + noise_theta = trial.suggest_loguniform("noise_theta", 1, 100) # 25 # stiffness of OU + noise_var = trial.suggest_loguniform("noise_var", 0.001, 1) # 2 + noise_var_min = 0.0013 # trial.suggest_loguniform("noise_var_min", 0.0000001, 2) + number_past_vals = trial.suggest_int("number_past_vals", 0, 20) + optimizer = trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"]) # , "LBFGS"]) + penalty_I_weight = trial.suggest_float("penalty_I_weight", 100e-6, 2) + penalty_P_weight = trial.suggest_float("penalty_P_weight", 100e-6, 2) + + penalty_I_decay_start = trial.suggest_float("penalty_I_decay_start", 0.00001, 1) + penalty_P_decay_start = trial.suggest_float("penalty_P_decay_start", 0.00001, 1) + + t_start_penalty_I = int(penalty_I_decay_start * number_learning_steps) + t_start_penalty_P = int(penalty_P_decay_start * number_learning_steps) + t_start = int(lr_decay_start * number_learning_steps) + t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * 
number_learning_steps, + number_learning_steps)) + tau = trial.suggest_loguniform("tau", 0.0001, 0.3) # 2 + train_freq_type = "step" # trial.suggest_categorical("train_freq_type", ["episode", "step"]) + training_episode_length = trial.suggest_int("training_episode_length", 1000, 4000) # 128 + train_freq = trial.suggest_int("train_freq", 1, 5000) + use_gamma_in_rew = 1 + weight_scale = trial.suggest_loguniform("weight_scale", 5e-5, 0.2) + + learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr, + t_start=t_start, + t_end=t_end, + total_timesteps=number_learning_steps) + """ + trail_config_mongo = {"Name": "Config", + "Node": node, + "Agent": "DDPG", + "Number_learning_Steps": number_learning_steps, + "Trial number": n_trail, + "Database name": cfg['STUDY_NAME'], + "Start time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Optimierer/ Setting stuff": "DDPG HPO mit Integrator und pastVals (HP) " + "P10 setting thrid try" + "Reward in test is kept to -1 if limit exceeded once" + } + + trail_config_mongo.update(trial.params) + mongo_recorder.save_to_json('Config_Trial_number_' + n_trail, trail_config_mongo, n_trail) + #mongo_recorder.save_local_to_pkl('Trial_number_' + n_trail, trail_config_mongo) + """ + + loss = experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + # loss = experiment_fit_DDPG_custom(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, + integrator_weight * antiwindup_weight, penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, + n_trail, number_past_vals) + + return loss + + +def get_storage(url, storage_kws): + successfull = False + retry_counter = 0 + + while not successfull: + try: + storage = optuna.storages.RDBStorage( + url=url, **storage_kws) + successfull = True + except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.DatabaseError) as e: + wait_time = np.random.randint(60, 300) + retry_counter += 1 + if retry_counter > 10: + print('Stopped after 10 connection attempts!') + raise e + print(f'Could not connect, retry in {wait_time} s') + time.sleep(wait_time) + + return storage + + +def optuna_optimize_mysql_lea35(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + if node in ('LEA-WORK35', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2MYSQL + else: + port = SERVER_LOCAL_PORT2MYSQL + + storage = get_storage(f'mysql://{optuna_creds}@localhost:{port}/{DB_NAME}') + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2MYSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2MYSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + study = optuna.create_study( + storage=f"mysql+pymysql://{optuna_creds}@127.0.0.1:{tun.local_bind_port}/{DB_NAME}", + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_mysql(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f"mysql://{optuna_creds}@localhost/{DB_NAME}", + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_sqlite(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 100 + + print(n_trials) + print('Local optimization is run but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in cfg['lea_vpn_nodes']: + optuna_path = './optuna/' + else: + # assume we are on not of pc2 -> store to project folder + optuna_path = '/scratch/hpc-prf-reinfl/weber/OMG/optuna/' + + os.makedirs(optuna_path, exist_ok=True) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f'sqlite:///{optuna_path}optuna.sqlite', + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_psql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_psql' + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + # set trial to failed if it seems dead for 20 minutes + storage_kws = dict(engine_kwargs={"pool_timeout": 600}) + if node in ('lea-cyberdyne', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2PSQL + else: + port = SERVER_LOCAL_PORT2PSQL + + storage = get_storage(f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', storage_kws=storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2PSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2PSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + storage = get_storage(url=f'postgresql://{optuna_creds}' + f'@localhost:{tun.local_bind_port}/{DB_NAME}', storage_kws=storage_kws) + + # storage = optuna.storages.RDBStorage( + # url=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + # **storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +if __name__ == "__main__": + TPE_sampler = TPESampler(n_startup_trials=1000) # , constant_liar=True) + + optuna_optimize_mysql_lea35(ddpg_objective, study_name=STUDY_NAME, sampler=TPE_sampler) + # optuna_optimize_sqlite(ddpg_objective, study_name=STUDY_NAME, sampler=TPE_sampler) diff --git a/experiments/P10/pc2_schedule_ddpg_P10.py b/experiments/P10/pc2_schedule_ddpg_P10.py new file mode 100644 index 00000000..355d708c --- /dev/null +++ b/experiments/P10/pc2_schedule_ddpg_P10.py @@ -0,0 +1,105 @@ +"""Allocate jobs executing a certain cmd endlessly. 
Make sure not to surpass +the allowed cpu core limit""" + +import os +import pathlib +import uuid +import time + +import optuna +from optuna.samplers import TPESampler + +from experiments.hp_tune.util import pc2 +from experiments.P10.util.config import cfg + +# config +USER = os.getenv('USER') +ALLOWED_MAX_CPU_CORES = 300 # 512 +STUDY_NAME = cfg['STUDY_NAME'] +DB_NAME = 'optuna' +# resources request +job_resource_plan = { + 'duration': 24, # in hours + 'ncpus': 2, + 'memory': 12, + 'vmemory': 16, +} + +MAX_WORKERS = ALLOWED_MAX_CPU_CORES // job_resource_plan['ncpus'] + +PC2_LOCAL_PORT2MYSQL = 11998 +SERVER_LOCAL_PORT2MYSQL = 3306 + + +def main(): + started_workers = 0 + print('Start slavedriving loop..') + old_ccsinfo_counts = None + while True: + + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study( + storage=f'mysql+pymysql://{optuna_creds}@localhost:{PC2_LOCAL_PORT2MYSQL}/{DB_NAME}', + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=TPESampler(n_startup_trials=2500), study_name=STUDY_NAME, + load_if_exists=True, + direction='maximize') + + complete_trials = len([t for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]) + print(f'Completed trials in study: {complete_trials}') + if complete_trials > 12000: + print('Maximal completed trials reached - STOPPING') + break + + job_files_path = pathlib.Path( + f"/scratch/hpc-prf-reinfl/weber/P10/ccs_job_files/{STUDY_NAME}") # SCRATCH = $PC2PFS/hpc_....re/OMG_prjecet + job_files_path.mkdir(parents=False, exist_ok=True) + + # read ccsinfo + ccsinfo = pc2.get_ccsinfo(USER) + ccsinfo_state_counts = ccsinfo.state.value_counts() + ccs_running = ccsinfo_state_counts.get('ALLOCATED', 0) + ccs_planned = ccsinfo_state_counts.get('PLANNED', 0) + total_busy = ccs_running + ccs_planned + if not ccsinfo_state_counts.equals(old_ccsinfo_counts): + print("\n## ccs summary ##") + print(f"Running: {ccs_running}") + print(f"Planned : {ccs_planned}") + print(f"Total busy workers (ccs): {total_busy}") + + if total_busy < MAX_WORKERS: + # call workers to work + n_workers = MAX_WORKERS - total_busy + print(f'Start {n_workers} workers:') + for w in range(n_workers): + started_workers += 1 + jobid = str(uuid.uuid4()).split('-')[0] + cluster = "oculus" + job_name = job_files_path / f"pc2_job_{jobid}.sh" + res_plan = pc2.calculate_resources(**job_resource_plan) + + execution_line = "PYTHONPATH=$HOME/openmodelica-microgrid-gym/ " \ + "python $HOME/openmodelica-microgrid-gym/experiments/P10/hp_tune_ddpg_objective_P10.py -n 1" + + print(f'Start job {jobid} ..') + pc2.create_n_run_script( + job_name, + pc2.build_shell_script_lines(job_files_path, cluster, + job_name, res_plan, + execution_line), + dry=False) + print('sleep 10s for better DB interaction', end='\r') + time.sleep(10) + + old_ccsinfo_counts = ccsinfo_state_counts + + print('sleep..', end='\r') + time.sleep(300) + + +if __name__ == '__main__': + main() diff --git a/experiments/P10/retrain_agent.py b/experiments/P10/retrain_agent.py new file mode 100644 index 00000000..878e93b7 --- /dev/null +++ b/experiments/P10/retrain_agent.py @@ -0,0 +1,212 @@ +print('Start script') +import gym +import logging +import os +import platform +import time +from functools import partial + +import GPy +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from stochastic.processes import 
VasicekProcess +from tqdm import tqdm +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.P10.env.env_wrapper_P10 import FeatureWrapper, FeatureWrapper_pastVals, BaseWrapper +from experiments.P10.env.rewards_P10 import Reward +from experiments.P10.env.vctrl_single_inv_P10 import net # , folder_name +from experiments.P10.util.config import cfg +from experiments.P10.util.recorder_P10 import Recorder +import pandas as pd + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +show_plots = True +save_results = False +# 2128 ->0; 3125 -> -1, 956-> best +trial = '956' + +number_learning_steps = 200000 + +folder_name = 'experiments/P10/retrain/' +os.makedirs(folder_name, exist_ok=True) +model_path = 'experiments/P10/viz/data/' +node = platform.uname().node +model_name = 'model_'+trial+'.zip' + +################DDPG Config Stuff######################################################################### +print('Using model_'+trial+' setting') +if trial == '956': + actor_number_layers = 2 + alpha_relu_actor = 0.0225049 + alpha_relu_critic = 0.00861825 + antiwindup_weight = 0.350646 + critic_number_layers = 4 + error_exponent = 0.5 + gamma = 0.794337 + integrator_weight = 0.214138 + use_gamma_in_rew = 1 + n_trail = 50001 + number_past_vals = 18 + training_episode_length = 2577 + penalty_I_weight = 1.46321 + penalty_P_weight = 0.662572 + t_start_penalty_I = 100000 + t_start_penalty_P = 100000 + + + +mongo_recorder = Recorder(node=node, database_name=folder_name) + +current_directory = os.getcwd() + +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, + i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom, det_run=True) + +####################################DDPG Stuff############################################## + +rew.gamma = 0 +# episodes will not abort, if limit is exceeded reward = -1 +rew.det_run = True +rew.exponent = 0.5 # 1 + +env = gym.make('experiments.P10.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + +env = FeatureWrapper_pastVals(env, number_of_features=9 + number_past_vals * 3, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + +env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + +# model2 = DDPG.load(model_path + f'model.zip') # , env=env_test) +print('Before load') + +model = DDPG.load(model_path + f'{model_name}', env=env) + +print('After load') + +count = 0 +for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 
1)].negative_slope = alpha_relu_actor + + count = count + 2 + +count = 0 + +for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + +env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + +################################################################################### +# retrain! +print('Train ' +str(number_learning_steps) + ' steps') +model.learn(total_timesteps=number_learning_steps) +print('Finished ' +str(number_learning_steps) + 'training steps') +model.save(folder_name + f'model' + trial + '_retrained.zip') + +# Log Train-info data +train_data = { # "Name": "After_Training", + "Mean_eps_env_reward_raw": env.reward_episode_mean, + "Mean_eps_reward_sum": env.reward_plus_addon_episode_mean, +} +df = pd.DataFrame(train_data) +df.to_pickle(f'{folder_name}/' + 'trainRewards_model' + trial + '_retrained' + ".pkl.bz2") + +""" +####### Run Test ######### +return_sum = 0.0 +rew.gamma = 0 +# episodes will not abort, if limit is exceeded reward = -1 +rew.det_run = True +rew.exponent = 0.5 # 1 +limit_exceeded_in_test = False + +env_test = gym.make('experiments.P10.env:vctrl_single_inv_test-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + +env_test = FeatureWrapper_pastVals(env_test, number_of_features=9 + number_past_vals * 3, + integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, + penalty_P_weight=0, number_past_vals=number_past_vals, + training_episode_length=training_episode_length, ) + +obs = env_test.reset() +rew_list = [] + +for step in range(env_test.max_episode_steps): + + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + + if rewards == -1 and not limit_exceeded_in_test: # and env_test.rew[-1]: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + + if limit_exceeded_in_test: + # if limit was exceeded once, reward will be kept to -1 till the end of the episode, + # nevertheless what the agent does + rewards = -1 + + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + + if step % 1000 == 0 and step != 0: + env_test.close() + obs = env_test.reset() + + if done: + env_test.close() + break + +ts = time.gmtime() + +reward_test_after_training = {"Reward": rew_list} + +df = pd.DataFrame(reward_test_after_training) +df.to_pickle(f'{folder_name}/' + 'model' + trial + '_retrained' + ".pkl.bz2") + +print((return_sum / env_test.max_episode_steps)) +""" diff --git a/experiments/P10/util/config.py b/experiments/P10/util/config.py new file mode 100644 index 00000000..db49a173 --- /dev/null +++ b/experiments/P10/util/config.py @@ -0,0 +1,18 @@ +cfg = dict(lea_vpn_nodes=['lea-skynet', 'lea-picard', 'lea-barclay', + 'lea-cyberdyne', 'webbah-ThinkPad-L380', 'LEA_WORK35', 'webbah-ThinkPad-T14-Gen-2a'], + STUDY_NAME='P10_SEC_R_load', + meas_data_folder='experiment_data/', + MONGODB_PORT=12001, + loglevel='setting', # setting ~ config + return/learning curve 
(most is stored anyway, only effects in + # test saving stuff + # test ~ setting + test-results (measurements) + # train ~ test + training measurements + is_dq0=True, + + # train_episode_length=2881, # defines when in training the env is reset e.g. for exploring starts, + + # nothing -> Standard FeatureWrapper; past -> FeatureWrapper_pastVals; future -> FeatureWrapper_futureVals + # I-controller -> DDPG as P-term + standard I-controller; no-I-term -> Pure DDPG without integrator + env_wrapper='past', + pc2_logpath='/scratch/hpc-prf-reinfl/weber/P10' + ) diff --git a/experiments/P10/util/recorder_P10.py b/experiments/P10/util/recorder_P10.py new file mode 100644 index 00000000..550e79b3 --- /dev/null +++ b/experiments/P10/util/recorder_P10.py @@ -0,0 +1,78 @@ +import json +from os import makedirs +import pandas as pd + +import sshtunnel +from pymongo import MongoClient + +from experiments.P10.util.config import cfg + +MONGODB_PORT = cfg['MONGODB_PORT'] # 12001 + + +class Recorder: + + def __init__(self, node, database_name): + """ + Class to record measured data to mongo database using pymongo + Depending on the node we are operating at it connects via ssh to + - in lea_vpn: to cyberdyne port 12001 + - else: assume pc2 node -> connect to frontend + and stores data to mongoDB at port MONGODB_PORT ( =12001). + HINT: From pc2 frontend permanent tunnel from cyberdyne port 12001 to frontend 12001 + is needed (assuming Mongod-Process running on cyberdyne + :params node: platform.uname().node + :params database_name: string for the database name to store data in + """ + self.node = node + self.save_count = 0 + + if self.node in cfg['lea_vpn_nodes']: + self.server_name = 'lea38' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT)} + self.save_folder = cfg['meas_data_folder'] + else: + # assume we are on a node of pc2 -> connect to frontend and put data on prt 12001 + # from there they can be grep via permanent tunnel from cyberdyne + self.server_name = 'fe.pc2.uni-paderborn.de' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT), + 'ssh_username': 'webbah'} + + #self.save_folder = '/scratch/hpc-prf-reinfl/weber/OMG/' + cfg['meas_data_folder'] + self.save_folder = cfg['pc2_logpath'] + '/' + cfg['meas_data_folder'] + + self.database_name = database_name + makedirs(self.save_folder, exist_ok=True) + # pathlib.Path(self.save_folder.mkdir(exist_ok=True)) + + def save_to_mongodb(self, col: str = ' trails', data=None): + """ + Stores data to database in document col + """ + with sshtunnel.open_tunnel(self.server_name, **self.tun_cfg) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[self.database_name] + trial_coll = db[col] # get collection named col + trial_coll.insert_one(data) + + def save_to_json(self, col: str = ' trails', data=None, n_trail=999999): + """ + Stores data to json file in specified directory. From there the data can be grept by another process + and can be stored to a DB via ssh + To distinguish the files of one trail a save_count is incremented and added to the filename + """ + + with open(self.save_folder + str(n_trail) + '_' + self.database_name + '_' + col + '_' + str(self.save_count) + '.json', + 'w') as outfile: + json.dump(data, outfile) + + self.save_count += 1 + + def save_local_to_pkl(self, col: str = ' trails', data=None, n_trail=999999): + """ + Stores data locally to comp. 
pkl + """ + df = pd.DataFrame(data) + df.to_pickle(self.save_folder + str(n_trail) + '_' + self.database_name + '_' + col + '_' + str(self.save_count) + ".pkl.bz2") diff --git a/experiments/P10/util/reporter_P10.py b/experiments/P10/util/reporter_P10.py new file mode 100644 index 00000000..ebd59f81 --- /dev/null +++ b/experiments/P10/util/reporter_P10.py @@ -0,0 +1,149 @@ +import json +import os +import platform +import re +import time + +import numpy as np + +import sshtunnel +from pymongo import MongoClient +from experiments.P10.util.config import cfg + + +class Reporter: + + def __init__(self): + """ + Greps json data which is stored in the cfg[meas_data_folder] and sends it to mongoDB + on cyberdyne (lea38) via sshtunnel on port MONGODB_PORT + """ + + MONGODB_PORT = cfg['MONGODB_PORT'] + + node = platform.uname().node + + if node in cfg['lea_vpn_nodes']: + self.server_name = 'lea38' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT)} + self.save_folder = './' + cfg['meas_data_folder'] + else: + # assume we are on a node of pc2 -> connect to frontend and put data on prt 12001 + # from there they can be grep via permanent tunnel from cyberdyne + self.server_name = 'fe.pc2.uni-paderborn.de' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT), + 'ssh_username': 'webbah'} + + self.save_folder = '/scratch/hpc-prf-reinfl/weber/OMG/' + cfg['meas_data_folder'] + + def save_to_mongodb(self, database_name: str, col: str = ' trails', data=None): + """ + Stores data to database in document col + """ + with sshtunnel.open_tunnel(self.server_name, **self.tun_cfg) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[database_name] + trial_coll = db[col] # get collection named col + trial_coll.insert_one(data) + + def oldest_file_in_tree(self, extension=".json"): + """ + Returns the oldest file-path string + """ + print(os.getcwd()) + return min( + (os.path.join(dirname, filename) + for dirname, dirnames, filenames in os.walk(self.save_folder) + for filename in filenames + if filename.endswith(extension)), + key=lambda fn: os.stat(fn).st_mtime) + + def oldest_file_with_name_in_tree(self, count_number_to_find, extension=".json"): + """ + Returns the oldest file-path string + + :param count_number_to_find: List of count_numbers to find and store instead of storing all + """ + print(os.getcwd()) + return min( + (os.path.join(dirname, filename) + for dirname, dirnames, filenames in os.walk(self.save_folder) + for filename in filenames + if filename.endswith(str(count_number_to_find) + extension)), + key=lambda fn: os.stat(fn).st_mtime) + + def json_to_mongo_via_sshtunnel(self, file_name_to_store=None): + + if not len(os.listdir(self.save_folder)) == 0: + + if file_name_to_store is None: + try: + oldest_file_path = self.oldest_file_in_tree() + except(ValueError) as e: + print('Folder seems empty or no matching data found!') + print(f'ValueError{e}') + print('Empty directory! 
Go to sleep for 5 minutes!') + time.sleep(5 * 60) + return + else: + oldest_file_path = file_name_to_store + + with open(oldest_file_path) as json_file: + data = json.load(json_file) + + successfull = False + retry_counter = 0 + + while not successfull: + try: + now = time.time() + if os.stat(oldest_file_path).st_mtime < now - 60: + self.save_to_mongodb(database_name=data['Database name'], + col='Trial_number_' + data['Trial number'], data=data) + print('Reporter: Data stored successfully to MongoDB and will be removed locally!') + os.remove(oldest_file_path) + successfull = True + except (sshtunnel.BaseSSHTunnelForwarderError) as e: + wait_time = np.random.randint(1, 60) + retry_counter += 1 + if retry_counter > 10: + print('Stopped after 10 connection attempts!') + raise e + print(f'Reporter: Could not connect via ssh to frontend, retry in {wait_time} s') + time.sleep(wait_time) + + else: + print('Empty directory! Go to sleep for 5 minutes!') + time.sleep(5 * 60) + + +if __name__ == "__main__": + + reporter = Reporter() + print("Starting Reporter for logging from local savefolder to mongoDB") + + file_ending_number = [178, 179] + + print(f"Searching for files in directory with number ending on {file_ending_number}") + + # print(reporter.oldest_file_in_tree()) + while True: + reporter.json_to_mongo_via_sshtunnel() + + """ + # to send only files ending with number file_ending_number + for number in file_ending_number: + try: + oldest_named_file_path = reporter.oldest_file_with_name_in_tree(number) + print(oldest_named_file_path) + + reporter.json_to_mongo_via_sshtunnel(oldest_named_file_path) + + except(ValueError) as e: + print(f'No file with number {number} ending') + print(f'ValueError{e}') + print('Go to sleep for 5 seconds and go on with next number!') + time.sleep(5) + """ diff --git a/experiments/P10/viz/Com_models_pc2_P10.py b/experiments/P10/viz/Com_models_pc2_P10.py new file mode 100644 index 00000000..3f6ad320 --- /dev/null +++ b/experiments/P10/viz/Com_models_pc2_P10.py @@ -0,0 +1,723 @@ +print('Start script') +import gym +import logging +import os +import platform +import time +from functools import partial + +import GPy +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from stochastic.processes import VasicekProcess +from tqdm import tqdm +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.P10.env.env_wrapper_P10 import FeatureWrapper, FeatureWrapper_pastVals, BaseWrapper +from experiments.P10.env.rewards_P10 import Reward +from experiments.P10.env.vctrl_single_inv_P10 import net # , folder_name +from experiments.P10.util.config import cfg +from experiments.P10.util.recorder_P10 import Recorder + +# imports for PIPI +from experiments.P10.env.random_load_P10 import RandomLoad +from openmodelica_microgrid_gym.agents import SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + +import pandas as pd + +from openmodelica_microgrid_gym.util import abc_to_dq0 + + +class CallbackList(list): + def fire(self, 
*args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +show_plots = True +save_results = False +# 2128 ->0; 3125 -> -1, 956-> best +trial = '956' + +folder_name = 'saves/P10_SEC_R_load' +os.makedirs(folder_name, exist_ok=True) +wrapper = ['past'] +model_path = 'experiments/P10/viz/data/' +model_path = 'experiments/P10/retrain/' +node = platform.uname().node +model_name = ['model_'+trial+'.zip'] +model_name = ['model'+trial+'_retrained'] + +################DDPG Config Stuff######################################################################### +print('Using model_'+trial+' setting') +if trial == '956': + actor_number_layers = 2 + alpha_relu_actor = 0.0225049 + alpha_relu_critic = 0.00861825 + antiwindup_weight = 0.350646 + critic_number_layers = 4 + error_exponent = 0.5 + gamma = 0.794337 + integrator_weight = 0.214138 + use_gamma_in_rew = 1 + n_trail = 50001 + number_past_vals = [18] + +if trial == '3125': + actor_number_layers = 1 + alpha_relu_actor = 0.305758 + alpha_relu_critic = 0.0119687 + antiwindup_weight = 0.767766 + critic_number_layers = 4 + error_exponent = 0.5 + gamma = 0.922121 + integrator_weight = 0.237488 + use_gamma_in_rew = 1 + n_trail = 50001 + number_past_vals = [2] + +if trial == '2128': + actor_number_layers = 1 + alpha_relu_actor = 0.334101 + alpha_relu_critic = 0.0729528 + antiwindup_weight = 0.648373 + critic_number_layers = 4 + error_exponent = 0.5 + gamma = 0.798319 + integrator_weight = 0.122662 + use_gamma_in_rew = 1 + n_trail = 50001 + number_past_vals = [7] + +mongo_recorder = Recorder(node=node, database_name=folder_name) + +num_average = 1 +max_episode_steps_list = [1000] + +data_str = 'experiments/hp_tune/data/R_load_deterministic_test_case2_1_seconds.pkl' +#data_str = 'experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl' +# data_str = 'experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl' + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +#################PI Config stuff############################################################################## +current_directory = os.getcwd() + +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_p10.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +# max_episode_steps = 1002 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC + +print("P10 stuff!") +L_filter = 70e-6 # / H +R_filter = 1.1e-3 # / Ohm +C_filter = 250e-6 # / F + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 1 # to allow maximal load that draws i_limit (let exceed?) 
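# --- Editor's note: illustrative sketch only, not part of this patch. ---
# The clip bounds above are later passed to RandProcess(VasicekProcess, ...) to
# generate the stochastic R_load profile. A Vasicek (Ornstein-Uhlenbeck-type)
# process is mean reverting; a plain NumPy Euler-Maruyama discretization of such
# a load trajectory, clipped to bounds like the ones defined here, could look as
# follows. Default values for mean/r0 are assumptions; speed/vol mirror the
# proc_kwargs used further below.
def _sample_r_load_sketch(n_steps, dt=1e-4, speed=800.0, vol=40.0,
                          mean=100.0, r0=100.0, lo=1.0, hi=200.0):
    """Sketch of dR = speed * (mean - R) * dt + vol * dW, clipped to [lo, hi]."""
    r = np.empty(n_steps)
    r[0] = r0
    for k in range(1, n_steps):
        dw = np.random.normal(scale=np.sqrt(dt))
        r[k] = r[k - 1] + speed * (mean - r[k - 1]) * dt + vol * dw
    return np.clip(r, lo, hi)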
+upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 1 +upper_bound_load_clip_std = 0 +##################################### +# Definitions for the GP +prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set +noise_var = 0.001 # ** 2 # measurement noise sigma_omega +prior_var = 2 # prior variance of the GP + +bounds = [(0.000, 0.045), (4, 450)] # bounds on the input variable current-Ki&Kp and voltage-Ki&Kp +lengthscale = [.003, 50.] # length scale for the parameter variation [current-Ki&Kp and voltage-Ki&Kp] for the GP + +safe_threshold = 0 +j_min = -5 # cal min allowed performance + +explore_threshold = 0 + +# Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of +# limit exceeded +abort_reward = 100 * j_min + +# Definition of the kernel +kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + +##################################### +# Definition of the controllers +#From MATLAB: +################################################################################# +#Layout using mangtude optimum for inverter with LC-filter using L = 7e-05 H, C = 0.00025 F, R = 0.001 Ohm. +#Current controller: +#Kp = 0.0009773 A/V and Ki = 0.13159 A/(Vs) +#Voltage controller: +#Kp = 0.45052 V/A and Ki = 305.6655 V/(As) +# P10: +print('using p10 setting') +kp_v = 0.45052 +ki_v = 305.6655 +kp_c = 0.0009773 +ki_c = 0.13159 + +# Choose Kp and Ki for the current and voltage controller as mutable parameters +mutable_params = dict(voltageP=MutableFloat(kp_v), voltageI=MutableFloat(ki_v)) # 300Hz +# mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz +voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-i_lim * 10, i_lim * 10)) + +current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) # Current controller values +droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) +qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + +ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + +agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + +"""""" +for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + for ave_run in tqdm(range(num_average), desc='steps', unit='step', leave=False): + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, + i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom, det_run=True) + + ####################################PI Stuff################################################ + R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + rand_load_train = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + 
bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_tenLoadstepPerEpisode2881Len_test_case_10_seconds.pkl')) + # 'experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl')) + # 'experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl')) + # 'experiments/hp_tune/data/R_load_deterministic_test_case_25_ohm_1_seconds.pkl')) + data_str)) + + cb = CallbackList() + # set initial = None to reset load random in range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + cb.append(rand_load_train.reset) + + plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + + + # rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + # load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + # fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': R_filter, + 'lc.resistor5.R': R_filter, + 'lc.resistor6.R': R_filter, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, 
+ 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + history=FullHistory(), + # on_episode_reset_callback=cb.fire, + action_time_delay=1 * undersample + ) + + rew.gamma = 0 + return_sum_PI = 0.0 + rew_list_PI = [] + v_d_PI = [] + v_q_PI = [] + v_0_PI = [] + R_load_PI = [] + limit_exceeded_in_test_PI = False + limit_exceeded_penalty_PI = 0 + """ + agent.reset() + agent.obs_varnames = env.history.cols + env.history.cols = env.history.structured_cols(None) + agent.measurement_cols + env.measure = agent.measure + agent_fig = None + obs_PI = env.reset() + + for step in tqdm(range(env.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + agent.observe(None, False) + act_PI = agent.act(obs_PI) + obs_PI, r_PI, done_PI, info_PI = env.step(act_PI) + rew_list_PI.append(r_PI) + env.render() + return_sum_PI += r_PI + if r_PI == -1 and not limit_exceeded_in_test_PI: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test_PI = True + limit_exceeded_penalty_PI = -1 + + # _, env_fig = env.close() + agent.observe(r_PI, done_PI) + + v_a_PI = env.history.df['lc.capacitor1.v'] + v_b_PI = env.history.df['lc.capacitor2.v'] + v_c_PI = env.history.df['lc.capacitor3.v'] + i_a_PI = env.history.df['lc.inductor1.i'] + i_b_PI = env.history.df['lc.inductor2.i'] + i_c_PI = env.history.df['lc.inductor3.i'] + R_load_PI = (env.history.df['r_load.resistor1.R'].tolist()) + phase_PI = env.history.df['inverter1.phase.0'] # env.net.components[0].phase + + i_dq0_PI = abc_to_dq0(np.array([i_a_PI, i_b_PI, i_c_PI]), phase_PI) + v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) + + i_d_PI = i_dq0_PI[0].tolist() + i_q_PI = i_dq0_PI[1].tolist() + i_0_PI = i_dq0_PI[2].tolist() + v_d_PI = (v_dq0_PI[0].tolist()) + v_q_PI = (v_dq0_PI[1].tolist()) + v_0_PI = (v_dq0_PI[2].tolist()) + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "time": ts, + "PI_Kp_c": kp_c, + "PI_Ki_c": ki_c, + "PI_Kp_v": kp_v, + "PI_Ki_v": ki_v, + "DDPG_model_path": model_path, + "Return PI": (return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI), + "Reward PI": rew_list_PI, + "env_hist_PI": env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "PI result for comparison with RL agent", + "optimization node": 'Thinkpad', + "optimization folder name": 'Pipi_new_testcase_opt_4d_undsafe_2' + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/PI_{max_episode_steps_list[max_eps_steps]}steps') + """ + ####################################DDPG Stuff############################################## + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + + #net = Network.load('net/net_vctrl_single_inv_dq0.yaml') # is used from vctrl_single_env, not needed here + + for used_model, wrapper_mode, 
used_number_past_vales in zip(model_name, wrapper, number_past_vals): + + if wrapper_mode == 'i_load': + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2' # ], + , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'], + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': R_filter, + 'lc.resistor5.R': R_filter, + 'lc.resistor6.R': R_filter, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + ) + else: + env_test = gym.make('experiments.P10.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'], + # , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'], + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': R_filter, + 'lc.resistor5.R': R_filter, + 'lc.resistor6.R': R_filter, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + ) + + if wrapper_mode in ['past', 'i_load']: + env_test = FeatureWrapper_pastVals(env_test, number_of_features=9 + used_number_past_vales * 3, + # training_episode_length=training_episode_length, (da aus pickle!) 
+ recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, + number_past_vals=used_number_past_vales) + + + elif wrapper_mode == 'no-I-term': + env_test = BaseWrapper(env_test, number_of_features=6 + used_number_past_vales * 3, + recorder=mongo_recorder, n_trail=n_trail, gamma=gamma, + number_past_vals=used_number_past_vales) + + else: + env_test = FeatureWrapper(env_test, number_of_features=11, + recorder=mongo_recorder, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, + penalty_P_weight=0) # , use_past_vals=True, number_past_vals=30) + + if wrapper_mode not in ['no-I-term', 'I-controller']: + env_test.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + # model2 = DDPG.load(model_path + f'model.zip') # , env=env_test) + print('Before load') + + model = DDPG.load(model_path + f'{used_model}', env=env_test) + + print('After load') + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + if wrapper_mode not in ['no-I-term', 'I-controller']: + env_test.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + return_sum = 0.0 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + rew_list = [] + v_d = [] + v_q = [] + v_0 = [] + action_P0 = [] + action_P1 = [] + action_P2 = [] + action_I0 = [] + action_I1 = [] + action_I2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + R_load = [] + + ####### Run Test ######### + # agent ~ PI Controllerv using env + # model ~ RL Controller using env_test + # Both run in the same loop + + obs = env_test.reset() + + for step in tqdm(range(env_test.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + action, _states = model.predict(obs, deterministic=True) + if step == 988: + asd = 1 + obs, rewards, done, info = env_test.step(action) + action_P0.append(np.float64(action[0])) + action_P1.append(np.float64(action[1])) + action_P2.append(np.float64(action[2])) + if wrapper_mode not in ['no-I-term', 'I-controller']: + action_I0.append(np.float64(action[3])) + action_I1.append(np.float64(action[4])) + action_I2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + + # print(rewards) + if done: + env_test.close() + + # print(limit_exceeded_in_test) + break + + env_test.close() + + v_a = env_test.history.df['lc.capacitor1.v'] + v_b = env_test.history.df['lc.capacitor2.v'] + v_c = 
env_test.history.df['lc.capacitor3.v'] + i_a = env_test.history.df['lc.inductor1.i'] + i_b = env_test.history.df['lc.inductor2.i'] + i_c = env_test.history.df['lc.inductor3.i'] + R_load = (env_test.history.df['r_load.resistor1.R'].tolist()) + phase = env_test.history.df['inverter1.phase.0'] # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + + i_d = i_dq0[0].tolist() + i_q = i_dq0[1].tolist() + i_0 = i_dq0[2].tolist() + v_d = (v_dq0[0].tolist()) + v_q = (v_dq0[1].tolist()) + v_0 = (v_dq0[2].tolist()) + + + plt.plot(v_d) + plt.show() + plt.plot(v_q) + plt.show() + plt.plot(v_0) + plt.show() + + # return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + print(f'RL: {(return_sum / env_test.max_episode_steps + limit_exceeded_penalty)}') + # print(f'PI: {(return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI)}') + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "model name": model_name, + "Wrapper": wrapper, + "used_number_past_vales": used_number_past_vales, + "time": ts, + "ActionP0": action_P0, + "ActionP1": action_P1, + "ActionP2": action_P2, + "ActionI0": action_I0, + "ActionI1": action_I1, + "ActionI2": action_I2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "DDPG_model_path": model_path, + "Return DDPG": (return_sum / env_test.max_episode_steps + limit_exceeded_penalty), + "Reward DDPG": rew_list, + "env_hist_DDPG": env_test.env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "execution of RL agent on 10 s test case-loading values", + "optimization node": 'Thinkpad', + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/' + used_model + f'_{max_episode_steps_list[max_eps_steps]}steps') + + ret_list.append((return_sum / env_test.max_episode_steps + limit_exceeded_penalty)) + ret_array[ave_run] = (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + # ret_dict[str(ave_run)] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # zipped = zip(max_episode_steps_list[max_eps_steps], ret_list) + # temp_dict = dict(zipped) + temp_dict = {str(max_episode_steps_list[max_eps_steps]): ret_list} + result_list.append(temp_dict) + # ret_dict.append(zipped) + # df = df.append(ret_dict) + + mean_list.append(np.mean(ret_array)) + std_list.append(np.std(ret_array)) + +# df = df.append(temp_list, True) +print(mean_list) +print(std_list) +print(result_list) + +results = { + 'Mean': mean_list, + 'Std': std_list, + 'All results': result_list, + 'max_episode_steps_list': max_episode_steps_list +} + +df = pd.DataFrame(results) +# df.to_pickle("DDPG_study18_best_test_varianz.pkl") +asd = 1 diff --git a/experiments/P10/viz/CompareModelsPlotting_P10.py b/experiments/P10/viz/CompareModelsPlotting_P10.py new file mode 100644 index 00000000..e5488d37 --- /dev/null +++ b/experiments/P10/viz/CompareModelsPlotting_P10.py @@ -0,0 +1,234 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +make_pyplot = False +show_load = True +interval_plt = True + +# interval_list_x = [[0, 0.01], [0.01, 1.0], [0.78, 0.9]] +# interval_list_y = [[-25, 210], [-40, 210], [165, 175]] + + +# Fuer den Detzerministc case +interval_list_x = [[0, 0.01], 
[0.105, 0.2], [0.695, 0.71], [0.85, 0.88]] +interval_list_y = [[-25, 400], [-25, 400], [-25, 400], [-25, 400]] + +# Fuer den 10s Fall +# interval_list_x = [[0, 0.02], [2.09, 2.1], [2.11, 2.12], [7.08, 7.16], [7.145, 7.16]] +# interval_list_y = [[-50, 400], [-25, 340], [160, 190], [-25, 340], [125, 340]] + +run = '204 Return: -2 ' +# run = '374 Return: 0 ' +# run = '213 Return: 0.8 ' +run = '1080 Return: 0.9 ' + +folder_names = ['saves/P10_SEC_R_load'] # _deterministic' + + +number_of_steps = '_1000steps' +""" +df = pd.read_pickle('saves/P10_I_term_2/204_deterministic/PI' + number_of_steps) + +env_hist_PI = df['env_hist_PI'] +v_a_PI = env_hist_PI[0]['lc.capacitor1.v'].tolist() +v_b_PI = env_hist_PI[0]['lc.capacitor2.v'].tolist() +v_c_PI = env_hist_PI[0]['lc.capacitor3.v'].tolist() +R_load_PI = (env_hist_PI[0]['r_load.resistor1.R'].tolist()) +phase_PI = env_hist_PI[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) +v_d_PI = (v_dq0_PI[0].tolist()) +v_q_PI = (v_dq0_PI[1].tolist()) +v_0_PI = (v_dq0_PI[2].tolist()) + +reward_PI = df['Reward PI'][0] +return_PI = df['Return PI'][0] +kp_c = df['PI_Kp_c'][0] +ki_c = df['PI_Ki_c'][0] +kp_v = df['PI_Kp_v'][0] +ki_v = df['PI_Ki_v'][0] +""" +model_names = ['model.zip'] +ylabels = ['DDPG-I+pastVals'] + +pastVals = ['16'] +return_list_DDPG = [] +reward_list_DDPG = [] + +ts = 1e-4 # if ts stored: take from db + +# t_test_R = np.arange(ts, (len(testcase_100k['v_d_PI'])) * ts, ts).tolist() + +t_test = np.arange(0, round((len(v_0_PI)) * ts, 4), ts).tolist() +t_reward = np.arange(0, round((len(reward_PI)) * ts, 4), ts).tolist() + +# fig, axs = plt.subplots(len(model_names)+2, len(interval_list_y), figsize=(16, 12)) # , sharex=True) # a new figure window +fig, axs = plt.subplots(len(model_names) + 3, len(interval_list_y), + figsize=(12, 10)) # , sharex=True) # a new figure window + +for i in range(len(interval_list_y)): + plt_count = 3 + ############## Subplots + # fig = plt.figure(figsize=(10,12)) # a new figure window + + for model_name, pV, folder_name, ylabel_use in zip(model_names, pastVals, folder_names, ylabels): + + df_DDPG = pd.read_pickle(folder_name + '/' + model_name + number_of_steps) + # df_DDPG = pd.read_pickle(folder_name + '/' 'model_5_pastVals.zip_100000steps_NoPhaseFeature_1427') + + if i == 0: + return_list_DDPG.append(round(df_DDPG['Return DDPG'][0], 7)) + # reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + + env_hist_DDPG = df_DDPG['env_hist_DDPG'] + + v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() + v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() + v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() + phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + v_d_DDPG = (v_dq0[0].tolist()) + v_q_DDPG = (v_dq0[1].tolist()) + v_0_DDPG = (v_dq0[2].tolist()) + + axs[0, i].plot(t_test, R_load_PI) + axs[0, i].grid() + axs[0, i].set_xlim(interval_list_x[i]) + # axs[0, i].set_ylim([15, 75]) + if i == 0: + axs[0, i].set_ylabel("$R_{\mathrm{load}}\,/\,\mathrm{\Omega}$") + # ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + + DDPG_reward = df_DDPG['Reward DDPG'][0] + if plt_count == 3: + axs[1, i].plot(t_reward, reward_PI, 'b', label=f' PI: ' + f'{round(sum(reward_PI[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + axs[1, i].plot(t_reward, DDPG_reward, 'r', label=f'DDPG: ' + 
f'{round(sum(DDPG_reward[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + axs[1, i].grid() + axs[1, i].set_xlim(interval_list_x[i]) + # axs[1, i].set_ylim(interval_list_y[i]) + axs[1, i].legend() + if i == 0: + axs[1, i].set_ylabel("Reward") + + axs[2, i].plot(t_test, v_d_PI, 'b', label='v_d') + axs[2, i].plot(t_test, v_q_PI, 'r', label='v_q') + axs[2, i].plot(t_test, v_0_PI, 'g', label='v_0') + axs[2, i].grid() + axs[2, i].set_xlim(interval_list_x[i]) + axs[2, i].set_ylim(interval_list_y[i]) + if i == 0: + axs[2, i].set_ylabel("$v_{\mathrm{dq0, PI}}\,/\,\mathrm{V}$") + # else: + # axs[1, i].set_ylabel("$v_{\mathrm{q0, PI}}\,/\,\mathrm{V}$") + + axs[plt_count, i].plot(t_test, v_d_DDPG, 'b') + axs[plt_count, i].plot(t_test, v_q_DDPG, 'r') + axs[plt_count, i].plot(t_test, v_0_DDPG, 'g') + axs[plt_count, i].grid() + axs[plt_count, i].set_xlim(interval_list_x[i]) + axs[plt_count, i].set_ylim(interval_list_y[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + # axs[plt_count, i].set_ylabel(pV) + axs[plt_count, i].set_ylabel(ylabel_use) + # axs[plt_count, i].set_ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") + # else: + # axs[plt_count, i].set_ylabel("$v_{\mathrm{q0, DDPG}}\,/\,\mathrm{V}$") + plt_count += 1 + +fig.suptitle(run) + +fig.subplots_adjust(wspace=0.2, hspace=0.2) +plt.show() + +fig.savefig(f'{folder_name}/Ausschnitt_q0' + number_of_steps + '.pdf') + +if make_pyplot: + # pyplot Load + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=R_load_PI)) # , title='R_load') + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + # pyplot PI + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_reward, y=DDPG_reward)) + plot.add_trace( + px.Scatter(x=t_reward, y=reward_PI)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + for model_name in model_names: + df_DDPG = pd.read_pickle(folder_name + '/' + model_name + number_of_steps) + + env_hist_DDPG = df_DDPG['env_hist_DDPG'] + + v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() + v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() + v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() + phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + v_d_DDPG = (v_dq0[0].tolist()) + v_q_DDPG = (v_dq0[1].tolist()) + v_0_DDPG = (v_dq0[2].tolist()) + # pyplot ddpg + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=v_d_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_DDPG)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + plot.add_trace( + px.Scatter(x=t_test, y=v_d_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_PI)) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + +plt.plot(t_test, v_d_DDPG, 'b') +plt.plot(t_test, v_q_DDPG, 'r') +plt.plot(t_test, v_0_DDPG, 'g') +# plt.plot(t_test, v_d_PI, 'r') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +# plt.xlim([0.1, 0.11]) +# 
plt.ylim([290, 360]) +plt.xlabel("time") +plt.ylabel("v_dq0_DDPG") +plt.title(f'DDPG' + run) +plt.show() + +plt.plot(t_test, v_d_PI, 'b') +plt.plot(t_test, v_q_PI, 'r') +plt.plot(t_test, v_q_PI, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +# plt.xlim([0.1, 0.2]) +# plt.ylim([290, 360]) +plt.xlabel("time") +plt.ylabel("v_dq0_PI") +plt.title(f'PI') +plt.show() diff --git a/experiments/hp_tune/agents/my_ddpg.py b/experiments/hp_tune/agents/my_ddpg.py new file mode 100644 index 00000000..8998afc7 --- /dev/null +++ b/experiments/hp_tune/agents/my_ddpg.py @@ -0,0 +1,25 @@ +from stable_baselines3 import DDPG + +from experiments.hp_tune.agents.my_td3 import myTD3 + + +class myDDPG(DDPG, myTD3): + """ + Deep Deterministic Policy Gradient (DDPG) based on pytorch version from stable_baseline3. + + Additionally makes more training data accessible for logging e.g. in database + + Hint: for model.train() the train algorithm form myTD3 is used because python3 does NOT use depth-first search + (in that case DDPG->TD3.train() would have been taken) + See https://www.python-kurs.eu/python3_mehrfachvererbung.php "diamond_problem" + """ + + def __init__(self, *args, **kwargs): + super(myDDPG, self).__init__(*args, **kwargs) + # training variables for logging + self.critic_loss_batch_mean = [] # mean of critic losses of the batch + self.critic_estimate_target_diff_mean = [] # sum(Q_estimat - target)/N_batch_size + self.actor_loss_batch_mean = [] # mean of critic losses of the batch + self.current_q_estimates_batch_mean = [] # Q(s,a) (mean of the batch!) + self.target_q_batch_mean = [] # yi = r + gamma*Q_target(s',µ_target(s')) (mean of the batch!) + self.reward_batch_mean = [] # mean of the batch reward used in training diff --git a/experiments/hp_tune/agents/my_off_policy_algorithm.py b/experiments/hp_tune/agents/my_off_policy_algorithm.py new file mode 100644 index 00000000..95500236 --- /dev/null +++ b/experiments/hp_tune/agents/my_off_policy_algorithm.py @@ -0,0 +1,137 @@ +from typing import Optional +import numpy as np + +from stable_baselines3.common.buffers import ReplayBuffer +from stable_baselines3.common.callbacks import BaseCallback +from stable_baselines3.common.noise import ActionNoise +from stable_baselines3.common.off_policy_algorithm import OffPolicyAlgorithm + +from stable_baselines3.common.type_aliases import RolloutReturn +from stable_baselines3.common.vec_env import VecEnv + + +class myOffPolicyAlgorithm(OffPolicyAlgorithm): + + def collect_rollouts( + self, + env: VecEnv, + callback: BaseCallback, + n_episodes: int = 1, + n_steps: int = -1, + action_noise: Optional[ActionNoise] = None, + learning_starts: int = 0, + replay_buffer: Optional[ReplayBuffer] = None, + log_interval: Optional[int] = None, + ) -> RolloutReturn: + """ + Collect experiences and store them into a ReplayBuffer. + + :param env: The training environment + :param callback: Callback that will be called at each step + (and at the beginning and end of the rollout) + :param n_episodes: Number of episodes to use to collect rollout data + You can also specify a ``n_steps`` instead + :param n_steps: Number of steps to use to collect rollout data + You can also specify a ``n_episodes`` instead. + :param action_noise: Action noise that will be used for exploration + Required for deterministic policy (e.g. TD3). This can also be used + in addition to the stochastic policy for SAC. + :param learning_starts: Number of steps before learning for the warm-up phase. 
+ :param replay_buffer: + :param log_interval: Log data every ``log_interval`` episodes + :return: + """ + episode_rewards, total_timesteps = [], [] + total_steps, total_episodes = 0, 0 + + assert isinstance(env, VecEnv), "You must pass a VecEnv" + assert env.num_envs == 1, "OffPolicyAlgorithm only support single environment" + + if self.use_sde: + self.actor.reset_noise() + + callback.on_rollout_start() + continue_training = True + + while total_steps < n_steps or total_episodes < n_episodes: + done = False + episode_reward, episode_timesteps = 0.0, 0 + + while not done: + + if self.use_sde and self.sde_sample_freq > 0 and total_steps % self.sde_sample_freq == 0: + # Sample a new noise matrix + self.actor.reset_noise() + + # Select action randomly or according to policy + action, buffer_action = self._sample_action(learning_starts, action_noise) + + # Rescale and perform action + new_obs, reward, done, infos = env.step(action) + + self.num_timesteps += 1 + episode_timesteps += 1 + total_steps += 1 + + # Give access to local variables + callback.update_locals(locals()) + # Only stop training if return value is False, not when it is None. + if callback.on_step() is False: + return RolloutReturn(0.0, total_steps, total_episodes, continue_training=False) + + episode_reward += reward + + # Retrieve reward and episode length if using Monitor wrapper + self._update_info_buffer(infos, done) + + # Store data in replay buffer + if replay_buffer is not None: + # Store only the unnormalized version + if self._vec_normalize_env is not None: + new_obs_ = self._vec_normalize_env.get_original_obs() + reward_ = self._vec_normalize_env.get_original_reward() + else: + # Avoid changing the original ones + self._last_original_obs, new_obs_, reward_ = self._last_obs, new_obs, reward + + if not infos[0]['timelimit_reached']: + replay_buffer.add(self._last_original_obs, new_obs_, buffer_action, reward_, done) + + if infos[0]['timelimit_reached']: + # self._last_original_obs = None + self._last_obs = self.env.reset() + else: + self._last_obs = new_obs + # Save the unnormalized observation + if self._vec_normalize_env is not None: + self._last_original_obs = new_obs_ + + self._update_current_progress_remaining(self.num_timesteps, self._total_timesteps) + + # For DQN, check if the target network should be updated + # and update the exploration schedule + # For SAC/TD3, the update is done as the same time as the gradient update + # see https://github.com/hill-a/stable-baselines/issues/900 + self._on_step() + + if 0 < n_steps <= total_steps: + break + + if done: + total_episodes += 1 + self._episode_num += 1 + episode_rewards.append(episode_reward) + total_timesteps.append(episode_timesteps) + + if action_noise is not None: + action_noise.reset() + + # Log training infos + if log_interval is not None and self._episode_num % log_interval == 0: + self._dump_logs() + + mean_reward = np.mean(episode_rewards) if total_episodes > 0 else 0.0 + + callback.on_rollout_end() + + return RolloutReturn(mean_reward, total_steps, total_episodes, continue_training) diff --git a/experiments/hp_tune/agents/my_td3.py b/experiments/hp_tune/agents/my_td3.py new file mode 100644 index 00000000..cc74b8e2 --- /dev/null +++ b/experiments/hp_tune/agents/my_td3.py @@ -0,0 +1,153 @@ +from stable_baselines3 import TD3 + +import numpy as np +import torch as th +from torch.nn import functional as F +from stable_baselines3.common import logger +from stable_baselines3.common.utils import polyak_update + +from 
experiments.hp_tune.agents.my_off_policy_algorithm import myOffPolicyAlgorithm + + +# class myTD3(TD3, myOffPolicyAlgorithm): # so, falls timelimit_reached verwendet werden soll +class myTD3(TD3): + + def train(self, gradient_steps: int, batch_size: int = 100) -> None: + + # Update learning rate according to lr schedule + self._update_learning_rate([self.actor.optimizer, self.critic.optimizer]) + + actor_losses, critic_losses = [], [] + + for gradient_step in range(gradient_steps): + + self._n_updates += 1 + # Sample replay buffer + replay_data = self.replay_buffer.sample(batch_size, env=self._vec_normalize_env) + + with th.no_grad(): + # Select action according to policy and add clipped noise + noise = replay_data.actions.clone().data.normal_(0, self.target_policy_noise) + noise = noise.clamp(-self.target_noise_clip, self.target_noise_clip) + next_actions = (self.actor_target(replay_data.next_observations) + noise).clamp(-1, 1) + + # Compute the next Q-values: min over all critics targets + next_q_values = th.cat(self.critic_target(replay_data.next_observations, next_actions), dim=1) + next_q_values, _ = th.min(next_q_values, dim=1, keepdim=True) + target_q_values = replay_data.rewards + (1 - replay_data.dones) * self.gamma * next_q_values + + # Get current Q-values estimates for each critic network + current_q_values = self.critic(replay_data.observations, replay_data.actions) + + # Compute critic loss + critic_loss = sum([F.mse_loss(current_q, target_q_values) for current_q in current_q_values]) + critic_losses.append(critic_loss.item()) + + # store data for logging - use mean from batch + self.critic_loss_batch_mean.append(critic_loss.item()) + self.critic_estimate_target_diff_mean.append( + (sum(current_q_values[0] - target_q_values) / target_q_values.shape[0]).item()) + self.current_q_estimates_batch_mean.append(current_q_values[0].mean().item()) + self.target_q_batch_mean.append(np.mean(target_q_values.mean().item())) + self.reward_batch_mean.append(np.mean(replay_data.rewards.mean().item())) + + # Optimize the critics + self.critic.optimizer.zero_grad() + critic_loss.backward() + self.critic.optimizer.step() + + # Delayed policy updates + if self._n_updates % self.policy_delay == 0: + # Compute actor loss + actor_loss = -self.critic.q1_forward(replay_data.observations, + self.actor(replay_data.observations)).mean() + actor_losses.append(actor_loss.item()) + + # Optimize the actor + self.actor.optimizer.zero_grad() + actor_loss.backward() + self.actor.optimizer.step() + + polyak_update(self.critic.parameters(), self.critic_target.parameters(), self.tau) + polyak_update(self.actor.parameters(), self.actor_target.parameters(), self.tau) + + # store data for logging - use mean from batch + self.actor_loss_batch_mean.append(np.mean(actor_losses)) + + logger.record("train/n_updates", self._n_updates, exclude="tensorboard") + if len(actor_losses) > 0: + logger.record("train/actor_loss", np.mean(actor_losses)) + logger.record("train/critic_loss", np.mean(critic_losses)) + + """ + def train(self, gradient_steps: int, batch_size: int = 100) -> None: + + # Update learning rate according to lr schedule + self._update_learning_rate([self.actor.optimizer, self.critic.optimizer]) + + actor_losses, critic_losses = [], [] + + for gradient_step in range(gradient_steps): + + # Sample replay buffer + replay_data = self.replay_buffer.sample(batch_size, env=self._vec_normalize_env) + + with th.no_grad(): + # Select action according to policy and add clipped noise + noise = 
replay_data.actions.clone().data.normal_(0, self.target_policy_noise) + noise = noise.clamp(-self.target_noise_clip, self.target_noise_clip) + next_actions = (self.actor_target(replay_data.next_observations) + noise).clamp(-1, 1) + + # Compute the target Q value: min over all critics targets + targets = th.cat(self.critic_target(replay_data.next_observations, next_actions), dim=1) + target_q, _ = th.min(targets, dim=1, keepdim=True) + # toDo: Fusch am Bau + # if timelimit -> reset: use target_q! + # if done = True caused by abort -> do not use target_q + target_q = replay_data.rewards + (1 - replay_data.dones) * self.gamma * target_q + + # Get current Q estimates for each critic network + current_q_estimates = self.critic(replay_data.observations, replay_data.actions) + + # Compute critic loss + critic_loss = sum([F.mse_loss(current_q, target_q) for current_q in current_q_estimates]) + critic_losses.append(critic_loss.item()) + + # store data for logging - use mean from batch + self.critic_loss_batch_mean.append(critic_loss.item()) + self.critic_estimate_target_diff_mean.append( + (sum(current_q_estimates[0] - target_q) / target_q.shape[0]).item()) + self.current_q_estimates_batch_mean.append(current_q_estimates[0].mean().item()) + self.target_q_batch_mean.append(np.mean(target_q.mean().item())) + self.reward_batch_mean.append(np.mean(replay_data.rewards.mean().item())) + + # Optimize the critics + self.critic.optimizer.zero_grad() + critic_loss.backward() + self.critic.optimizer.step() + + # Delayed policy updates + if gradient_step % self.policy_delay == 0: + # Compute actor loss + actor_loss = -self.critic.q1_forward(replay_data.observations, + self.actor(replay_data.observations)).mean() + actor_losses.append(actor_loss.item()) + + # Optimize the actor + self.actor.optimizer.zero_grad() + actor_loss.backward() + self.actor.optimizer.step() + + polyak_update(self.critic.parameters(), self.critic_target.parameters(), self.tau) + polyak_update(self.actor.parameters(), self.actor_target.parameters(), self.tau) + + # store data for logging - use mean from batch + self.actor_loss_batch_mean.append(np.mean(actor_losses)) + + self._n_updates += gradient_steps + # print('new Training function!') + + logger.record("train/n_updates", self._n_updates, exclude="tensorboard") + logger.record("train/actor_loss", np.mean(actor_losses)) + logger.record("train/critic_loss", np.mean(critic_losses)) + """ diff --git a/experiments/hp_tune/comparison_PI_ddpg.py b/experiments/hp_tune/comparison_PI_ddpg.py new file mode 100644 index 00000000..55d935ba --- /dev/null +++ b/experiments/hp_tune/comparison_PI_ddpg.py @@ -0,0 +1,726 @@ +import logging +import os +import platform +import time +from functools import partial + +import GPy +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from stochastic.processes import VasicekProcess +from tqdm import tqdm +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# imports for PIPI +from experiments.hp_tune.env.random_load import RandomLoad +from openmodelica_microgrid_gym.agents import 
SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + +import pandas as pd + +from openmodelica_microgrid_gym.util import abc_to_dq0 + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +import gym + +# np.random.seed(0) + +show_plots = False +balanced_load = False +save_results = False + +folder_name = 'Comparison_PI_DDPG_retrain_oneLoadstep' # cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] + +num_average = 1 +max_episode_steps_list = [10000] # [1000, 5000, 10000, 20000, 50000, 100000] + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +#################PI Config stuff############################################################################## + +current_directory = os.getcwd() +# folder_name = 'Pipi_safeopt_best_run4d' +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_vctrl_single_inv.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +# max_episode_steps = 1002 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from +v_DC = 600 # DC-link voltage / V; will be set as model parameter in the FMU +nomFreq = 60 # nominal grid frequency / Hz +nomVoltPeak = 169.7 # 230 * 1.414 # nominal grid voltage / V +iLimit = 16 # inverter current limit / A +iNominal = 12 # nominal inverter current / A +vNominal = 190 # nominal inverter current / A +vLimit = vNominal * 1.5 # inverter current limit / A +funnelFactor = 0.02 +vFunnel = np.array([vNominal * funnelFactor, vNominal * funnelFactor, vNominal * funnelFactor]) +mu = 400 # factor for barrier function (see below) +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + +##################################### +# Definitions for the GP +prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set +noise_var = 0.001 # ** 2 # measurement noise sigma_omega +prior_var = 2 # prior variance of the GP + +# Choose Kp and Ki (current and voltage controller) as mutable parameters (below) and define bounds and lengthscale +# for both of them +bounds = [(0.000, 0.045), (4, 450)] # bounds on the input variable current-Ki&Kp and voltage-Ki&Kp +lengthscale = [.003, 50.] 
# length scale for the parameter variation [current-Ki&Kp and voltage-Ki&Kp] for the GP + +# The performance should not drop below the safe threshold, which is defined by the factor safe_threshold times +# the initial performance: safe_threshold = 1.2 means: performance measurement for optimization are seen as +# unsafe, if the new measured performance drops below 20 % of the initial performance of the initial safe (!) +# parameter set +safe_threshold = 0 +j_min = -5 # cal min allowed performance + +# The algorithm will not try to expand any points that are below this threshold. This makes the algorithm stop +# expanding points eventually. +# The following variable is multiplied with the first performance of the initial set by the factor below: +explore_threshold = 0 + +# Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of +# limit exceeded +abort_reward = 100 * j_min + +# Definition of the kernel +kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + +##################################### +# Definition of the controllers +# kp_v = 0.002 +# ki_v = 143 +kp_v = 0 # 0.0095 # 0.0 +ki_v = 182 # 173.22 # 200 +# Choose Kp and Ki for the current and voltage controller as mutable parameters +mutable_params = dict(voltageP=MutableFloat(kp_v), voltageI=MutableFloat(ki_v)) # 300Hz +# mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz +voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-iLimit, iLimit)) + +# kp_c = 0.033 +# ki_c = 17.4 # 11.8 + +kp_c = 0.0308 # 0.0404 # 0.04 +ki_c = 13.3584 # 4.065 # 11.8 +current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) # Current controller values + +# Define the droop parameters for the inverter of the active power Watt/Hz (DroopGain), delta_t (0.005) used for the +# filter and the nominal frequency +# Droop controller used to calculate the virtual frequency drop due to load changes +droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) + +# Define the Q-droop parameters for the inverter of the reactive power VAR/Volt, delta_t (0.002) used for the +# filter and the nominal voltage +qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + +# Define a voltage forming inverter using the PIPI and droop parameters from above + +# Controller with observer +# ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, delta_t, droop_param, qdroop_param, +# observer=[Lueneberger(*params) for params in +# repeat((A, B, C, L, delta_t * undersample, v_DC / 2), 3)], undersampling=undersample, +# name='master') + +# Controller without observer +ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + +##################################### +# Definition of the optimization agent +# The agent is using the SafeOpt algorithm by F. 
Berkenkamp (https://arxiv.org/abs/1509.01066) in this example +# Arguments described above +# History is used to store results +agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) +upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 + +################DDPG Config Stuff######################################################################### +gamma = 0.946218 +integrator_weight = 0.311135 +antiwindup_weight = 0.660818 +model_path = 'experiments/hp_tune/trained_models/study_22_run_11534/' +error_exponent = 0.5 +use_gamma_in_rew = 1 +n_trail = 50001 +actor_number_layers = 2 +critic_number_layers = 4 +alpha_relu_actor = 0.208098 +alpha_relu_critic = 0.00678497 + +for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + for ave_run in tqdm(range(num_average), desc='steps', unit='step', leave=False): + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, + i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + ####################################PI Stuff################################################ + R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + rand_load_train = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + + cb = CallbackList() + # set initial = None to reset load random in range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + cb.append(rand_load_train.reset) + + plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + + + # rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + # load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name 
+ experiment_name + n_trail}/Inductor_currents.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + # PlotTmpl([[f'master.I_hat{i}' for i in 'abc'], [f'r_load.resistor{i}.i' for i in '123'], ], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$i_{\mathrm{o estimate,abc}}\,/\,\mathrm{A}$'), + # color=[['b', 'r', 'g'], ['b', 'r', 'g']], + # style=[['-*'], ['--*']] + # ), + # PlotTmpl([[f'master.m{i}' for i in 'dq0']], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$m_{\mathrm{dq0}}\,/\,\mathrm{}$', + # filename='Sim_m_dq0') + # ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + # log_level=logging.INFO, + viz_mode='episode', + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.random_load_step, + 'r_load.resistor2.R': rand_load_train.random_load_step, + 'r_load.resistor3.R': rand_load_train.random_load_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + 
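+                   # Editor's note (assumption): the lambdas in model_params above return a random value only for
+                   # t == -1, which presumably marks the reset call, so capacitor voltages and inductor currents
+                   # are re-initialized uniformly within their nominal range at every episode start; returning
+                   # None during the simulation leaves the FMU states untouched.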
history=FullHistory(), + # on_episode_reset_callback=cb.fire, + action_time_delay=1 * undersample + ) + + return_sum_PI = 0.0 + rew_list_PI = [] + v_d_PI = [] + v_q_PI = [] + v_0_PI = [] + R_load_PI = [] + limit_exceeded_in_test_PI = False + limit_exceeded_penalty_PI = 0 + + ####################################DDPG Stuff############################################## + return_sum = 0.0 + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'], + max_episode_steps=max_episode_steps_list[max_eps_steps], + + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.random_load_step, + 'r_load.resistor2.R': rand_load_train.random_load_step, + 'r_load.resistor3.R': rand_load_train.random_load_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + on_episode_reset_callback=cb.fire + ) + + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, penalty_P_weight=0) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + + env_test.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + # model = DDPG.load(model_path + f'model.zip') # , env=env_test) + model = DDPG.load(model_path + f'model_retrained.zip') # , env=env_test) + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + env_test.action_space = 
gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + rew_list = [] + v_d = [] + v_q = [] + v_0 = [] + R_load = [] + + ####### Run Test ######### + # agent ~ PI Controllerv using env + # model ~ RL Controller using env_test + # Both run in the same loop + + agent.reset() + agent.obs_varnames = env.history.cols + env.history.cols = env.history.structured_cols(None) + agent.measurement_cols + env.measure = agent.measure + agent_fig = None + + obs = env_test.reset() + obs_PI = env.reset() + + for step in tqdm(range(env_test.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + agent.observe(None, False) + act_PI = agent.act(obs_PI) + obs_PI, r_PI, done_PI, info_PI = env.step(act_PI) + rew_list_PI.append(r_PI) + env.render() + return_sum_PI += r_PI + if r_PI == -1 and not limit_exceeded_in_test_PI: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test_PI = True + limit_exceeded_penalty_PI = -1 + + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + + """ + v_a = env_test.history.df['lc.capacitor1.v'].iloc[-1] + v_b = env_test.history.df['lc.capacitor2.v'].iloc[-1] + v_c = env_test.history.df['lc.capacitor3.v'].iloc[-1] + R_load.append(env_test.history.df['r_load.resistor1.R'].iloc[-1]) + + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), env_test.env.net.components[0].phase) + + v_d.append(v_dq0[0]) + v_q.append(v_dq0[1]) + v_0.append(v_dq0[2]) + + v_a_PI = env.history.df['lc.capacitor1.v'].iloc[-1] + v_b_PI = env.history.df['lc.capacitor2.v'].iloc[-1] + v_c_PI = env.history.df['lc.capacitor3.v'].iloc[-1] + R_load_PI.append(env.history.df['r_load.resistor1.R'].iloc[-1]) + + v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), env.net.components[0].phase) + + v_d_PI.append(v_dq0_PI[0]) + v_q_PI.append(v_dq0_PI[1]) + v_0_PI.append(v_dq0_PI[2]) + """ + + if step % 10000 == 0 and step != 0: + print("10%") + """ + env_test.close() + obs = env_test.reset() + + env.close() + agent.reset() + obs_PI = env.reset() + """ + + # print(rewards) + if done: + env_test.close() + + # print(limit_exceeded_in_test) + break + v_a = env_test.history.df['lc.capacitor1.v'] + v_b = env_test.history.df['lc.capacitor2.v'] + v_c = env_test.history.df['lc.capacitor3.v'] + i_a = env_test.history.df['lc.inductor1.i'] + i_b = env_test.history.df['lc.inductor2.i'] + i_c = env_test.history.df['lc.inductor3.i'] + R_load = (env_test.history.df['r_load.resistor1.R'].tolist()) + phase = env_test.history.df['inverter1.phase.0'] # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + + i_d = i_dq0[0].tolist() + i_q = i_dq0[1].tolist() + i_0 = i_dq0[2].tolist() + v_d = (v_dq0[0].tolist()) + v_q = (v_dq0[1].tolist()) + v_0 = (v_dq0[2].tolist()) + + v_a_PI = env.history.df['lc.capacitor1.v'] + v_b_PI = env.history.df['lc.capacitor2.v'] + v_c_PI = env.history.df['lc.capacitor3.v'] + i_a_PI = env.history.df['lc.inductor1.i'] + i_b_PI = env.history.df['lc.inductor2.i'] + i_c_PI = env.history.df['lc.inductor3.i'] + R_load_PI = 
(env.history.df['r_load.resistor1.R'].tolist()) + phase_PI = env.history.df['inverter1.phase.0'] # env.net.components[0].phase + + i_dq0_PI = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) + + i_d_PI = i_dq0_PI[0].tolist() + i_q_PI = i_dq0_PI[1].tolist() + i_0_PI = i_dq0_PI[2].tolist() + v_d_PI = (v_dq0_PI[0].tolist()) + v_q_PI = (v_dq0_PI[1].tolist()) + v_0_PI = (v_dq0_PI[2].tolist()) + + env_test.close() + _, env_fig = env.close() + agent.observe(r_PI, done_PI) + + plt.plot(v_d, 'b') + plt.plot(v_q, 'r') + plt.plot(v_0, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title('DDPG') + plt.show() + + plt.plot(R_load, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('Test') + plt.show() + + plt.plot(v_d_PI, 'b') + plt.plot(v_q_PI, 'r') + plt.plot(v_0_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title('PI') + plt.show() + + plt.plot(R_load_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('Test') + plt.show() + + # return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + print(f'RL: {(return_sum / env_test.max_episode_steps + limit_exceeded_penalty)}') + print(f'PI: {(return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI)}') + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "time": ts, + "PI_Kp_c": kp_c, + "PI_Ki_c": ki_c, + "PI_Kp_v": kp_v, + "PI_Ki_v": ki_v, + "DDPG_model_path": model_path, + "Return PI": (return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI), + "Return DDPG": (return_sum / env_test.max_episode_steps + limit_exceeded_penalty), + "v_d_PI": v_d_PI, + "v_q_PI": v_q_PI, + "v_0_PI": v_0_PI, + "v_d_DDPG": v_d, + "v_q_DDPG": v_q, + "v_0_DDPG": v_0, + "R_load": R_load, + "R_load_PI": R_load_PI, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "best of new 4D unsafe optimization of 300 runs (picard) to figure out the boundaries " + "of the statespace without reset", + "optimization node": 'Thinkpad', + "info2": 'storing the current and modulation indices as well', + "optimization folder name": 'Pipi_new_testcase_opt_4d_undsafe_2' + } + node = platform.uname().node + + # mongo_recorder = Recorder(database_name=folder_name) + + # mongo_recorder.save_to_mongodb('Comparison1' + n_trail, compare_result) + mongo_recorder.save_to_mongodb('Comparison_4D_optimizedPIPI_retrainDDPG', + compare_result) + # mongo_recorder.save_to_mongodb('Comparison_2D_optimizedPIPI', compare_result) + + ret_list.append((return_sum / env_test.max_episode_steps + limit_exceeded_penalty)) + ret_array[ave_run] = (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + # ret_dict[str(ave_run)] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # zipped = zip(max_episode_steps_list[max_eps_steps], ret_list) + # temp_dict = dict(zipped) + temp_dict = {str(max_episode_steps_list[max_eps_steps]): ret_list} + result_list.append(temp_dict) + # ret_dict.append(zipped) + # df = df.append(ret_dict) + + mean_list.append(np.mean(ret_array)) + std_list.append(np.std(ret_array)) + +# df = df.append(temp_list, True) +print(mean_list) +print(std_list) +print(result_list) + +results = { + 'Mean': mean_list, + 'Std': std_list, + 'All results': result_list, + 'max_episode_steps_list': max_episode_steps_list +} + +df = pd.DataFrame(results) +# 
df.to_pickle("DDPG_study18_best_test_varianz.pkl") +asd = 1 +""" +m = np.array(df['Mean']) +s = np.array(df['Std']) +max_episode_steps_list = np.array(df['max_episode_steps_list']) + +plt.plot(max_episode_steps_list, m) +plt.fill_between(max_episode_steps_list, m - s, m + s, facecolor='r') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('DDPG') +plt.show() + +# plt.plot(max_episode_steps_list, m) +# plt.fill_between(max_episode_steps_list, m - s, m + s, facecolor='r') +plt.errorbar(max_episode_steps_list, m, s, fmt='-o') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('DDPG') +plt.show() + +plt.plot(max_episode_steps_list, s) +plt.ylabel('std') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('DDPG') +plt.show() +""" diff --git a/experiments/hp_tune/data/R_load_dessca.pkl b/experiments/hp_tune/data/R_load_dessca.pkl new file mode 100644 index 00000000..6a804f58 Binary files /dev/null and b/experiments/hp_tune/data/R_load_dessca.pkl differ diff --git a/experiments/hp_tune/data/R_load_deterministic_test_case2_1_seconds.pkl b/experiments/hp_tune/data/R_load_deterministic_test_case2_1_seconds.pkl new file mode 100644 index 00000000..3512f7c0 Binary files /dev/null and b/experiments/hp_tune/data/R_load_deterministic_test_case2_1_seconds.pkl differ diff --git a/experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl b/experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl new file mode 100644 index 00000000..4a16afee Binary files /dev/null and b/experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl differ diff --git a/experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl b/experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl new file mode 100644 index 00000000..73659a6f Binary files /dev/null and b/experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl differ diff --git a/experiments/hp_tune/data/R_load_oneLoadstepPerEpisode2881Len_test_case_10_seconds.pkl b/experiments/hp_tune/data/R_load_oneLoadstepPerEpisode2881Len_test_case_10_seconds.pkl new file mode 100644 index 00000000..e6bfbc06 Binary files /dev/null and b/experiments/hp_tune/data/R_load_oneLoadstepPerEpisode2881Len_test_case_10_seconds.pkl differ diff --git a/experiments/hp_tune/data/R_load_test_case_1_second b/experiments/hp_tune/data/R_load_test_case_1_second new file mode 100644 index 00000000..506c8389 Binary files /dev/null and b/experiments/hp_tune/data/R_load_test_case_1_second differ diff --git a/experiments/hp_tune/data/R_load_test_case_2_seconds.pkl b/experiments/hp_tune/data/R_load_test_case_2_seconds.pkl new file mode 100644 index 00000000..756a199d Binary files /dev/null and b/experiments/hp_tune/data/R_load_test_case_2_seconds.pkl differ diff --git a/experiments/hp_tune/env/__init__.py b/experiments/hp_tune/env/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/experiments/hp_tune/env/env_wrapper.py b/experiments/hp_tune/env/env_wrapper.py new file mode 100644 index 00000000..b38bc5e8 --- /dev/null +++ b/experiments/hp_tune/env/env_wrapper.py @@ -0,0 +1,1059 @@ +import platform +from functools import partial +from typing import Union + +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.type_aliases import GymStepReturn +from stochastic.processes import 
VasicekProcess + +from experiments.hp_tune.env.random_load import RandomLoad +from experiments.hp_tune.env.vctrl_single_inv import net +from experiments.hp_tune.util.config import cfg +from openmodelica_microgrid_gym.util import abc_to_alpha_beta, dq0_to_abc, abc_to_dq0, Fastqueue, RandProcess + + +class BaseWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", gamma=0, + number_learing_steps=500000, number_past_vals=0): + """ + Base Env Wrapper to add features to the env-observations and adds information to env.step output which can be + used in case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) + + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + # increase action-space for PI-seperation + # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1)) + + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self._v_pahsor = 0.0 + self.n_episode = 0 + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.reward_episode_mean = [] + self.n_trail = n_trail + self.phase = [] + self.used_P = np.zeros(self.action_space.shape) + self.gamma = gamma + self.number_learing_steps = number_learing_steps + self.delay_queues = [Fastqueue(1, 3) for _ in range(number_past_vals)] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + + if cfg['is_dq0']: + # Action: dq0 -> abc + action_abc = dq0_to_abc(action, self.env.net.components[0].phase) + else: + action_abc = action + + obs, reward, done, info = super().step(action_abc) + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! + obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + super().render() + + self._n_training_steps += 1 + + # if self._n_training_steps % round(self.training_episode_length / 10) == 0: + # self.env.on_episode_reset_callback() + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + super().close() + + # add wanted features here (add appropriate self.observation in init!!) 
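+        # Editor's note: this wrapper extends the raw observation with (i) the dq0 control error
+        # (v_ref - v_meas) / 2, (ii) the previously applied action (self.used_P) so the agent can learn the
+        # one-step actuation delay, and (iii) number_past_vals delayed dq0 voltage measurements that are
+        # shifted through the Fastqueue instances in self.delay_queues.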
+ # calculate magnitude of current phasor abc + # self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + # self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + "Reward function": 'rew.rew_fun_dq0', + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + self.recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + # todo efficiency? 
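+        # Editor's note: np.copy decouples the stored action from the array handed in by the caller,
+        # which may be reused or mutated by the agent; for a 3- or 6-element action the copy is cheap,
+        # so the efficiency concern noted above is presumably negligible.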
+ self.used_P = np.copy(action) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + [x.clear() for x in self.delay_queues] + obs = super().reset() + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! + obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + self._n_training_steps = 0 + self.used_P = np.zeros(self.action_space.shape) + + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + # obs = np.append(obs, delta_i_lim_i_phasor) + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + return obs + + def cal_phasor_magnitude(self, abc: np.array) -> float: + """ + Calculated the magnitude of a phasor in a three phase system. 
M + + :param abc: Due to limit normed currents or voltages in abc frame + :return: magnitude of the current or voltage phasor + """ + # calculate magnitude of current phasor abc-> alpha,beta ->|sqrt(alpha² + beta²)| + i_alpha_beta = abc_to_alpha_beta(abc) + i_phasor_mag = np.sqrt(i_alpha_beta[0] ** 2 + i_alpha_beta[1] ** 2) + + return i_phasor_mag + + def shift_and_append(self, obs): + """ + Takes the observation and shifts throught the queue + every queue output is added to total obs + """ + obs_delay_array = np.array([]) + obs_temp = obs + for queue in self.delay_queues: + obs_temp = queue.shift(obs_temp) + obs_delay_array = np.append(obs_delay_array, obs_temp) + + return obs_delay_array + + +class FeatureWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000): # , use_past_vals=False, number_past_vals=0): + """ + Env Wrapper to add features to the env-observations and adds information to env.step output which can be used in + case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) + + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + # increase action-space for PI-seperation + # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1)) + + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self._v_pahsor = 0.0 + self.n_episode = 0 + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.reward_episode_mean = [] + self.n_trail = n_trail + self.phase = [] + self.integrator_sum = np.zeros(self.action_space.shape) + self.integrator_weight = integrator_weight + self.antiwindup_weight = antiwindup_weight + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + self.gamma = gamma + self.penalty_I_weight = penalty_I_weight + self.penalty_P_weight = penalty_P_weight + self.t_start_penalty_I = t_start_penalty_I + self.t_start_penalty_P = t_start_penalty_P + self.number_learing_steps = number_learing_steps + self.integrator_sum_list0 = [] + self.integrator_sum_list1 = [] + self.integrator_sum_list2 = [] + self.action_P0 = [] + self.action_P1 = [] + self.action_P2 = [] + self.action_I0 = [] + self.action_I1 = [] + self.action_I2 = [] + + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. 
+ Triggers the env to reset without done=True every training_episode_length steps + """ + action_P = action[0:3] + action_I = action[3:6] + + self.integrator_sum += action_I * self.integrator_weight + + action_PI = action_P + self.integrator_sum + + if cfg['is_dq0']: + # Action: dq0 -> abc + action_abc = dq0_to_abc(action_PI, self.env.net.components[0].phase) + + + + # check if m_abc will be clipped + if np.any(abs(action_abc) > 1): + + clipped_action = np.clip(action_abc, -1, 1) + + delta_action = clipped_action - action_abc + # if, reduce integrator by clipped delta + action_delta = abc_to_dq0(delta_action, self.env.net.components[0].phase) + self.integrator_sum += action_delta * self.antiwindup_weight + + # clip_reward = np.clip(np.sum(np.abs(delta_action) * \ + # (-1 / (self.env.net.components[0].v_lim / self.env.net.components[ + # 0].v_DC))) / 3 * (1 - self.gamma), + # -1, 0) + + clip_reward = 0 + # toDo reset clip reward for P10 experiment + + action_abc = clipped_action + + else: + clip_reward = 0 + + obs, reward, done, info = super().step(action_abc) + + reward = reward + clip_reward + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! + obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + super().render() + + integrator_penalty = np.sum(-((np.abs(action_I)) ** 0.5)) * (1 - self.gamma) / 3 + # action_P_penalty = - np.sum((np.abs(action_P - self.used_P)) ** 0.5) * (1 - self.gamma) / 3 + action_P_penalty = np.sum(-((np.abs(action_P)) ** 0.5)) * (1 - self.gamma) / 3 + + # reward_weight is = 1 + + if self.total_steps > self.t_start_penalty_I: + penalty_I_weight_scale = 1 / (self.t_start_penalty_I - self.number_learing_steps) * self.total_steps - \ + self.number_learing_steps / (self.t_start_penalty_I - self.number_learing_steps) + + else: + penalty_I_weight_scale = 1 + + if self.total_steps > self.t_start_penalty_P: + penalty_P_weight_scale = 1 / (self.t_start_penalty_P - self.number_learing_steps) * self.total_steps - \ + self.number_learing_steps / (self.t_start_penalty_P - self.number_learing_steps) + + else: + + penalty_P_weight_scale = 1 + + reward = (reward + (self.penalty_I_weight * penalty_I_weight_scale) * integrator_penalty + + self.penalty_P_weight * penalty_P_weight_scale * action_P_penalty) \ + / (1 + self.penalty_I_weight * penalty_I_weight_scale + self.penalty_P_weight * penalty_P_weight_scale) + + self._n_training_steps += 1 + + # if self._n_training_steps % round(self.training_episode_length / 10) == 0: + # self.env.on_episode_reset_callback() + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + super().close() + + + # add wanted features here (add appropriate self.observation in init!!) 
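+        # Note on the reward shaping above: integrator_penalty and action_P_penalty are blended
+        # into the environment reward with weights that fade linearly over training,
+        #   scale = (total_steps - number_learing_steps) / (t_start_penalty - number_learing_steps),
+        # i.e. scale = 1 up to t_start_penalty and scale -> 0 towards number_learing_steps.
+        # Hypothetical example: with t_start_penalty_I = 1e5, number_learing_steps = 5e5 and
+        # total_steps = 3e5, scale = (3e5 - 5e5) / (1e5 - 5e5) = 0.5.
+        # The weighted sum is divided by (1 + w_I * scale_I + w_P * scale_P) to keep the shaped
+        # reward on a scale comparable to the unshaped one.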
+ # calculate magnitude of current phasor abc + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + self.integrator_sum_list0.append(self.integrator_sum[0]) + self.integrator_sum_list1.append(self.integrator_sum[1]) + self.integrator_sum_list2.append(self.integrator_sum[2]) + self.action_P0.append(np.float64(action_P[0])) + self.action_P1.append(np.float64(action_P[1])) + self.action_P2.append(np.float64(action_P[2])) + self.action_I0.append(np.float64(action_I[0])) + self.action_I1.append(np.float64(action_I[1])) + self.action_I2.append(np.float64(action_I[2])) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, self.used_action) + + # todo efficiency? 
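+        # Besides the dq0 control error, this wrapper feeds back both the last proportional action
+        # (used_P) and the integrator state (used_I); with a 3-phase action space that amounts to
+        # 3 + 3 + 3 = 9 extra observation entries, which the number_of_features passed to __init__
+        # has to cover (plus any past values appended by subclasses such as
+        # FeatureWrapper_pastVals).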
+ self.used_P = np.copy(action_P) + self.used_I = np.copy(self.integrator_sum) + # self.used_P = action_P + # self.used_I = self.integrator_sum + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + "Reward function": 'rew.rew_fun_dq0', + 'Integrator0': self.integrator_sum_list0, + 'Integrator1': self.integrator_sum_list1, + 'Integrator2': self.integrator_sum_list2, + 'actionP0': self.action_P0, + 'actionP1': self.action_P1, + 'actionP2': self.action_P2, + 'actionI0': self.action_I0, + 'actionI1': self.action_I1, + 'actionI2': self.action_I2 + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + self.recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + + # if self._n_training_steps > 500: + super().close() + # plt.plot(self.integrator_sum_list0) + # plt.plot(self.integrator_sum_list1) + # plt.plot(self.integrator_sum_list2) + # plt.ylabel('intergratorzustand') + # plt.show() + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + + obs = super().reset() + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! + obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + self._n_training_steps = 0 + self.integrator_sum = np.zeros(self.action_space.shape) + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! 
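+            # Measurements arrive from the FMU in abc coordinates; currents (obs[0:3]) and
+            # voltages (obs[3:6]) are rotated into dq0 with the inverter's current phase. The
+            # voltage setpoints obs[6:9] are assumed to be given in dq0 already (dq0 network
+            # config), so only the measurements are transformed here.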
+ obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + # obs = np.append(obs, delta_i_lim_i_phasor) + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, self.used_action) + + return obs + + def cal_phasor_magnitude(self, abc: np.array) -> float: + """ + Calculated the magnitude of a phasor in a three phase system. M + + :param abc: Due to limit normed currents or voltages in abc frame + :return: magnitude of the current or voltage phasor + """ + # calculate magnitude of current phasor abc-> alpha,beta ->|sqrt(alpha² + beta²)| + i_alpha_beta = abc_to_alpha_beta(abc) + i_phasor_mag = np.sqrt(i_alpha_beta[0] ** 2 + i_alpha_beta[1] ** 2) + + return i_phasor_mag + + +class FeatureWrapper_pastVals(FeatureWrapper): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 500000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000, number_past_vals=10): + """ + Env Wrapper which adds the number_past_vals voltage ([3:6]!!!) observations to the observations. + Initialized with zeros! 
+ """ + super().__init__(env, number_of_features, training_episode_length, + recorder, n_trail, integrator_weight, antiwindup_weight, gamma, + penalty_I_weight, penalty_P_weight, t_start_penalty_I, t_start_penalty_P, + number_learing_steps) + + # self.observation_space = gym.spaces.Box( + # low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + # high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + self.delay_queues = [Fastqueue(1, 3) for _ in range(number_past_vals)] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + obs, reward, done, info = super().step(action) + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + [x.clear() for x in self.delay_queues] + obs = super().reset() + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + return obs + + def shift_and_append(self, obs): + """ + Takes the observation and shifts throught the queue + every queue output is added to total obs + """ + obs_delay_array = np.array([]) + obs_temp = obs + for queue in self.delay_queues: + obs_temp = queue.shift(obs_temp) + obs_delay_array = np.append(obs_delay_array, obs_temp) + + return obs_delay_array + + +class FeatureWrapper_futureVals(FeatureWrapper): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000, number_future_vals=0, future_data=''): + """ + Env Wrapper which adds the number_future_vals R-values to the observations. + Initialized with zeros! + Therfore it uses the in the init defined pkl + """ + super().__init__(env, number_of_features + number_future_vals, training_episode_length, + recorder, n_trail, integrator_weight, antiwindup_weight, gamma, + penalty_I_weight, penalty_P_weight, t_start_penalty_I, t_start_penalty_P, + number_learing_steps) + + # not needed... toDo Chage in Randload init? + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=50), initial=50, + bounds=(14, 200)) + self.load_curve = RandomLoad(2881, net.ts, gen, + load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl')) + future_data)) + + self.future_vals = [] + self.number_future_vals = number_future_vals + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + obs, reward, done, info = super().step(action) + + self.future_vals = [2 * (self.load_curve.give_dataframe_value(self.env.sim_time_interval[0] + + i * self.env.time_step_size, + col='r_load.resistor' + Rx + '.R') - 14) / ( + 200 - 14) - 1 + # NORMALIZATION! + for i in range(self.number_future_vals) for Rx in ['1']] # , '2', '3']] + # toDo: if Load is not balanced, different values have to be sampled! 
(till now only 1 value per future step is sufficent + + obs = np.append(obs, self.future_vals) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + obs = super().reset() + self.future_vals = [2 * (self.load_curve.give_dataframe_value(self.env.sim_time_interval[0] + + i * self.env.time_step_size, + col='r_load.resistor' + Rx + '.R') - 14) / ( + 200 - 14) - 1 + # NORMALIZATION! + for i in range(self.number_future_vals) for Rx in ['1']] # , '2', '3']] + # toDo: if Load is not balanced, different values have to be sampled! (till now only 1 value per future step is sufficent + + obs = np.append(obs, self.future_vals) + return obs + + +class FeatureWrapper_I_controller(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000, Ki=12, number_past_vals=0): + """ + Env Wrapper to add features to the env-observations and adds information to env.step output which can be used in + case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) + + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + # increase action-space for PI-seperation + # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1)) + + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self._v_pahsor = 0.0 + self.n_episode = 0 + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.reward_episode_mean = [] + self.n_trail = n_trail + self.phase = [] + self.integrator_sum = np.zeros(self.action_space.shape) + self.integrator_weight = integrator_weight + self.antiwindup_weight = antiwindup_weight + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + self.gamma = gamma + self.penalty_I_weight = penalty_I_weight + self.penalty_P_weight = penalty_P_weight + self.t_start_penalty_I = t_start_penalty_I + self.t_start_penalty_P = t_start_penalty_P + self.number_learing_steps = number_learing_steps + self.Ki = Ki + self.delay_queues = [Fastqueue(1, 3) for _ in range(number_past_vals)] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. 
+ Triggers the env to reset without done=True every training_episode_length steps + """ + + action_PI = action + self.integrator_sum + + if cfg['is_dq0']: + # Action: dq0 -> abc + action_abc = dq0_to_abc(action_PI, self.env.net.components[0].phase) + + # check if m_abc will be clipped + if np.any(abs(action_abc) > 1): + # if, reduce integrator by clipped delta + action_delta = abc_to_dq0(np.clip(action_abc, -1, 1) - action_abc, self.env.net.components[0].phase) + # self.integrator_sum += action_delta * self.antiwindup_weight + self.integrator_sum += action_delta * self.env.time_step_size + + obs, reward, done, info = super().step(action_abc) + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! + obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + super().render() + + self._n_training_steps += 1 + + # if self._n_training_steps % round(self.training_episode_length / 10) == 0: + # self.env.on_episode_reset_callback() + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + super().close() + + # add wanted features here (add appropriate self.observation in init!!) + # calculate magnitude of current phasor abc + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + "Reward function": 'rew.rew_fun_dq0', + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + self.recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! 
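+            # Reminder of this wrapper's split: the agent only provides the proportional part of
+            # the action, while the integral part is a fixed-gain integrator that is advanced
+            # below on the dq0 control error via
+            #   self.integrator_sum += error * self.env.time_step_size * self.Ki
+            # and appended to the observation. Rough illustration with hypothetical values
+            # (time_step_size = 1e-4 s, Ki = 12): a constant normalized error of 0.1 moves the
+            # integrator by 0.1 * 1e-4 * 12 = 1.2e-4 per control step.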
+ obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = obs[6:9] - obs[3:6] # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + # self.integrator_sum += error * self.integrator_weight * self.Ki + self.integrator_sum += error * self.env.time_step_size * self.Ki + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + """ + Add pastvals + """ + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + obs = np.append(obs, self.integrator_sum) + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, self.used_action) + + # todo efficiency? + self.used_P = np.copy(action) + self.used_I = np.copy(self.integrator_sum) + # self.used_P = action_P + # self.used_I = self.integrator_sum + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + [x.clear() for x in self.delay_queues] + obs = super().reset() + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! + obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + self._n_training_steps = 0 + self.integrator_sum = np.zeros(self.action_space.shape) + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + """ + Features + """ + error = obs[6:9] - obs[3:6] # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + # self.integrator_sum += error * self.integrator_weight * self.Ki + self.integrator_sum += error * self.env.time_step_size * self.Ki + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 
0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + """ + Add pastvals and integrator sum + """ + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + obs = np.append(obs, self.integrator_sum) + + # obs = np.append(obs, delta_i_lim_i_phasor) + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, self.used_action) + + return obs + + def cal_phasor_magnitude(self, abc: np.array) -> float: + """ + Calculated the magnitude of a phasor in a three phase system. M + + :param abc: Due to limit normed currents or voltages in abc frame + :return: magnitude of the current or voltage phasor + """ + # calculate magnitude of current phasor abc-> alpha,beta ->|sqrt(alpha² + beta²)| + i_alpha_beta = abc_to_alpha_beta(abc) + i_phasor_mag = np.sqrt(i_alpha_beta[0] ** 2 + i_alpha_beta[1] ** 2) + + return i_phasor_mag + + def shift_and_append(self, obs): + """ + Takes the observation and shifts throught the queue + every queue output is added to total obs + """ + obs_delay_array = np.array([]) + obs_temp = obs + for queue in self.delay_queues: + obs_temp = queue.shift(obs_temp) + obs_delay_array = np.append(obs_delay_array, obs_temp) + + return obs_delay_array diff --git a/experiments/hp_tune/env/random_load.py b/experiments/hp_tune/env/random_load.py new file mode 100644 index 00000000..6b13f43b --- /dev/null +++ b/experiments/hp_tune/env/random_load.py @@ -0,0 +1,151 @@ +import numpy as np +import pandas as pd + +from openmodelica_microgrid_gym.util import RandProcess + + +class RandomLoad: + def __init__(self, train_episode_length: int, ts: float, rand_process: RandProcess, loadstep_time: int = None, + load_curve: pd.DataFrame = None, bounds=None, bounds_std=None): + """ + + :param max_episode_steps: number of steps per training episode (can differ from env.max_episode_steps) + :param ts: sampletime of env + :param rand_pocess: Instance of random process defines noise added to load + :param loadstep_time: number of env step where load step should happen + :param load_curve: Stored load data to sample from instead of smaple from distribution + :param bounds: Bounds to clip the sampled load data + :param bounds_std: Chosen bounds are sampled from a distribution with std=bounds_std and mean=bounds + + """ + self.train_episode_length = train_episode_length + self.ts = ts + self.rand_process = rand_process + if loadstep_time is None: + self.loadstep_time = np.random.randint(0, self.train_episode_length) + else: + self.loadstep_time = loadstep_time + self.load_curve = load_curve + if bounds is None: + self.bounds = (-np.inf, np.inf) + else: + self.bounds = bounds + if bounds_std is None: + self.bounds_std = (0, 0) + else: + self.bounds_std = bounds_std + + self.lowerbound_std = 0 + self.upperbound_std = 0 + + def reset(self, loadstep_time=None): + if loadstep_time is None: + self.loadstep_time = np.random.randint(0, self.train_episode_length) + else: + self.loadstep_time = loadstep_time + + def load_step(self, t, gain): + """ + Changes the load parameters + :param t: + :param gain: device parameter + :return: Sample from SP + """ + # Defines a load step after 0.01 s + 
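+        # Clarification of the comment above: the step is applied during the single sample
+        # interval right after loadstep_time * ts. loadstep_time is drawn uniformly from
+        # [0, train_episode_length) at reset() unless passed explicitly, so e.g. with ts = 1e-4 s
+        # and loadstep_time = 100 the step fires for t in (0.01 s, 0.0101 s], which is where the
+        # "0.01 s" above comes from. In that interval the process mean and reserve are set to
+        # 0.55 * gain; for t <= ts the mean is (re)initialized to gain.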
if self.loadstep_time * self.ts < t <= self.loadstep_time * self.ts + self.ts: + self.rand_process.proc.mean = gain * 0.55 + self.rand_process.reserve = gain * 0.55 + elif t <= self.ts: + self.rand_process.proc.mean = gain + + return self.rand_process.sample(t) + + def clipped_step(self, t): + return np.clip(self.rand_process.sample(t), + self.bounds[0] + self.lowerbound_std, + self.bounds[1] + self.upperbound_std + ) + + def one_random_loadstep_per_episode(self, t): + if self.loadstep_time * self.ts < t <= self.loadstep_time * self.ts + self.ts: + # do with 100 percent propability + self.do_change(1002, 102) + # else: + # with 2 permill change drift + # self.do_change(2, 0) + + return np.clip(self.rand_process.sample(t), + self.bounds[0] + self.lowerbound_std, + self.bounds[1] + self.upperbound_std + ) + + def give_dataframe_value(self, t, col): + """ + Gives load values from a stored dataframe (self.load_curve) + :parma t: time - represents here the row of the dataframe + :param col: colon name of the dataframe (typically str) + """ + if t < 0: + # return None + return self.load_curve[col][0] + if self.load_curve is None: + raise ValueError('No dataframe given! Please feed load class (.load_curve) with data') + return self.load_curve[col][int(t / self.ts)] + + def random_load_step(self, t, event_prob: int = 2, step_prob: int = 50): + """ + Changes the load parameters applying a loadstep with 0.2% probability which is a pure step with 50 % + probability otherwise a drift. In every event the random process variance is drawn randomly [1, 150]. + :param t: time + :param event_prob: probability (in pre mill) that the step event is triggered in the current step + :param step_prob: probability (in pre cent) that event is a abrupt step (drift otherwise!, random process speed + not adjustable yet + :return: Sample from SP + """ + # Changes rand process data with probability of 5% and sets new value randomly + if np.random.randint(0, 1001) < 2: + + gain = np.random.randint(self.rand_process.bounds[0], self.rand_process.bounds[1]) + + self.rand_process.proc.mean = gain + self.rand_process.proc.vol = np.random.randint(1, 150) + self.rand_process.proc.speed = np.random.randint(10, 1200) + # define sdt for clipping once every event + # np.maximum to not allow negative values + self.lowerbound_std = np.maximum(np.random.normal(scale=self.bounds_std[0]), 0.0001) + self.upperbound_std = np.random.normal(scale=self.bounds_std[1]) + + # With 50% probability do a step or a drift + if np.random.randint(0, 101) < 50: + # step + self.rand_process.reserve = gain + + else: + # drift -> Lower speed to allow + self.rand_process.proc.speed = np.random.randint(10, 100) + + return np.clip(self.rand_process.sample(t), + self.bounds[0] + self.lowerbound_std, + self.bounds[1] + self.upperbound_std + ) + + def do_change(self, event_prob_permill=2, step_prob_percent=50): + if np.random.randint(0, 1001) < event_prob_permill: + + gain = np.random.randint(self.rand_process.bounds[0], self.rand_process.bounds[1]) + + self.rand_process.proc.mean = gain + self.rand_process.proc.vol = np.random.randint(1, 150) + self.rand_process.proc.speed = np.random.randint(10, 1200) + # define sdt for clipping once every event + self.lowerbound_std = np.random.normal(scale=self.bounds_std[0]) + self.upperbound_std = np.random.normal(scale=self.bounds_std[1]) + + # With 50% probability do a step or a drift + if np.random.randint(0, 101) < step_prob_percent: + # step + self.rand_process.reserve = gain + + else: + # drift -> Lower speed to 
allow + self.rand_process.proc.speed = np.random.randint(10, 100) diff --git a/experiments/hp_tune/env/rewards.py b/experiments/hp_tune/env/rewards.py new file mode 100644 index 00000000..2e6bb435 --- /dev/null +++ b/experiments/hp_tune/env/rewards.py @@ -0,0 +1,552 @@ +import numpy as np +from openmodelica_microgrid_gym.util import nested_map, abc_to_dq0, dq0_to_abc +from typing import List + + +class Reward: + def __init__(self, nom, lim, v_DC, gamma, det_run=False, nom_region: float = 1.1, use_gamma_normalization=1, + error_exponent: float = 1.0, i_lim: float = np.inf, i_nom: float = np.inf, i_exponent: float = 1.0): + """ + + :param nom: Nominal value for the voltage + :param lim: Limit value for the voltage + :param v_DC: DC-Link voltage + :param gamma: Discount factor to map critic values -> [-1, 1] + :param use_gamma_normalization: if 0 normalization depending on gamma is not used + :param nom_region: Defines cliff in the reward landscape where the reward is pulled down because the nominal + value is exceeded. nom_region defines how much the nominal value can be exceeded before + the cliff (e.g. 1.1 -> cliff @ 1.1*self.nom + :param error_exponent: defines the used error-function: E.g.: 1 -> Mean absolute error + 2 -> Mean squared error + 0.5 -> Mean root error + :param i_lim: Limit value for the current + :param i_nom: Nominal value for the current + """ + self._idx = None + self.nom = nom + self.lim = lim + self.v_DC = v_DC + self.use_gamma_normalization = use_gamma_normalization + if self.use_gamma_normalization == 1: + self.gamma = gamma + else: + self.gamma = 0 + self.det_run = det_run + self.nom_region = nom_region + self.exponent = error_exponent + self.i_lim = i_lim + self.i_nom = i_nom + self.i_exponent = i_exponent + + def set_idx(self, obs): + if self._idx is None: + self._idx = nested_map( + lambda n: obs.index(n), + [[f'lc.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012'], + [f'lc.capacitor{k}.v' for k in '123'], [f'inverter1.v_ref.{k}' for k in '012'], + 'inverter1.phase.0']) + + def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + Defines the reward function for the environment. Uses the observations and set-points to evaluate the quality of + the used parameters. + Takes current and voltage measurements and set-points to calculate the mean-root control error and uses a + logarithmic barrier function in case of violating the current limit. Barrier function is adjustable using + parameter mu. + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. 
currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + iabc_master = data[idx[0]] # 3 phase currents at LC inductors + vabc_master = data[idx[2]] # 3 phase currents at LC inductors + + # set points (sp) + isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + vsp_abc_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + SP = vsp_abc_master * self.lim + mess = vabc_master * self.lim + + if all(np.abs(mess) <= self.nom * 1.1): + # if all(np.abs(mess) <= self.lim*self.nom_region): + """ + 1st area - inside wanted (nom) operation range + -v_nom -> + v_nom + rew = 1; if mess = SP + rew = 1/3; if error = SP-mess = 2*v_nom (worst case without braking out from nom area) + """ + # devided by 3 because of sums up all 3 phases + rew = np.sum((1 - (np.abs(SP - mess) / (2 * self.nom)) ** self.exponent) * 2 * 1 / 3 + + 1 / 3) / 3 + + + elif any(np.abs(mess) > self.lim): + """ + 3rd area - outside valid area - above lim - possible if enough v_DC - DANGEROUS + +-v_lim -> +-v_DC + + V1: + @ SP = +v_nom AND mess = -v_DC: + rew = -1; if error = v_DC + v_nom -> Worst case, +v_nom wanted BUT -v_DC measured + @ SP = -v_nom AND mess = -v_lim + rew ~ -1/3 - f[(lim-nom)/(nom+v_DC)] + rew -> -1 - 2/3*(1 - |lim - nom| / (nom+v_DC)) + The latter fraction is quite small but leads to depending on the system less then 2/3 is + substracted and we have a gap to the 2nd area! :) + + V2: None is returned to stop the episode (hint: in the env env.abort_reward is given back as reward(?) + + V3: rew = -1 + """ + + # V1: + # rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.v_DC)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma)) / 3 + + # V2: + # if return -> rew = None and in env abort_reward is given to agent + if self.det_run: + return -(1 - self.gamma) + else: + return + + # V3: + # rew = (1 - gamma) + + + else: + """ + 2nd area + +-v_nom -> +- v_lim + + @ SP = v_nom AND mess = v_nom (-µV), da if mess > v_nom (hier noch Sicherheitsabstand?) + rew = 1/3 + @ SP = v_nom AND mess = -v_lim + rew = -1/3 + + """ + rew = np.sum( + (1 - np.abs(SP - mess) / (self.nom + self.lim)) * 2 * 1 / 3 - 1 / 3) / 3 + + return rew * (1 - self.gamma) + # return -np.clip(error.squeeze(), 0, 1e5) + + def rew_fun_include_current(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + uses the same reward for voltage like defined above but also includes reward depending on the current + If i_nom is exceeded r_current: f(i_mess) -> [0, 1] is multiplied to the r_voltage + Before r_voltage is scaled to the region [0,1]: + - r_voltage = (r_voltage+1)/2 + - r = r_voltage * r_current + - r = r-1 + + If v_lim or i_lim are exceeded, episode abort -> env.abort_reward (should be -1) is given back + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. 
currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + iabc_master = data[idx[0]] # 3 phase currents at LC inductors + vabc_master = data[idx[2]] # 3 phase currents at LC inductors + + # set points (sp) + isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + vsp_abc_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + i_mess = iabc_master * self.i_lim + + SP = vsp_abc_master * self.lim + mess = vabc_master * self.lim + + if any(np.abs(mess) > self.lim) or any(np.abs(i_mess) > self.i_lim): + """ + 3rd area - outside valid area - above lim - possible if enough v_DC - DANGEROUS + +-v_lim -> +-v_DC + Valid for v_lim OR i_lim exceeded + + V1: + @ SP = +v_nom AND mess = -v_DC: + rew = -1; if error = v_DC + v_nom -> Worst case, +v_nom wanted BUT -v_DC measured + @ SP = -v_nom AND mess = -v_lim + rew ~ -1/3 - f[(lim-nom)/(nom+v_DC)] + rew -> -1 - 2/3*(1 - |lim - nom| / (nom+v_DC)) + The latter fraction is quite small but leads to depending on the system less then 2/3 is + substracted and we have a gap to the 2nd area! :) + + V2: None is returned to stop the episode (hint: in the env env.abort_reward is given back as reward(?) + + V3: rew = -1 + """ + + # V1: + # rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.v_DC)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma)) / 3 + + # V2: + # if return -> rew = None and in env abort_reward is given to agent + if self.det_run: + return -(1 - self.gamma) + else: + return + + # V3: + # rew = (1 - gamma) + + elif all(np.abs(mess) <= self.nom * 1.1): + # if all(np.abs(mess) <= self.lim*self.nom_region): + """ + 1st area - inside wanted (nom) operation range + -v_nom -> + v_nom + rew = 1; if mess = SP + rew = 1/3; if error = SP-mess = 2*v_nom (worst case without braking out from nom area) + """ + # devided by 3 because of sums up all 3 phases + # rew = np.sum((1 - (np.abs(SP - mess) / (2 * self.nom)) ** self.exponent) * 2 * (1 - self.gamma) / 3 + ( + # 1 - self.gamma) / 3) / 3 + + rew = np.sum((1 - (np.abs(SP - mess) / (2 * self.nom)) ** self.exponent) * (1 - self.gamma)) / 3 + + + + + else: + """ + 2nd area + +-v_nom -> +- v_lim + + @ SP = v_nom AND mess = v_nom (-µV), da if mess > v_nom (hier noch Sicherheitsabstand?) 
+ rew = 1/3 + @ SP = v_nom AND mess = -v_lim + rew = -1/3 + + """ + # rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.lim)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma) / 3) / 3 + rew = (1 - np.max(np.abs(SP - mess)) / (self.nom + self.lim)) * (1 - self.gamma) / 2 - (1 - self.gamma) / 2 + + if any(abs(i_mess) > ((self.i_nom + self.i_lim) / 2)): + rew = (rew + 1) / 2 # map rew_voltage -> [0,1] + + # Scale rew_voltage with rew_current + # rew = rew * np.sum((((self.i_nom - i_mess) / (self.i_lim - self.i_nom))+1) ** self.i_exponent) / 3 + rew = rew * (((self.i_nom - max(abs(i_mess))) / (self.i_lim - self.i_nom)) + 1) ** self.i_exponent + + rew = rew * 2 - 1 # map rew -> [-1, 1] + + if rew < -1: + asd = 1 + return rew # * (1-0.9) + # return -np.clip(error.squeeze(), 0, 1e5) + + def rew_fun_include_current_dq0(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + uses the same reward for voltage like defined above but also includes reward depending on the current + If i_nom is exceeded r_current: f(i_mess) -> [0, 1] is multiplied to the r_voltage + Before r_voltage is scaled to the region [0,1]: + - r_voltage = (r_voltage+1)/2 + - r = r_voltage * r_current + - r = r-1 + + If v_lim or i_lim are exceeded, episode abort -> env.abort_reward (should be -1) is given back + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + phase = data[idx[4]] + + idq0_master = abc_to_dq0(data[idx[0]], phase) # 3 phase currents at LC inductors + vdq0_master = abc_to_dq0(data[idx[2]], phase) # 3 phase currents at LC inductors + + # set points (sp) + # isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + vsp_dq0_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + vsp_abc = dq0_to_abc(data[idx[3]], phase) + + i_mess = idq0_master * self.i_lim + i_mess_abc = data[idx[0]] * self.i_lim + + SP = vsp_abc * self.lim + mess = data[idx[2]] * self.lim + + mess_abc = data[idx[2]] * self.lim + + if any(np.abs(mess_abc) > self.lim) or any(np.abs(i_mess_abc) > self.i_lim): + """ + 3rd area - outside valid area - above lim - possible if enough v_DC - DANGEROUS + +-v_lim -> +-v_DC + Valid for v_lim OR i_lim exceeded + + V1: + @ SP = +v_nom AND mess = -v_DC: + rew = -1; if error = v_DC + v_nom -> Worst case, +v_nom wanted BUT -v_DC measured + @ SP = -v_nom AND mess = -v_lim + rew ~ -1/3 - f[(lim-nom)/(nom+v_DC)] + rew -> -1 - 2/3*(1 - |lim - nom| / (nom+v_DC)) + The latter fraction is quite small but leads to depending on the system less then 2/3 is + substracted and we have a gap to the 2nd area! :) + + V2: None is returned to stop the episode (hint: in the env env.abort_reward is given back as reward(?) 
+ + V3: rew = -1 + """ + + # V1: + # rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.v_DC)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma)) / 3 + + # V2: + # if return -> rew = None and in env abort_reward is given to agent + if self.det_run: + return -(1 - self.gamma) + else: + return + + # V3: + # rew = (1 - gamma) + + elif all(np.abs(mess_abc) <= self.nom * 1.1): + # if all(np.abs(mess) <= self.lim*self.nom_region): + """ + 1st area - inside wanted (nom) operation range + -v_nom -> + v_nom + rew = 1; if mess = SP + rew = 1/3; if error = SP-mess = 2*v_nom (worst case without braking out from nom area) + """ + # devided by 3 because of sums up all 3 phases + #rew = np.sum((1 - (np.abs(SP - mess) / (2 * self.nom)) ** self.exponent) * 2 * (1 - self.gamma) / 3 + ( + # 1 - self.gamma) / 3) / 3 + + rew = np.sum((1 - (np.abs(SP - mess) / (2 * self.nom)) ** self.exponent) * (1 - self.gamma) ) / 3 + + + + + else: + """ + 2nd area + +-v_nom -> +- v_lim + + @ SP = v_nom AND mess = v_nom (-µV), da if mess > v_nom (hier noch Sicherheitsabstand?) + rew = 1/3 + @ SP = v_nom AND mess = -v_lim + rew = -1/3 + + """ + #rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.lim)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma) / 3) / 3 + rew = (1 - np.max(np.abs(SP - mess)) / (self.nom + self.lim)) * (1 - self.gamma) / 2 - (1 - self.gamma) / 2 + + if any(abs(i_mess_abc) > ((self.i_nom + self.i_lim) / 2)): + rew = (rew + 1) / 2 # map rew_voltage -> [0,1] + + # Scale rew_voltage with rew_current + # rew = rew * np.sum((((self.i_nom - i_mess) / (self.i_lim - self.i_nom))+1) ** self.i_exponent) / 3 + rew = rew * (((self.i_nom - max(abs(i_mess_abc))) / (self.i_lim - self.i_nom)) + 1) ** self.i_exponent + + rew = rew * 2 - 1 # map rew -> [-1, 1] + + if rew < -1: + asd = 1 + return rew # * (1-0.9) + # return -np.clip(error.squeeze(), 0, 1e5) + + def rew_fun_dq0(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + uses the same reward for voltage like defined above + + If v_lim is exceeded, episode abort -> env.abort_reward (should be -1) is given back + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. 
currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + phase = data[idx[4]] + + vdq0_master = abc_to_dq0(data[idx[2]], phase) # 3 phase currents at LC inductors + + # set points (sp) + vsp_dq0_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + # SP = vsp_dq0_master * self.lim + # mess = vdq0_master * self.lim + """ + if any(np.abs(data[idx[2]]) > 1): + if self.det_run: + return -(1 - self.gamma) + else: + return + else: + rew = np.sum(1 - (2 * (np.abs(vsp_dq0_master - vdq0_master)) ** self.exponent)) * (1 - self.gamma) / 3 # /2 + """ + rew = np.sum(-((np.abs(vsp_dq0_master - vdq0_master)) ** self.exponent)) * (1 - self.gamma) / 3 # /2 + + return rew + + def rew_fun_PIPI_MRE(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + uses the same reward for voltage like defined above but also includes reward depending on the current + If i_nom is exceeded r_current: f(i_mess) -> [0, 1] is multiplied to the r_voltage + Before r_voltage is scaled to the region [0,1]: + - r_voltage = (r_voltage+1)/2 + - r = r_voltage * r_current + - r = r-1 + + If v_lim or i_lim are exceeded, episode abort -> env.abort_reward (should be -1) is given back + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + i_mess = data[idx[0]] # 3 phase currents at LC inductors + mess = data[idx[2]] # 3 phase currents at LC inductors + + # set points (sp) + isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + SP = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + # SP = vsp_dq0_master * self.lim + # mess = vdq0_master * self.lim + + # rew = np.sum(-((np.abs(SP - mess)) ** 0.5)) * (1 - self.gamma) / 3 + + phase = data[idx[4]] + + vdq0_master = abc_to_dq0(data[idx[2]], phase) / self.lim # 3 phase currents at LC inductors + + # set points (sp) + vsp_dq0_master = abc_to_dq0(data[idx[3]], + phase) / self.lim # convert dq set-points into three-phase abc coordinates + + # SP = vsp_dq0_master * self.lim + # mess = vdq0_master * self.lim + + rew = np.sum(-((np.abs(vsp_dq0_master - vdq0_master)) ** self.exponent)) * (1 - self.gamma) / 3 + + # rew = np.sum(1 - (2 * (np.abs(vsp_dq0_master - vdq0_master)) ** self.exponent)) * (1 - self.gamma) / 3 + + return rew + + def rew_fun_PIPI(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + uses the same reward for voltage like defined above but also includes reward depending on the current + If i_nom is exceeded r_current: f(i_mess) -> [0, 1] is multiplied to the r_voltage + Before r_voltage is scaled to the region [0,1]: + - r_voltage = (r_voltage+1)/2 + - r = r_voltage * r_current + - r = r-1 + + If v_lim or i_lim are exceeded, episode abort -> env.abort_reward (should be -1) is given back + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. 
currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + i_mess = data[idx[0]] # 3 phase currents at LC inductors + mess = data[idx[2]] # 3 phase currents at LC inductors + + # set points (sp) + isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + SP = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + # i_mess = iabc_master * self.i_lim + + # SP = vsp_abc_master * self.lim + # mess = vabc_master * self.lim + + if any(np.abs(mess) > self.lim) or any(np.abs(i_mess) > self.i_lim): + """ + 3rd area - outside valid area - above lim - possible if enough v_DC - DANGEROUS + +-v_lim -> +-v_DC + Valid for v_lim OR i_lim exceeded + + V1: + @ SP = +v_nom AND mess = -v_DC: + rew = -1; if error = v_DC + v_nom -> Worst case, +v_nom wanted BUT -v_DC measured + @ SP = -v_nom AND mess = -v_lim + rew ~ -1/3 - f[(lim-nom)/(nom+v_DC)] + rew -> -1 - 2/3*(1 - |lim - nom| / (nom+v_DC)) + The latter fraction is quite small but leads to depending on the system less then 2/3 is + substracted and we have a gap to the 2nd area! :) + + V2: None is returned to stop the episode (hint: in the env env.abort_reward is given back as reward(?) + + V3: rew = -1 + """ + + # V1: + # rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.v_DC)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma)) / 3 + + # V2: + # if return -> rew = None and in env abort_reward is given to agent + if self.det_run: + return -(1 - self.gamma) + else: + return + + # V3: + # rew = (1 - gamma) + + elif all(np.abs(mess) <= self.nom * 1.1): + # if all(np.abs(mess) <= self.lim*self.nom_region): + """ + 1st area - inside wanted (nom) operation range + -v_nom -> + v_nom + rew = 1; if mess = SP + rew = 1/3; if error = SP-mess = 2*v_nom (worst case without braking out from nom area) + """ + # devided by 3 because of sums up all 3 phases + # rew = np.sum((1 - (np.abs(SP - mess) / (2 * self.nom)) ** self.exponent) * 2 * (1 - self.gamma) / 3 + ( + # 1 - self.gamma) / 3) / 3 + rew = np.sum((1 - (np.abs(SP - mess) / (2 * self.nom)) ** self.exponent) * (1 - self.gamma)) / 3 + + + + else: + """ + 2nd area + +-v_nom -> +- v_lim + + @ SP = v_nom AND mess = v_nom (-µV), da if mess > v_nom (hier noch Sicherheitsabstand?) 
+ rew = 1/3 + @ SP = v_nom AND mess = -v_lim + rew = -1/3 + + """ + # rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.lim)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma) / 3) / 3 + rew = (1 - np.max(np.abs(SP - mess)) / (self.nom + self.lim)) * (1 - self.gamma) / 2 - (1 - self.gamma) / 2 + if any(abs(i_mess) > self.i_nom): + rew = (rew + 1) / 2 # map rew_voltage -> [0,1] + + # Scale rew_voltage with rew_current + # rew = rew * np.sum((((self.i_nom - i_mess) / (self.i_lim - self.i_nom))+1) ** self.i_exponent) / 3 + rew = rew * (((self.i_nom - max(abs(i_mess))) / (self.i_lim - self.i_nom)) + 1) ** self.i_exponent + + rew = rew * 2 - 1 # map rew -> [-1, 1] + + if rew < -1: + asd = 1 + return rew # * (1-0.9) + # return -np.clip(error.squeeze(), 0, 1e5) diff --git a/experiments/hp_tune/env/vctrl_single_inv.py b/experiments/hp_tune/env/vctrl_single_inv.py new file mode 100644 index 00000000..a108dc18 --- /dev/null +++ b/experiments/hp_tune/env/vctrl_single_inv.py @@ -0,0 +1,419 @@ +from datetime import datetime, time +from functools import partial +from itertools import accumulate +from os import makedirs + +import time + +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from stochastic.processes import VasicekProcess + +from experiments.hp_tune.env.random_load import RandomLoad +from experiments.hp_tune.env.rewards import Reward +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network + +from openmodelica_microgrid_gym.util import RandProcess +from gym.envs.registration import register +from experiments.hp_tune.util.config import cfg + +folder_name = cfg['STUDY_NAME'] # 'DDPG_MRE_sqlite_PC2' +# experiment_name = 'DDPG_VC_Reward_MRE_reward_NOT_NORMED' +experiment_name = 'plots' +timestamp = datetime.now().strftime(f'_%Y.%b.%d_%X') + +makedirs(folder_name, exist_ok=True) +# makedirs(folder_name + experiment_name, exist_ok=True) + + +# Simulation definitions +if not cfg['is_dq0']: + # load net using abc reference values + net = Network.load('net/net_vctrl_single_inv.yaml') +else: + # load net using dq0 reference values + net = Network.load('net/net_vctrl_single_inv_dq0.yaml') + # net = Network.load('net/net_p10.yaml') + +# set high to not terminate env! Termination should be done in wrapper by env after episode-length-HP +max_episode_steps = 1500000 # net.max_episode_steps # number of simulation steps per episode + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC + +# plant +""" +print('Using P10 setting') +L_filter = 70e-6 # / H +R_filter = 1.1e-3 # / Ohm +C_filter = 250e-6 # / F +lower_bound_load_clip = 1 # to allow maximal load that draws i_limit (let exceed?) +lower_bound_load_clip_std = 1 +""" +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F +# R = 40 # nomVoltPeak / 7.5 # / Ohm +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) 
+upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 +R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + +gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +# if save needed in dependence of trial ( -> foldername) shift to executive file? +def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + plt.close() + + +def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + +def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + # ts = time.gmtime() + # fig.savefig(f'{folder_name + experiment_name}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + +rand_load_train = RandomLoad(round(cfg['train_episode_length'] / 10), net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + +cb = CallbackList() +# set initial = None to reset load random in range of bounds +cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) +cb.append(rand_load_train.reset) + +register(id='vctrl_single_inv_train-v0', + entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.random_load_step, + 'r_load.resistor2.R': rand_load_train.clipped_step, + 'r_load.resistor3.R': rand_load_train.clipped_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: 
np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) + +register(id='vctrl_single_inv_train-v1', + entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.one_random_loadstep_per_episode, + 'r_load.resistor2.R': rand_load_train.clipped_step, + 'r_load.resistor3.R': rand_load_train.clipped_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) + +rand_train2 = RandomLoad(2881, net.ts, gen, + load_curve=pd.read_pickle( + 'experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl')) + +register(id='vctrl_single_inv_train-v2', + entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + 
PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_train2.give_dataframe_value, col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_train2.give_dataframe_value, col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_train2.give_dataframe_value, col='r_load.resistor3.R'), + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) + +rand_load_test = RandomLoad(2881, net.ts, gen, + # load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_oneLoadstepPerEpisode2881Len_test_case_10_seconds.pkl')) + load_curve=pd.read_pickle( + 'experiments/hp_tune/data/R_load_deterministic_test_case2_1_seconds.pkl')) +# load_curve=pd.read_pickle('experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl')) + +register(id='vctrl_single_inv_test-v0', + entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=20000, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor3.R') + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) + +register(id='vctrl_single_inv_test-v1', + 
entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=100001, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.random_load_step, + # 'r_load.resistor1.R': rand_load_train.one_random_loadstep_per_episode, + 'r_load.resistor2.R': rand_load_train.clipped_step, + 'r_load.resistor3.R': rand_load_train.clipped_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) diff --git a/experiments/hp_tune/examples/DDPG_init_execution.py b/experiments/hp_tune/examples/DDPG_init_execution.py new file mode 100644 index 00000000..96318547 --- /dev/null +++ b/experiments/hp_tune/examples/DDPG_init_execution.py @@ -0,0 +1,517 @@ +import platform +from typing import Union + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from stable_baselines3.common.monitor import Monitor +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise +from stable_baselines3.common.type_aliases import GymStepReturn + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder +from experiments.hp_tune.util.scheduler import linear_schedule +from openmodelica_microgrid_gym.util import abc_to_alpha_beta, dq0_to_abc, abc_to_dq0 + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + 
database_name=folder_name)  # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder]
+
+
+class FeatureWrapper(Monitor):
+
+    def __init__(self, env, number_of_features: int = 0, training_episode_length: int = np.inf,
+                 recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts):
+        """
+        Env wrapper that adds features to the env observations and extra information to the env.step output, which can
+        be used in case of a continuing (non-episodic) task to reset the environment without being terminated by done.
+
+        Hint: is_dq0: if the control is done in dq0; if True, the action is transformed to the abc system using the
+        env phase and the observation is transformed back to dq0 using the next phase
+
+        :param env: Gym environment to wrap
+        :param number_of_features: Number of features added to the env observations in the wrapped step method
+        :param training_episode_length: (For non-episodic environments) number of training steps after which the env is
+            reset by the agent for training purposes (set to inf in the test env!)
+        """
+        super().__init__(env)
+        self.observation_space = gym.spaces.Box(
+            low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf),
+            high=np.full(env.observation_space.shape[0] + number_of_features, np.inf))
+
+        # increase action space for PI separation
+        # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1))
+
+        self.training_episode_length = training_episode_length
+        self.recorder = recorder
+        self._n_training_steps = 0
+        self._i_phasor = 0.0
+        self.i_a = []
+        self.i_b = []
+        self.i_c = []
+        self.v_a = []
+        self.v_b = []
+        self.v_c = []
+        self._v_pahsor = 0.0
+        self.n_episode = 0
+        self.R_training = []
+        self.i_phasor_training = []
+        self.v_phasor_training = []
+        self.reward_episode_mean = []
+        self.n_trail = n_trail
+        self.phase = []
+        self.integrator_sum = np.zeros(self.action_space.shape)
+        self.integrator_weight = integrator_weight
+        self.antiwindup_weight = antiwindup_weight
+        self.used_P = np.zeros(self.action_space.shape)
+        self.used_I = np.zeros(self.action_space.shape)
+        self.used_I_action = np.zeros(self.action_space.shape)
+
+    def step(self, action: Union[np.ndarray, int]) -> GymStepReturn:
+        """
+        Adds additional features and info after the gym env.step() function is executed.
+        Triggers the env to reset (without the env itself terminating) every training_episode_length steps.
+        """
+        action_P = action[0:3]
+        action_I = action[3:6]
+
+        self.used_I_action = np.copy(action_I)
+
+        self.integrator_sum += action_I * self.integrator_weight
+
+        action_PI = action_P + self.integrator_sum
+
+        if cfg['is_dq0']:
+            # Action: dq0 -> abc
+            action_abc = dq0_to_abc(action_PI, self.env.net.components[0].phase)
+
+            # check if m_abc will be clipped
+            if np.any(abs(action_abc) > 1):
+                # if so, reduce the integrator by the clipped delta (back-calculation anti-windup)
+                action_delta = abc_to_dq0(np.clip(action_abc, -1, 1) - action_abc, self.env.net.components[0].phase)
+                self.integrator_sum += action_delta * self.antiwindup_weight
+        else:
+            # control already acts in abc coordinates, use the combined PI action directly
+            action_abc = action_PI
+
+        obs, reward, done, info = super().step(action_abc)
+        self._n_training_steps += 1
+
+        if self._n_training_steps % self.training_episode_length == 0:
+            # info["timelimit_reached"] = True
+            done = True
+
+        # add wanted features here (add the appropriate self.observation handling in __init__!)
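+        # Note on the action handling at the top of this method: the agent outputs separate P and I
+        # channels (6 actions in total). The I channel is accumulated in integrator_sum (scaled by
+        # integrator_weight), added to the P channel and only then transformed to abc. If the resulting
+        # modulation index would clip at +-1, the clipped difference is transformed back to dq0 and fed
+        # into the integrator via antiwindup_weight - a back-calculation anti-windup that keeps the
+        # integrator from winding up while the actuator is saturated.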
+ # calculate magnitude of current phasor abc + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": folder_name, + "Reward function": 'rew.rew_fun_dq0', + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + mongo_recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = obs[6:9] - obs[3:6] # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, delta_i_lim_i_phasor) + + """ + Add used action to the NN input to learn delay + """ + # obs = np.append(obs, self.used_action) + + # todo efficiency? 
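+        # Feature bookkeeping: the entries appended above are the dq0 control error (3), sin/cos of the
+        # component phase (2), the previously used P action (3) and the integrator state (3) - eleven
+        # extra values in total, matching number_of_features=11 when this wrapper is instantiated below.
+        # used_P and used_I are stored next so that the following step can feed them back as features.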
+ self.used_P = np.copy(action_P) + self.used_I = np.copy(self.integrator_sum) + # self.used_P = action_P + # self.used_I = self.integrator_sum + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + obs = super().reset() + self._n_training_steps = 0 + self.integrator_sum = np.zeros(self.action_space.shape) + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + """ + Features + """ + error = obs[6:9] - obs[3:6] # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, delta_i_lim_i_phasor) + """ + Add used action to the NN input to learn delay + """ + # obs = np.append(obs, self.used_action) + + return obs + + def cal_phasor_magnitude(self, abc: np.array) -> float: + """ + Calculated the magnitude of a phasor in a three phase system. 
M + + :param abc: Due to limit normed currents or voltages in abc frame + :return: magnitude of the current or voltage phasor + """ + # calculate magnitude of current phasor abc-> alpha,beta ->|sqrt(alpha² + beta²)| + i_alpha_beta = abc_to_alpha_beta(abc) + i_phasor_mag = np.sqrt(i_alpha_beta[0] ** 2 + i_alpha_beta[1] ** 2) + + return i_phasor_mag + + +number_learning_steps = 10000 +integrator_weight = 0.216 # trial.suggest_loguniform("integrator_weight", 1 / 20, 20) +# integrator_weight = trial.suggest_loguniform("integrator_weight", 1e-6, 1e-0) +# antiwindup_weight = trial.suggest_loguniform("antiwindup_weight", 50e-6, 50e-3) +antiwindup_weight = 0.96 # trial.suggest_float("antiwindup_weight", 0.00001, 1) + +learning_rate = 1.42e-5 # trial.suggest_loguniform("learning_rate", 100e-9, 100e-6) # 0.0002# + +lr_decay_start = 0.88 # trial.suggest_float("lr_decay_start", 0.00001, 1) # 3000 # 0.2 * number_learning_steps? +lr_decay_duration = 0.064 # trial.suggest_float("lr_decay_duration", 0.00001, +# 1) # 3000 # 0.2 * number_learning_steps? +t_start = int(lr_decay_start * number_learning_steps) +t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps, + number_learning_steps)) +final_lr = 0.3 # trial.suggest_float("final_lr", 0.00001, 1) + +gamma = 0.927 # trial.suggest_float("gamma", 0.8, 0.99) +weight_scale = 0.000132 # trial.suggest_loguniform("weight_scale", 5e-5, 0.2) # 0.005 + +bias_scale = 0.1 # trial.suggest_loguniform("bias_scale", 5e-4, 0.1) # 0.005 +alpha_relu_actor = 0.1 # trial.suggest_loguniform("alpha_relu_actor", 0.0001, 0.5) # 0.005 +alpha_relu_critic = 0.1 # trial.suggest_loguniform("alpha_relu_critic", 0.0001, 0.5) # 0.005 + +batch_size = 1024 # trial.suggest_int("batch_size", 32, 1024) # 128 +buffer_size = int(1e6) # trial.suggest_int("buffer_size", 10, 1000000) # 128 + +actor_hidden_size = 131 # trial.suggest_int("actor_hidden_size", 10, 200) # 100 # Using LeakyReLU +actor_number_layers = 3 # trial.suggest_int("actor_number_layers", 1, 5) + +critic_hidden_size = 324 # trial.suggest_int("critic_hidden_size", 10, 500) # 100 +critic_number_layers = 3 # trial.suggest_int("critic_number_layers", 1, 4) + +n_trail = str(9999999) +use_gamma_in_rew = 1 +noise_var = 0.012 # trial.suggest_loguniform("noise_var", 0.01, 1) # 2 +# min var, action noise is reduced to (depends on noise_var) +noise_var_min = 0.0013 # trial.suggest_loguniform("noise_var_min", 0.0000001, 2) +# min var, action noise is reduced to (depends on training_episode_length) +noise_steps_annealing = int( + 0.25 * number_learning_steps) # trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), +# number_learning_steps) +noise_theta = 1.74 # trial.suggest_loguniform("noise_theta", 1, 50) # 25 # stiffness of OU +error_exponent = 0.5 # trial.suggest_loguniform("error_exponent", 0.01, 4) + +training_episode_length = 2000 # trial.suggest_int("training_episode_length", 200, 5000) # 128 +learning_starts = 0.32 # trial.suggest_loguniform("learning_starts", 0.1, 2) # 128 +tau = 0.005 # trial.suggest_loguniform("tau", 0.0001, 0.2) # 2 + +learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr, + t_start=t_start, + t_end=t_end, + total_timesteps=number_learning_steps) + +rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + +env = 
gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + +env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight) + +# todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... +env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + +n_actions = env.action_space.shape[-1] +noise_var = noise_var # 20#0.2 +noise_theta = noise_theta # 50 # stiffness of OU +action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + +# action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, +# sigma_min=noise_var * np.ones(n_actions) * noise_var_min, +# mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), +# sigma=noise_var * np.ones(n_actions), dt=net.ts) + +policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers)) + +model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log='test', + # model = myDDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name}/{n_trail}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=buffer_size, + learning_starts=int(learning_starts * training_episode_length), + batch_size=batch_size, tau=tau, gamma=gamma, action_noise=action_noise, + train_freq=(1, "episode"), gradient_steps=- 1, + optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + +# Adjust network -> maybe change to Costume net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html +# adn scale weights and biases +count = 0 +for kk in range(actor_number_layers + 1): + + model.actor.mu._modules[str(count)].weight.data = model.actor.mu._modules[str(count)].weight.data * weight_scale + model.actor_target.mu._modules[str(count)].weight.data = model.actor_target.mu._modules[ + str(count)].weight.data * weight_scale + + model.actor.mu._modules[str(count)].bias.data = model.actor.mu._modules[str(count)].bias.data * bias_scale + model.actor_target.mu._modules[str(count)].bias.data = model.actor.mu._modules[ + str(count)].bias.data * bias_scale + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + +count = 0 + +for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + +# todo: Downscale actionspace - lessulgy possible? Interaction pytorch... +env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + +for ex_run in range(10): + # todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... 
+    env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1))
+
+    model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log='test',
+                 # model = myDDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name}/{n_trail}/',
+                 policy_kwargs=policy_kwargs,
+                 learning_rate=learning_rate, buffer_size=buffer_size,
+                 learning_starts=int(learning_starts * training_episode_length),
+                 batch_size=batch_size, tau=tau, gamma=gamma, action_noise=action_noise,
+                 train_freq=(1, "episode"), gradient_steps=- 1,
+                 optimize_memory_usage=False,
+                 create_eval_env=False, seed=None, device='auto', _init_setup_model=True)
+
+    # Adjust network -> maybe change to a custom net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html
+    # and scale weights and biases
+    count = 0
+    for kk in range(actor_number_layers + 1):
+
+        model.actor.mu._modules[str(count)].weight.data = model.actor.mu._modules[str(count)].weight.data * weight_scale
+        model.actor_target.mu._modules[str(count)].weight.data = model.actor_target.mu._modules[
+                                                                      str(count)].weight.data * weight_scale
+
+        model.actor.mu._modules[str(count)].bias.data = model.actor.mu._modules[str(count)].bias.data * bias_scale
+        model.actor_target.mu._modules[str(count)].bias.data = model.actor.mu._modules[
+                                                                   str(count)].bias.data * bias_scale
+
+        if kk < actor_number_layers:
+            model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor
+            model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor
+
+        count = count + 2
+
+    count = 0
+
+    for kk in range(critic_number_layers + 1):
+
+        if kk < critic_number_layers:
+            model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic
+            model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic
+
+        count = count + 2
+
+    # todo: Downscale action space - can the pytorch interaction be done in a less ugly way?
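# The two loops above rescale the freshly initialised actor and set the LeakyReLU slopes by indexing
# into the Sequential that stable-baselines3 builds for actor.mu (Linear layers at even indices,
# activations at odd indices). A self-contained sketch of the same rescaling on a plain torch
# Sequential; the layer sizes and scale factors below are illustrative, not the exact SB3 objects:

import torch as th

mu_sketch = th.nn.Sequential(th.nn.Linear(20, 131), th.nn.LeakyReLU(),
                             th.nn.Linear(131, 131), th.nn.LeakyReLU(),
                             th.nn.Linear(131, 6), th.nn.Tanh())
with th.no_grad():
    for module in mu_sketch:
        if isinstance(module, th.nn.Linear):
            module.weight.mul_(0.000132)   # weight_scale
            module.bias.mul_(0.1)          # bias_scale
        elif isinstance(module, th.nn.LeakyReLU):
            module.negative_slope = 0.1    # alpha_relu_actor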
+ env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + I_list0 = [] + I_list1 = [] + I_list2 = [] + I_action_list0 = [] + I_action_list1 = [] + I_action_list2 = [] + P_list0 = [] + P_list1 = [] + P_list2 = [] + return_sum = 0.0 + obs = env.reset() + while True: + + action, _states = model.predict(obs) + obs, rewards, done, info = env.step(action) + # I_list.append(env.used_I) + I_list0.append(env.used_I[0]) + I_list1.append(env.used_I[1]) + I_list2.append(env.used_I[2]) + P_list0.append(env.used_P[0]) + P_list1.append(env.used_P[1]) + P_list2.append(env.used_P[2]) + I_action_list0.append(env.used_I_action[0]) + I_action_list1.append(env.used_I_action[1]) + I_action_list2.append(env.used_I_action[2]) + env.render() + return_sum += rewards + if done: + break + env.close() + + plt.plot(P_list0, 'b') + plt.plot(P_list1, 'r') + plt.plot(P_list2, 'g') + # plt.xlim([0, 0.01]) + plt.grid() + plt.xlabel("time") + plt.ylabel("action_P") + plt.title('Test') + plt.show() + + plt.plot(I_list0, 'b') + plt.plot(I_list1, 'r') + plt.plot(I_list2, 'g') + # plt.xlim([0, 0.01]) + plt.grid() + plt.xlabel("time") + plt.ylabel("Integratorzustand") + plt.title('Test') + plt.show() + + plt.plot(I_action_list0, 'b') + plt.plot(I_action_list1, 'r') + plt.plot(I_action_list2, 'g') + # plt.xlim([0, 0.01]) + plt.grid() + plt.xlabel("time") + plt.ylabel("action_I") + plt.title('Test') + plt.show() diff --git a/experiments/hp_tune/examples/Pipi_testcase_eval.py b/experiments/hp_tune/examples/Pipi_testcase_eval.py new file mode 100644 index 00000000..396b44a4 --- /dev/null +++ b/experiments/hp_tune/examples/Pipi_testcase_eval.py @@ -0,0 +1,434 @@ +##################################### +# Experiment : Single voltage forming inverter supplying an RL-load via an LC-filter +# Controller: Cascaded PI-PI voltage and current controller gain parameters are optimized by SafeOpt +# a) FMU by OpenModelica and SafeOpt algorithm to find optimal controller parameters +# b) connecting via ssh to a testbench to perform real-world measurement +import logging +import os +import time +from functools import partial + +import GPy +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from pymongo import MongoClient +from stochastic.processes import VasicekProcess +from tqdm import tqdm + +from experiments.hp_tune.env.random_load import RandomLoad +from experiments.hp_tune.env.rewards import Reward +from openmodelica_microgrid_gym.agents import SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +show_plots = False +balanced_load = False +save_results = False + +num_average = 25 +max_episode_steps_list = [1000, 5000, 10000, 20000, 50000, 100000] + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +# Files saves results and resulting plots to the folder saves_VI_control_safeopt in the current directory +current_directory = os.getcwd() +folder_name = 'Pipi_safeopt_best_run4d' +save_folder = 
os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +np.random.seed(1) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_vctrl_single_inv.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +max_episode_steps = 1000 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from +v_DC = 600 # DC-link voltage / V; will be set as model parameter in the FMU +nomFreq = 60 # nominal grid frequency / Hz +nomVoltPeak = 169.7 # 230 * 1.414 # nominal grid voltage / V +iLimit = 16 # inverter current limit / A +iNominal = 12 # nominal inverter current / A +vNominal = 190 # nominal inverter current / A +vLimit = vNominal * 1.5 # inverter current limit / A +funnelFactor = 0.02 +vFunnel = np.array([vNominal * funnelFactor, vNominal * funnelFactor, vNominal * funnelFactor]) +mu = 400 # factor for barrier function (see below) +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + + +class Recorder: + + def __init__(self, URI: str = 'mongodb://localhost:27017/', database_name: str = 'OMG', ): + self.client = MongoClient(URI) + self.db = self.client[database_name] + + def save_to_mongodb(self, col: str = ' trails', data=None): + trial_coll = self.db[col] # get collection named col + if data is None: + raise ValueError('No data given to store in database!') + trial_coll.insert_one(data) + + +rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma=0, + use_gamma_normalization=1, error_exponent=0.5, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + +##################################### +# Definitions for the GP +prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set +noise_var = 0.001 # ** 2 # measurement noise sigma_omega +prior_var = 2 # prior variance of the GP + +# Choose Kp and Ki (current and voltage controller) as mutable parameters (below) and define bounds and lengthscale +# for both of them +bounds = [(0.000, 0.045), (4, 450)] # bounds on the input variable current-Ki&Kp and voltage-Ki&Kp +lengthscale = [.003, 50.] # length scale for the parameter variation [current-Ki&Kp and voltage-Ki&Kp] for the GP + +# The performance should not drop below the safe threshold, which is defined by the factor safe_threshold times +# the initial performance: safe_threshold = 1.2 means: performance measurement for optimization are seen as +# unsafe, if the new measured performance drops below 20 % of the initial performance of the initial safe (!) +# parameter set +safe_threshold = 0 +j_min = -5 # cal min allowed performance + +# The algorithm will not try to expand any points that are below this threshold. This makes the algorithm stop +# expanding points eventually. 
+# The following variable is multiplied with the first performance of the initial set by the factor below: +explore_threshold = 0 + +# Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of +# limit exceeded +abort_reward = 100 * j_min + +# Definition of the kernel +kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + +##################################### +# Definition of the controllers +# Choose Kp and Ki for the current and voltage controller as mutable parameters +mutable_params = dict(voltageP=MutableFloat(0.002), voltageI=MutableFloat(143)) # 300Hz +# mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz +voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-iLimit, iLimit)) + +kp_c = 0.033 +ki_c = 17.4 # 11.8 +current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) # Current controller values + +# Define the droop parameters for the inverter of the active power Watt/Hz (DroopGain), delta_t (0.005) used for the +# filter and the nominal frequency +# Droop controller used to calculate the virtual frequency drop due to load changes +droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) + +# Define the Q-droop parameters for the inverter of the reactive power VAR/Volt, delta_t (0.002) used for the +# filter and the nominal voltage +qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + +# Define a voltage forming inverter using the PIPI and droop parameters from above + +# Controller with observer +# ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, delta_t, droop_param, qdroop_param, +# observer=[Lueneberger(*params) for params in +# repeat((A, B, C, L, delta_t * undersample, v_DC / 2), 3)], undersampling=undersample, +# name='master') + +# Controller without observer +ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + +##################################### +# Definition of the optimization agent +# The agent is using the SafeOpt algorithm by F. Berkenkamp (https://arxiv.org/abs/1509.01066) in this example +# Arguments described above +# History is used to store results +agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +# plant + +# toDo: shift this to net?! +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) 
+upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 + +for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + for ave_run in tqdm(range(num_average), desc='steps', unit='step', leave=False): + R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + rand_load_train = RandomLoad(max_episode_steps, net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + + cb = CallbackList() + # set initial = None to reset load random in range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + cb.append(rand_load_train.reset) + + plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + + rand_load_test = RandomLoad(max_episode_steps, net.ts, gen, + load_curve=pd.read_pickle( + 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + # PlotTmpl([[f'master.I_hat{i}' for i in 'abc'], [f'r_load.resistor{i}.i' for i in '123'], ], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$i_{\mathrm{o estimate,abc}}\,/\,\mathrm{A}$'), + # color=[['b', 'r', 'g'], ['b', 'r', 'g']], + # style=[['-*'], ['--*']] + # ), + # PlotTmpl([[f'master.m{i}' for i in 'dq0']], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$m_{\mathrm{dq0}}\,/\,\mathrm{}$', + # filename='Sim_m_dq0') + # ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + log_level=logging.INFO, + viz_mode='episode', + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 
'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.random_load_step, + 'r_load.resistor2.R': rand_load_train.random_load_step, + 'r_load.resistor3.R': rand_load_train.random_load_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + history=FullHistory(), + on_episode_reset_callback=cb.fire, + action_time_delay=1 * undersample + ) + + return_sum = 0.0 + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + agent.reset() + agent.obs_varnames = env.history.cols + env.history.cols = env.history.structured_cols(None) + agent.measurement_cols + env.measure = agent.measure + + reward_list = [] + + agent_fig = None + obs = env.reset() + # for step in tqdm(range(env.max_episode_steps), desc='steps', unit='step', leave=False): + for step in range(env.max_episode_steps): + # for _ in tqdm(range(1000), desc='steps', unit='step', leave=False): + + done, r = False, None + + if len(reward_list) > 10000: + asd = 1 + + agent.observe(r, done) + act = agent.act(obs) + obs, r, done, info = env.step(act) + reward_list.append(r) + env.render() + return_sum += r + if r == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + + if step % 1000 == 0 and step != 0: + env.close() + agent.reset() + obs = env.reset() + + # break + # close env before calling final agent observe to see plots even if agent crashes + _, env_fig = env.close() + agent.observe(r, done) + print(limit_exceeded_in_test) + + # return (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + ret_list.append((return_sum / env.max_episode_steps + limit_exceeded_penalty)) + ret_array[ave_run] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # ret_dict[str(ave_run)] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # zipped = zip(max_episode_steps_list[max_eps_steps], ret_list) + # temp_dict = dict(zipped) + temp_dict = {str(max_episode_steps_list[max_eps_steps]): ret_list} + result_list.append(temp_dict) + # ret_dict.append(zipped) + # df = df.append(ret_dict) + + mean_list.append(np.mean(ret_array)) + std_list.append(np.std(ret_array)) + +# df = df.append(temp_list, True) +print(mean_list) +print(std_list) +print(result_list) + +results = { + 'Mean': mean_list, + 'Std': std_list, + 'All results': result_list, + 'max_episode_steps_list': max_episode_steps_list +} + +df = pd.DataFrame(results) +df.to_pickle("Pipi.pkl") +asd = 1 + +m = np.array(df['Mean']) +s = np.array(df['Std']) +max_episode_steps_list = 
np.array(df['max_episode_steps_list']) + +plt.plot(max_episode_steps_list, m) +plt.fill_between(max_episode_steps_list, m - s, m + s, facecolor='r') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('') +plt.show() + +# plt.plot(max_episode_steps_list, m) +# plt.fill_between(max_episode_steps_list, m - s, m + s, facecolor='r') +plt.errorbar(max_episode_steps_list, m, s, fmt='-o') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('') +plt.show() + +plt.plot(max_episode_steps_list, s) +plt.ylabel('std') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('') +plt.show() diff --git a/experiments/hp_tune/examples/Pipi_testcase_optimization.py b/experiments/hp_tune/examples/Pipi_testcase_optimization.py new file mode 100644 index 00000000..7d46e116 --- /dev/null +++ b/experiments/hp_tune/examples/Pipi_testcase_optimization.py @@ -0,0 +1,375 @@ +##################################### +# Experiment : Single voltage forming inverter supplying an RL-load via an LC-filter +# Controller: Cascaded PI-PI voltage and current controller gain parameters are optimized by SafeOpt +# a) FMU by OpenModelica and SafeOpt algorithm to find optimal controller parameters +# b) connecting via ssh to a testbench to perform real-world measurement +import logging +import os +import time +from functools import partial + +import GPy +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from pymongo import MongoClient +from stochastic.processes import VasicekProcess + +from experiments.hp_tune.env.random_load import RandomLoad +from experiments.hp_tune.env.rewards import Reward +# from experiments.model_validation.execution.monte_carlo_runner import MonteCarloRunner +from experiments.hp_tune.execution.reset_runner import MonteCarloRunner +from experiments.hp_tune.execution.runner import Runner +from openmodelica_microgrid_gym.agents import SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +show_plots = False +balanced_load = False +save_results = True +PIPI = True + +num_average = 25 +max_episode_steps_list = [1000, 5000, 10000, 20000, 50000, 100000] + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +# Files saves results and resulting plots to the folder saves_VI_control_safeopt in the current directory +current_directory = os.getcwd() +folder_name = 'Pipi_new_testcase_opt_4D_reset2' +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +np.random.seed(1) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_vctrl_single_inv.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +max_episode_steps = 99999#10000 # number of simulation steps per episode +num_episodes = 100 # number of simulation episodes (i.e. 
SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from +v_DC = 600 # DC-link voltage / V; will be set as model parameter in the FMU +nomFreq = 60 # nominal grid frequency / Hz +nomVoltPeak = 169.7 # 230 * 1.414 # nominal grid voltage / V +iLimit = 16 # inverter current limit / A +iNominal = 12 # nominal inverter current / A +vNominal = 190 # nominal inverter current / A +vLimit = vNominal * 1.5 # inverter current limit / A +funnelFactor = 0.02 +vFunnel = np.array([vNominal * funnelFactor, vNominal * funnelFactor, vNominal * funnelFactor]) +mu = 400 # factor for barrier function (see below) +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + + +class Recorder: + + def __init__(self, URI: str = 'mongodb://localhost:27017/', database_name: str = 'OMG', ): + self.client = MongoClient(URI) + self.db = self.client[database_name] + + def save_to_mongodb(self, col: str = ' trails', data=None): + trial_coll = self.db[col] # get collection named col + if data is None: + raise ValueError('No data given to store in database!') + trial_coll.insert_one(data) + + +rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma=0, + use_gamma_normalization=1, error_exponent=0.5, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + +##################################### +# Definitions for the GP +prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set +noise_var = 0.001 # ** 2 # measurement noise sigma_omega +prior_var = 2 # prior variance of the GP + +# Choose Kp and Ki (current and voltage controller) as mutable parameters (below) and define bounds and lengthscale +# for both of them +if PIPI: + # bounds = [(0.001, 0.07), (2, 150), (0.000, 0.045), (4, 450)] + # lengthscale = [0.005, 25., 0.008, 150] # .003, 50.] + bounds = [(0.001, 0.07), (2, 150), (0.000, 0.05), (4, 600)] + lengthscale = [0.01, 35., 0.01, 175] # .003, 50.] + mutable_params = dict(currentP=MutableFloat(0.04), currentI=MutableFloat(11.8), + voltageP=MutableFloat(0.0175), voltageI=MutableFloat(12)) + current_dqp_iparams = PI_params(kP=mutable_params['currentP'], kI=mutable_params['currentI'], + limits=(-1, 1)) # Best set from paper III-D + +else: + bounds = [(0.000, 0.045), (4, 450)] + lengthscale = [0.01, 150] # [0.003, 50] + mutable_params = dict(voltageP=MutableFloat(0.0175), voltageI=MutableFloat(12)) # 300Hz + kp_c = 0.04 + ki_c = 11.8 # 11.8 + current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) + +# The performance should not drop below the safe threshold, which is defined by the factor safe_threshold times +# the initial performance: safe_threshold = 1.2 means: performance measurement for optimization are seen as +# unsafe, if the new measured performance drops below 20 % of the initial performance of the initial safe (!) +# parameter set +safe_threshold = 0 +j_min = -50000 # -5 # 15000? # cal min allowed performance + +# The algorithm will not try to expand any points that are below this threshold. This makes the algorithm stop +# expanding points eventually. 
+# The following variable is multiplied with the first performance of the initial set by the factor below: +explore_threshold = -200000 + +# Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of +# limit exceeded +abort_reward = 100 * j_min + +# Definition of the kernel +kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + +##################################### +# Definition of the controllers +# Choose Kp and Ki for the current and voltage controller as mutable parameters + +# mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz +voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-iLimit, iLimit)) + +# Current controller values + +# Define the droop parameters for the inverter of the active power Watt/Hz (DroopGain), delta_t (0.005) used for the +# filter and the nominal frequency +# Droop controller used to calculate the virtual frequency drop due to load changes +droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) + +# Define the Q-droop parameters for the inverter of the reactive power VAR/Volt, delta_t (0.002) used for the +# filter and the nominal voltage +qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + +# Define a voltage forming inverter using the PIPI and droop parameters from above + +# Controller with observer +# ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, delta_t, droop_param, qdroop_param, +# observer=[Lueneberger(*params) for params in +# repeat((A, B, C, L, delta_t * undersample, v_DC / 2), 3)], undersampling=undersample, +# name='master') + +# Controller without observer +ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + +##################################### +# Definition of the optimization agent +# The agent is using the SafeOpt algorithm by F. Berkenkamp (https://arxiv.org/abs/1509.01066) in this example +# Arguments described above +# History is used to store results +agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +# plant + +# toDo: shift this to net?! +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) 
+upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 + +R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + +gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + +rand_load_train = RandomLoad(max_episode_steps, net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + +cb = CallbackList() +# set initial = None to reset load random in range of bounds +cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) +cb.append(rand_load_train.reset) + +plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + +rand_load_test = RandomLoad(max_episode_steps, net.ts, gen, + load_curve=pd.read_pickle( + 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + +def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + +env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + # PlotTmpl([[f'master.I_hat{i}' for i in 'abc'], [f'r_load.resistor{i}.i' for i in '123'], ], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$i_{\mathrm{o estimate,abc}}\,/\,\mathrm{A}$'), + # color=[['b', 'r', 'g'], ['b', 'r', 'g']], + # style=[['-*'], ['--*']] + # ), + # PlotTmpl([[f'master.m{i}' for i in 'dq0']], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$m_{\mathrm{dq0}}\,/\,\mathrm{}$', + # filename='Sim_m_dq0') + # ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + log_level=logging.INFO, + viz_mode='episode', + max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor1.R'), + # 'r_load.resistor2.R': 
partial(rand_load_test.give_dataframe_value, col='r_load.resistor2.R'), + # 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor3.R'), + 'r_load.resistor1.R': rand_load_train.random_load_step, + 'r_load.resistor2.R': rand_load_train.random_load_step, + 'r_load.resistor3.R': rand_load_train.random_load_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstepWIN.fmu', + history=FullHistory(), + on_episode_reset_callback=cb.fire, + action_time_delay=1 * undersample + ) + +runner = MonteCarloRunner(agent, env) +runner.run(num_episodes, n_mc=n_MC, visualise=True, # prepare_mc_experiment=reset_loads, + return_gradient_extend=False) + +df_len = pd.DataFrame({'lengthscale': lengthscale, + 'bounds': bounds, + 'balanced_load': balanced_load, + 'barrier_param_mu': mu, + 'J_min': j_min}) + +if save_results: + agent.history.df.to_csv(save_folder + '/_result.csv') + df_len.to_csv(save_folder + '/_params.csv') +if not PIPI: + best_agent_plt = runner.run_data['last_agent_plt'] + ax = best_agent_plt.axes[0] + ax.grid(which='both') + ax.set_axisbelow(True) + + agent.params.reset() + ax.set_ylabel(r'$K_\mathrm{i}\,/\,\mathrm{(AV^{-1}s^{-1})}$') + ax.set_xlabel(r'$K_\mathrm{p}\,/\,\mathrm{(AV^{-1})}$') + ax.get_figure().axes[1].set_ylabel(r'$J$') + plt.title('Lengthscale = {}; balanced = '.format(lengthscale, balanced_load)) + # ax.plot([0.01, 0.01], [0, 250], 'k') + # ax.plot([mutable_params['currentP'].val, mutable_params['currentP'].val], bounds[1], 'k-', zorder=1, + # lw=4, + # alpha=.5) + best_agent_plt.show() + if save_results: + best_agent_plt.savefig(save_folder + '/_agent_plt.pdf') + #best_agent_plt.savefig(save_folder + '/_agent_plt.pgf') + agent.history.df.to_csv(save_folder + '/_result.csv') + +print('\n Experiment finished with best set: \n\n {}'.format(agent.history.df.round({'J': 4, 'Params': 4}))) +print('\n Experiment finished with best set: \n') +print('\n Current-Ki&Kp and voltage-Ki&Kp = {}'.format( + agent.history.df.at[np.argmax(agent.history.df['J']), 'Params'])) +print(' Resulting in a performance of J = {}'.format(np.max(agent.history.df['J']))) +print('\n\nBest experiment results are plotted in the following:') diff --git a/experiments/hp_tune/examples/Stoch_load_test.py b/experiments/hp_tune/examples/Stoch_load_test.py new file mode 100644 index 00000000..a1eaa3f4 --- /dev/null +++ b/experiments/hp_tune/examples/Stoch_load_test.py @@ -0,0 +1,46 @@ +from functools import partial + +import gym +import matplotlib.pyplot as plt +import pandas as pd +from stochastic.processes import VasicekProcess + +from experiments.hp_tune.env.random_load import RandomLoad +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import RandProcess + +load = 55 # 28 +upper_bound_load = 200 +lower_bound_load = -10 +net = Network.load('net/net_vctrl_single_inv.yaml') +max_episode_steps = 10000 # int(2 / net.ts) 
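Here, as in the training setup above, the load resistance is drawn from a mean-reverting Vasicek (Ornstein-Uhlenbeck-type) process that RandProcess clips to the given bounds. The following self-contained numpy sketch of the same dynamics, dR = speed * (mean - R) * dt + vol * dW, is illustrative only; the scripts delegate the actual sampling to stochastic.processes.VasicekProcess, and the names ending in _demo are not part of the code above.

import numpy as np

def simulate_vasicek_demo(r0, speed, mean, vol, dt, n_steps, bounds, seed=0):
    """Euler-Maruyama discretization of dR = speed*(mean - R)*dt + vol*dW, clipped to bounds."""
    rng = np.random.default_rng(seed)
    r = np.empty(n_steps)
    r[0] = r0
    for k in range(1, n_steps):
        dw = rng.normal(0.0, np.sqrt(dt))
        r[k] = r[k - 1] + speed * (mean - r[k - 1]) * dt + vol * dw
        r[k] = np.clip(r[k], *bounds)  # hard clipping, similar in spirit to the RandProcess bounds
    return r

# e.g. 2000 steps with an illustrative step size of 1e-4 s, reverting towards 55 Ohm:
# R_demo = simulate_vasicek_demo(55, speed=1000, vol=200, mean=55, dt=1e-4, n_steps=2000, bounds=(-10, 200))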
+ +if __name__ == '__main__': + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=1000, vol=200, mean=load), initial=load, + bounds=(lower_bound_load, upper_bound_load)) + + rand_load = RandomLoad(max_episode_steps, net.ts, gen, bounds=(14, 200), bounds_std=(2, 0)) + + R_load = [] + t_vec = [] + t = 0 + + for ii in range(2000): + # if ii % 1000 == 0: + # gen.reset() + + R_load.append(rand_load.random_load_step(t)) + + t += net.ts + + t_vec.append(t) + + plt.plot(t_vec, R_load) + # plt.ylim([5,20]) + plt.show() + + df = pd.DataFrame(R_load) + + hist = df.hist(bins=100) + plt.show() diff --git a/experiments/hp_tune/examples/__init__.py b/experiments/hp_tune/examples/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/experiments/hp_tune/examples/action_noise_example.py b/experiments/hp_tune/examples/action_noise_example.py new file mode 100644 index 00000000..74733cd1 --- /dev/null +++ b/experiments/hp_tune/examples/action_noise_example.py @@ -0,0 +1,51 @@ +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise +import numpy as np +import matplotlib.pyplot as plt + +from experiments.hp_tune.util.action_noise_wrapper import myOrnsteinUhlenbeckActionNoise + +noise_var = 2. +noise_theta = 25 # stiffness of OU + +action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(3), theta=noise_theta * np.ones(3), + sigma=noise_var * np.ones(3), dt=1e-4) + +action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=1000, sigma_min=np.zeros(3), mean=np.zeros(3), + theta=noise_theta * np.ones(3), + sigma=noise_var * np.ones(3), dt=1e-4) + +noise = np.zeros([3, 1000]) +noise2 = np.zeros([3, 1000]) +noise3 = np.zeros([3, 1000]) + +for i in range(1000): + noise[:, i] = action_noise.__call__() + +action_noise.reset() # does not reset the noise reduction! 
Reduction not per episode but per learing, since action noise +# is redifiend then, no reset of annealing needed +for i in range(1000): + noise2[:, i] = action_noise.__call__() + +action_noise3 = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=1000, sigma_min=np.zeros(3), mean=np.zeros(3), + theta=noise_theta * np.ones(3), + sigma=noise_var * np.ones(3), dt=1e-4) +for i in range(1000): + noise3[:, i] = action_noise3.__call__() + +plt.plot(noise[0, :]) +plt.plot(noise[1, :]) +plt.plot(noise[2, :]) +plt.title(f'Stiffness theta = {noise_theta}, Varianz = {noise_var}') +plt.show() + +plt.plot(noise2[0, :]) +plt.plot(noise2[1, :]) +plt.plot(noise2[2, :]) +plt.title(f'Stiffness theta = {noise_theta}, Varianz = {noise_var}') +plt.show() + +plt.plot(noise3[0, :]) +plt.plot(noise3[1, :]) +plt.plot(noise3[2, :]) +plt.title(f'Stiffness theta = {noise_theta}, Varianz = {noise_var}') +plt.show() diff --git a/experiments/hp_tune/examples/critic_gamma_investigation.py b/experiments/hp_tune/examples/critic_gamma_investigation.py new file mode 100644 index 00000000..4a7eb3b6 --- /dev/null +++ b/experiments/hp_tune/examples/critic_gamma_investigation.py @@ -0,0 +1,294 @@ +import time +from typing import Union + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from pymongo import MongoClient +from stable_baselines3 import DDPG +from stable_baselines3.common.callbacks import BaseCallback, EveryNTimesteps +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.type_aliases import GymStepReturn + +from experiments.hp_tune.agents.my_ddpg import myDDPG +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net, folder_name, max_episode_steps +from experiments.hp_tune.util.record_env import RecordEnvCallback +from openmodelica_microgrid_gym.env import PlotTmpl + +np.random.seed(0) + +# toDo: what to store: +""" +Alle importieren vom Recorder der in DB speichert und interagieren an den richtungen stellen mit dem env/agent... + +after training: -> like: SaveOnBestTrainingRewardCallback(BaseCallback): after training + hyperopt-data + weights + model / net-architecture + +Each step: -> StepRecorder (ggf. StepMonitor?) + training_reward + messdaten? (aus der net.yaml die outs?) + + training_return -> if episode done: store return(-> sollte der Monitor kennen) + +config +skriptname +start- und endzeit stempel +Computername +Architektur des Netzes (mit model.to_json() ) +Gewichte des Netzes (mit model.get_layer('layer_name').weights) +Prädiktion (für jede Zielgröße eine längere Liste) +Testset (profilnummern von den messschrieben die prädiziert wurden) + +""" + + +class Recorder: + + def __init__(self, URI: str = 'mongodb://localhost:27017/', database_name: str = 'OMG', ): + self.client = MongoClient(URI) + self.db = self.client[database_name] + + def save_to_mongodb(self, col: str = ' trails', data=None): + trial_coll = self.db[col] + if data is None: + raise ValueError('No data given to store in database!') + trial_coll.insert_one(data) + + +class StepRecorder(Monitor): + + def __init__(self, env): + super().__init__(env) + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + observation, reward, done, info = super().step(action) + # print(reward) + + # hier vll noch die Messung loggen? aus der obs die richtigen suchen? 
wie figure out die augmented states? + + return observation, reward, done, info + + +class TrainRecorder(BaseCallback): + + def __init__(self, verbose=1): + super(TrainRecorder, self).__init__(verbose) + + def _on_training_end(self) -> None: + """ + This event is triggered before exiting the `learn()` method. + """ + # asd = 1 + # ads = 2 + pass + + def _on_step(self) -> bool: + # asd = 1 + return True + + def _on_rollout_end(self) -> None: + # asd = 1 + pass + + +def experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, batch_size, + actor_hidden_size, critic_hidden_size, noise_var, noise_theta, error_exponent, n_trail): + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent) + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + fig.savefig( + f'{folder_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + fig.savefig(f'{folder_name}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun, + abort_reward=-(1 - gamma), + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + # on_episode_reset_callback=cb.fire # needed? + ) + + env = StepRecorder(env) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size], qf=[critic_hidden_size, + critic_hidden_size, + critic_hidden_size])) + + callback = TrainRecorder() + + # instead of new agent load trained one and train on + # toDo: load optuna data from model + + model = myDDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name}/{n_trail}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=5000, learning_starts=100, + batch_size=batch_size, tau=0.005, gamma=gamma, action_noise=action_noise, + train_freq=- 1, gradient_steps=- 1, n_episodes_rollout=1, optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + model.actor.mu._modules['0'].weight.data = model.actor.mu._modules['0'].weight.data * weight_scale + model.actor.mu._modules['2'].weight.data = model.actor.mu._modules['2'].weight.data * weight_scale + model.actor_target.mu._modules['0'].weight.data = model.actor_target.mu._modules['0'].weight.data * weight_scale + model.actor_target.mu._modules['2'].weight.data = model.actor_target.mu._modules['2'].weight.data * weight_scale + + # todo: instead /here? store reward per step?! 
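The weight scaling applied to the actor modules above is meant to keep the untrained policy's outputs, and hence the applied actions, close to zero at the start of training. A standalone torch sketch of the same idea on a generic MLP actor; it does not touch the SB3 internals used above, and the layer sizes and _demo names are only illustrative.

import torch as th
import torch.nn as nn

actor_demo = nn.Sequential(nn.Linear(6, 100), nn.LeakyReLU(), nn.Linear(100, 3), nn.Tanh())
weight_scale_demo = 0.1

with th.no_grad():
    for layer in actor_demo:
        if isinstance(layer, nn.Linear):
            layer.weight.mul_(weight_scale_demo)  # shrink the initial weights in place

print(actor_demo(th.randn(1, 6)))  # outputs stay close to zero for typical observations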
+ plot_callback = EveryNTimesteps(n_steps=1000, callback=RecordEnvCallback(env, model, max_episode_steps)) + model.learn(total_timesteps=10000, callback=[callback, plot_callback]) + # model.learn(total_timesteps=1000, callback=callback) + + plt.plot(model.critic_loss_batch_mean) + plt.ylabel('Critic_loss (mean per batch)') + plt.xlabel('Training-step') + plt.title(f'gamma = {gamma}') + plt.show() + + plt.plot(model.actor_loss_batch_mean) + plt.ylabel('Actor_loss (mean per batch)') + plt.xlabel('Training-step') + plt.title(f'gamma = {gamma}') + plt.show() + + plt.plot(model.critic_estimate_target_diff_mean) + plt.ylabel('sum(Q_estimat - target)/N_batch_size') + plt.xlabel('Training-step') + plt.title(f'gamma = {gamma}') + plt.show() + + plt.plot(model.current_q_estimates_batch_mean) + plt.ylabel('Q_estimat (mean per batch)') + plt.xlabel('Training-step') + plt.title(f'gamma = {gamma}') + plt.show() + + plt.plot(model.target_q_batch_mean) + plt.ylabel('target (mean per batch)') + plt.xlabel('Training-step') + plt.title(f'gamma = {gamma}') + plt.show() + + plt.plot(model.reward_batch_mean) + plt.ylabel('reward (mean per batch)') + plt.xlabel('Training-step') + plt.title(f'gamma = {gamma}') + plt.show() + + monitor_rewards = env.get_episode_rewards() + print(monitor_rewards) + # todo: instead: store model(/weights+bias?) to database + model.save(f'{folder_name}/{n_trail}/model.zip') + + return_sum = 0.0 + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + ) + + obs = env_test.reset() + + # toDo: - Use other Test-episode + # - Rückgabewert = (Summe der üblichen Rewards) / (Anzahl steps Validierung) + (Penalty i.H.v. 
-1) + while True: + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + +learning_rate = 0.00018 # trail.suggest_loguniform("lr", 1e-5, 5e-3) # 0.0002# +gamma = 0 # trail.suggest_loguniform("gamma", 0.1, 0.99) +weight_scale = 0.1 # trail.suggest_loguniform("weight_scale", 5e-4, 1) # 0.005 +batch_size = 150 # trail.suggest_int("batch_size", 32, 1024) # 128 +actor_hidden_size = 100 # trail.suggest_int("actor_hidden_size", 10, 500) # 100 # Using LeakyReLU +critic_hidden_size = 100 # trail.suggest_int("actor_hidden_size", 10, 500) # 100 +n_trail = str(0) +use_gamma_in_rew = 0 +noise_var = 1 # trail.suggest_loguniform("noise_var", 0.01, 10) # 2 +noise_theta = 25 # trail.suggest_loguniform("noise_theta", 1, 50) # 25 # stiffness of OU + +# toDo: +error_exponent = 0.15 # trail.suggest_loguniform("error_exponent", 0.01, 4) +# alpha_lRelu = trail.suggest_loguniform("alpha_lRelu", 0.0001, 0.5) #0.1 +# memory_interval = 1 +# weigth_regularizer = 0.5 +# memory_lim = 5000 # = buffersize? +# warm_up_steps_actor = 2048 +# warm_up_steps_critic = 1024 # learning starts? +# target_model_update = 1000 + + +episode_return = experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, batch_size, + actor_hidden_size, critic_hidden_size, noise_var, noise_theta, error_exponent, + n_trail) + +print(episode_return) diff --git a/experiments/hp_tune/examples/ddpg_testcase_eval.py b/experiments/hp_tune/examples/ddpg_testcase_eval.py new file mode 100644 index 00000000..d9b70bb5 --- /dev/null +++ b/experiments/hp_tune/examples/ddpg_testcase_eval.py @@ -0,0 +1,285 @@ +import platform +import time + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from tqdm import tqdm +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +import pandas as pd + +# np.random.seed(0) +from openmodelica_microgrid_gym.util import abc_to_dq0 + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] + +num_average = 25 +max_episode_steps_list = [1000, 5000, 10000, 20000, 50000, 100000] + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +# def run_testcase_DDPG(gamma=0.8003175741091463, integrator_weight=0.6618979905182214, +# antiwindup_weight=0.9197062574269099, +# model_path='experiments/hp_tune/trained_models/study_18_run_6462/', +# error_exponent=0.3663140388100587, 
use_gamma_in_rew=1, n_trail=50000, +# actor_number_layers=2, critic_number_layers=3, +# alpha_relu_actor=0.04768952563400553, +# alpha_relu_critic=0.00019026593928712137 +# ): +gamma = 0.8003175741091463 +integrator_weight = 0.6618979905182214 +antiwindup_weight = 0.9197062574269099 +model_path = 'experiments/hp_tune/trained_models/study_18_run_6462_new/' +error_exponent = 0.3663140388100587 +use_gamma_in_rew = 1 +n_trail = 50000 +actor_number_layers = 2 +critic_number_layers = 3 +alpha_relu_actor = 0.04768952563400553 +alpha_relu_critic = 0.00019026593928712137 + +for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + for ave_run in tqdm(range(num_average), desc='steps', unit='step', leave=False): + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, + i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'], + max_episode_steps=max_episode_steps_list[max_eps_steps] + ) + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, penalty_P_weight=0) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + + env_test.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + model = DDPG.load(model_path + f'model.zip') # , env=env_test) + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + env_test.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + v_d = [] + v_q = [] + v_0 = [] + + for step in range(env_test.max_episode_steps): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + 
integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + v_a = env_test.history.df['lc.capacitor1.v'].iloc[-1] + v_b = env_test.history.df['lc.capacitor2.v'].iloc[-1] + v_c = env_test.history.df['lc.capacitor3.v'].iloc[-1] + + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), env_test.env.net.components[0].phase) + + v_d.append(v_dq0[0]) + v_q.append(v_dq0[1]) + v_0.append(v_dq0[2]) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + + if step % 1000 == 0 and step != 0: + env_test.close() + + obs = env_test.reset() + + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! (risk=0 manullay in env); only voltage taken into account in reward!"} + + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + # mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + plt.plot(v_d) + plt.plot(v_q) + plt.plot(v_0) + plt.xlabel("") + plt.ylabel("v_dq0") + plt.title('Test') + plt.show() + + # return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + ret_list.append((return_sum / env_test.max_episode_steps + limit_exceeded_penalty)) + ret_array[ave_run] = (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + # ret_dict[str(ave_run)] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # zipped = zip(max_episode_steps_list[max_eps_steps], ret_list) + # temp_dict = dict(zipped) + temp_dict = {str(max_episode_steps_list[max_eps_steps]): ret_list} + result_list.append(temp_dict) + # ret_dict.append(zipped) + # df = df.append(ret_dict) + + mean_list.append(np.mean(ret_array)) + std_list.append(np.std(ret_array)) + +# df = df.append(temp_list, True) +print(mean_list) +print(std_list) +print(result_list) + +results = { + 'Mean': mean_list, + 'Std': std_list, + 'All results': result_list, + 'max_episode_steps_list': max_episode_steps_list +} + +df = pd.DataFrame(results) +df.to_pickle("DDPG_study18_best_test_varianz.pkl") +asd = 1 + +m = np.array(df['Mean']) +s = np.array(df['Std']) +max_episode_steps_list = np.array(df['max_episode_steps_list']) + 
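The v_dq0 traces collected in the evaluation loop above come from a Park transform of the instantaneous capacitor voltages at the current phase angle. For reference, a textbook amplitude-invariant sketch of that mapping; the script itself uses openmodelica_microgrid_gym.util.abc_to_dq0, whose sign convention may differ, and the names ending in _sketch/_demo are illustrative.

import numpy as np

def abc_to_dq0_sketch(x_abc, theta):
    """Amplitude-invariant Park transform (textbook convention)."""
    d = 2 / 3 * (x_abc[0] * np.cos(theta)
                 + x_abc[1] * np.cos(theta - 2 * np.pi / 3)
                 + x_abc[2] * np.cos(theta + 2 * np.pi / 3))
    q = -2 / 3 * (x_abc[0] * np.sin(theta)
                  + x_abc[1] * np.sin(theta - 2 * np.pi / 3)
                  + x_abc[2] * np.sin(theta + 2 * np.pi / 3))
    zero = (x_abc[0] + x_abc[1] + x_abc[2]) / 3
    return np.array([d, q, zero])

theta_demo = 0.1
v_abc_demo = 169.7 * np.cos(theta_demo - np.array([0, 2 * np.pi / 3, -2 * np.pi / 3]))
print(abc_to_dq0_sketch(v_abc_demo, theta_demo))  # ~[169.7, 0, 0] for a balanced set aligned with theta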
+plt.plot(max_episode_steps_list, m) +plt.fill_between(max_episode_steps_list, m - s, m + s, facecolor='r') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('DDPG') +plt.show() + +# plt.plot(max_episode_steps_list, m) +# plt.fill_between(max_episode_steps_list, m - s, m + s, facecolor='r') +plt.errorbar(max_episode_steps_list, m, s, fmt='-o') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('DDPG') +plt.show() + +plt.plot(max_episode_steps_list, s) +plt.ylabel('std') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('DDPG') +plt.show() diff --git a/experiments/hp_tune/examples/experiment_vctrl_single_inv_optuna_standalone.py b/experiments/hp_tune/examples/experiment_vctrl_single_inv_optuna_standalone.py new file mode 100644 index 00000000..e89b9e2c --- /dev/null +++ b/experiments/hp_tune/examples/experiment_vctrl_single_inv_optuna_standalone.py @@ -0,0 +1,551 @@ +import itertools +import time +from typing import Union + +import gym +import matplotlib.pyplot as plt +import numpy as np +import optuna +import torch as th +from stable_baselines3 import DDPG +from stable_baselines3.common.callbacks import EveryNTimesteps +from stable_baselines3.common.monitor import Monitor +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise +from stable_baselines3.common.type_aliases import GymStepReturn + +from experiments.hp_tune.agents.my_ddpg import myDDPG +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net, folder_name +from experiments.hp_tune.util.action_noise_wrapper import myOrnsteinUhlenbeckActionNoise +from experiments.hp_tune.util.record_env import RecordEnvCallback +from experiments.hp_tune.util.recorder import Recorder +from experiments.hp_tune.util.training_recorder import TrainRecorder +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.util import abc_to_alpha_beta, dq0_to_abc + +np.random.seed(0) + +print('!!!!!!!!!!!!!') +print('Old examplefile for standalone!') +print('Still needs to be refactored using config -> is_dq0,...') +print('Better use hp_tune + sqlite if local wanted') +print('Here some examples for features in abc....') + +number_learning_steps1 = 500000 +number_plotting_steps = 100000 +number_trails = 10 + +params_change = [] + +mongo_recorder = Recorder(database_name=folder_name) + + +class FeatureWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = np.inf, recorder=None, + n_trail=""): + """ + Env Wrapper to add features to the env-observations and adds information to env.step output which can be used in + case of an continuing (non-episodic) task to reset the environment without being terminated by done + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) 
+ : + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + self._v_pahsor = 0.0 + self.n_episode = 0 + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.reward_episode_mean = [] + self.n_trail = n_trail + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + + #action_abc = dq0_to_abc(action, self.env.net.components[0].phase) + + # clipping? + obs, reward, done, info = super().step(action) + + self._n_training_steps += 1 + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + + # log measurement here? + + # add wanted features here (add appropriate self.observation in init!!) + # calculate magnitude of current phasor abc + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + # todo: delta (ref-mess), letzte aktion, beides, delta i_phasor zur stromgrenze + + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + mongo_recorder.save_to_mongodb('Trail_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + self.n_episode += 1 + + # if setpoint in dq: Transform measurement to dq0!!!! 
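The i_phasor value appended to the observation above condenses the three per-unit phase currents into a single magnitude feature. A standalone sketch of that computation, a Clarke (alpha-beta) transform followed by the norm of (alpha, beta); the wrapper itself calls openmodelica_microgrid_gym.util.abc_to_alpha_beta via cal_phasor_magnitude, and the names ending in _sketch/_demo are illustrative only.

import numpy as np

def phasor_magnitude_sketch(abc):
    """Amplitude-invariant Clarke transform followed by the Euclidean norm of (alpha, beta)."""
    alpha = 2 / 3 * (abc[0] - 0.5 * abc[1] - 0.5 * abc[2])
    beta = (abc[1] - abc[2]) / np.sqrt(3)
    return np.sqrt(alpha ** 2 + beta ** 2)

i_abc_demo = 0.7 * np.cos(0.3 - np.array([0, 2 * np.pi / 3, -2 * np.pi / 3]))  # balanced currents at 0.7 p.u.
print(phasor_magnitude_sketch(i_abc_demo))  # ~0.7, independent of the instantaneous angle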
+ #obs[3:6] = dq0_to_abc(obs[3:6], self.env.net.components[0].phase) + #obs[0:3] = dq0_to_abc(obs[0:3], self.env.net.components[0].phase) + + """ + Feature control error: v_setpoint - v_mess + """ + error = obs[6:9] - obs[3:6] + + """ + Feature delta to current limit + """ + delta_i_lim_i_phasor = 1 - self.i_phasor + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + obs = np.append(obs, delta_i_lim_i_phasor) + + + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_action) + + # add sin/cos of phase to obs + obs = np.append(obs, 0.1*np.sin(self.env.net.components[0].phase)) + obs = np.append(obs, 0.1*np.cos(self.env.net.components[0].phase)) + obs = np.append(obs, (self.env.net.components[0].phase) / (2 * np.pi)) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + obs = super().reset() + self._n_training_steps = 0 + + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = dq0_to_abc(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = dq0_to_abc(obs[0:3], self.env.net.components[0].phase) + """ + Feature control error: v_setpoint - v_mess + """ + error = obs[6:9] - obs[3:6] + + """ + Feature delta to current limit + """ + delta_i_lim_i_phasor = 1 - self.i_phasor + + obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + obs = np.append(obs, delta_i_lim_i_phasor) + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_action) + + # add sin/cos of phase to obs + obs = np.append(obs, 0.1 * np.sin(self.env.net.components[0].phase)) + obs = np.append(obs, 0.1 * np.cos(self.env.net.components[0].phase)) + obs = np.append(obs, (self.env.net.components[0].phase)/(2*np.pi)) + + return obs + + def cal_phasor_magnitude(self, abc: np.array) -> float: + """ + Calculated the magnitude of a phasor in a three phase system. 
M + + :param abc: Due to limit normed currents or voltages in abc frame + :return: magnitude of the current or voltage phasor + """ + # calculate magnitude of current phasor abc-> alpha,beta ->|sqrt(alpha² + beta²)| + i_alpha_beta = abc_to_alpha_beta(abc) + i_phasor_mag = np.sqrt(i_alpha_beta[0] ** 2 + i_alpha_beta[1] ** 2) + + return i_phasor_mag + + +def experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, + learning_starts, tau, number_learning_steps, activation_function, n_trail): + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + fig.savefig( + f'{folder_name}/{n_trail}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + ts = time.gmtime() + fig.savefig( + f'{folder_name}/{n_trail}/Inductor_currents{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + fig.savefig(f'{folder_name}/{n_trail}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_include_current, + # reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + + env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + # action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, + # sigma_min=noise_var * np.ones(n_actions) * noise_var_min, + # mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + # sigma=noise_var * 
np.ones(n_actions), dt=net.ts) + + if activation_function == "LeakyReLU": + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[ + critic_hidden_size] * critic_number_layers)) + + if activation_function == "Tanh": + policy_kwargs = dict(activation_fn=th.nn.Tanh, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers)) + + callback = TrainRecorder() + + # model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name}/{n_trail}/', + model = myDDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name}/{n_trail}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=buffer_size, + learning_starts=int(learning_starts * training_episode_length), + batch_size=batch_size, tau=tau, gamma=gamma, action_noise=action_noise, + train_freq=(1, "episode"), gradient_steps=- 1, + optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + count = 0 + + for kk in range(actor_number_layers + 1): + + model.actor.mu._modules[str(count)].weight.data = model.actor.mu._modules[str(count)].weight.data * weight_scale + model.actor_target.mu._modules[str(count)].weight.data = model.actor_target.mu._modules[ + str(count)].weight.data * weight_scale + + model.actor.mu._modules[str(count)].bias.data = model.actor.mu._modules[str(count)].bias.data * bias_scale + model.actor_target.mu._modules[str(count)].bias.data = model.actor.mu._modules[ + str(count)].bias.data * bias_scale + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + # Plotting tests during Training (no training in here!) + plot_callback = EveryNTimesteps(n_steps=number_plotting_steps, + callback=RecordEnvCallback(env, model, 1000, mongo_recorder, + n_trail)) + model.learn(total_timesteps=number_learning_steps, callback=[callback, plot_callback]) + + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Sum_eps_reward": env.get_episode_rewards() + } + + mongo_recorder.save_to_mongodb('Trail_number_' + n_trail, train_data) + + model.save(f'{folder_name}/{n_trail}/model.zip') + + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_include_current, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? 
+ viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + )], + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + env_test = FeatureWrapper(env_test, number_of_features=11) + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + + while True: + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "Phase": env_test.phase} + + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + mongo_recorder.save_to_mongodb('Trail_number_' + n_trail, test_after_training) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + +def objective(trail): + number_learning_steps = number_learning_steps1 # trail.suggest_int("number_learning_steps", 1000, 1000000) + + learning_rate = trail.suggest_loguniform("lr", 1e-9, 1e-3) # 0.0002# + gamma = trail.suggest_loguniform("gamma", 0.6, 0.99) + weight_scale = 0.05 # trail.suggest_loguniform("weight_scale", 5e-4, 0.1) # 0.005 + + bias_scale = 0.05 # trail.suggest_loguniform("bias_scale", 5e-4, 0.1) # 0.005 + alpha_relu_actor = 0.1 # trail.suggest_loguniform("alpha_relu_actor", 0.0001, 0.5) # 0.005 + alpha_relu_critic = 0.1 # trail.suggest_loguniform("alpha_relu_critic", 0.0001, 0.5) # 0.005 + + batch_size = 1024 # trail.suggest_int("batch_size", 32, 1024) # 128 + buffer_size = int(1e6) # trail.suggest_int("buffer_size", 10, 20000) # 128 + + activation_function = trail.suggest_categorical("activation_functions", ["LeakyReLU", "Tanh"]) + # activation_function = trail.suggest_categorical('activation_functions', ['linear', 'poly', 'rbf']) + + actor_hidden_size = trail.suggest_int("actor_hidden_size", 10, 500) # 100 # Using LeakyReLU + actor_number_layers = trail.suggest_int("actor_number_layers", 1, 3) + + critic_hidden_size = trail.suggest_int("critic_hidden_size", 10, 600) # 100 + critic_number_layers = trail.suggest_int("critic_number_layers", 1, 4) + + n_trail = str(trail.number) + use_gamma_in_rew = 1 + noise_var = trail.suggest_loguniform("noise_var", 0.01, 4) # 2 + # min var, action 
noise is reduced to (depends on noise_var) + noise_var_min = trail.suggest_loguniform("noise_var_min", 0.0000001, 2) + # min var, action noise is reduced to (depends on training_episode_length) + noise_steps_annealing = trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), + number_learning_steps) + noise_theta = trail.suggest_loguniform("noise_theta", 1, 50) # 25 # stiffness of OU + error_exponent = 0.5 # trail.suggest_loguniform("error_exponent", 0.01, 0.5) + + training_episode_length = 2000 # trail.suggest_int("training_episode_length", 200, 5000) # 128 + learning_starts = 0.32 # trail.suggest_loguniform("learning_starts", 0.1, 2) # 128 + tau = 0.005 # trail.suggest_loguniform("tau", 0.0001, 0.2) # 2 + + trail_config_mongo = {"Name": "Config"} + trail_config_mongo.update(trail.params) + mongo_recorder.save_to_mongodb('Trail_number_' + n_trail, trail_config_mongo) + + return experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, + learning_starts, tau, number_learning_steps, activation_function, n_trail) + + +# for gamma grid search: +# gamma_list = list(itertools.chain(*[[0.001]*5, [0.25]*5, [0.5]*5, [0.75]*5, [0.99]*5])) +# search_space = {'gamma': gamma_list} + +# number_learning_steps_list = list(itertools.chain(*[[100000] * 3, [300000] * 3, [600000] * 3, [1000000] * 3])) +# number_learning_steps_list = list(itertools.chain(*[[2000] * 3, [30000] * 3, [60000] * 3, [100000] * 3])) +# search_space = {'number_learning_steps': number_learning_steps_list} + +# toDo: postgresql instead of sqlite +study = optuna.create_study(study_name=folder_name, + direction='maximize', + storage=f'sqlite:///optuna_sqlite.sqlite', + load_if_exists=True, + # sampler=optuna.samplers.GridSampler(search_space) + ) + +study.optimize(objective, n_trials=number_trails, n_jobs=1) diff --git a/experiments/hp_tune/examples/generate_testcases.py b/experiments/hp_tune/examples/generate_testcases.py new file mode 100644 index 00000000..4267bd07 --- /dev/null +++ b/experiments/hp_tune/examples/generate_testcases.py @@ -0,0 +1,280 @@ +from functools import partial +import numpy as np +import pandas as pd +import gym +import matplotlib.pyplot as plt +from stochastic.processes import VasicekProcess +from tqdm import tqdm + +from experiments.hp_tune.env.random_load import RandomLoad +from experiments.hp_tune.env.vctrl_single_inv import CallbackList +from experiments.hp_tune.util.config import cfg +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import RandProcess + +# load = 55 # 28 +# net = Network.load('net/net_vctrl_single_inv.yaml') +# max_episode_steps = int(2 / net.ts) + + +# Simulation definitions +if not cfg['is_dq0']: + # load net using abc reference values + net = Network.load('net/net_vctrl_single_inv.yaml') +else: + # load net using dq0 reference values + net = Network.load('net/net_vctrl_single_inv_dq0.yaml') + +# set high to not terminate env! 
Termination should be done in wrapper by env after episode-length-HP +max_episode_steps = 10000 # net.max_episode_steps # number of simulation steps per episode + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC + +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F +# R = 40 # nomVoltPeak / 7.5 # / Ohm +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) +upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 +R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + +gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + +""" + Tescases need to have: + - Full load + - (nearly) No load + - Step up/down + - Drift up/down +1 second, start at nominal power +""" +time_to_nomPower = 0.1 +time_nomPower_drift = 0.32 +time_loadshading = 0.587 +time_power_ramp_up = 0.741 +time_power_ramp_down = 0.985 +time_power_Ramp_stop = 1.3 +time_drift_down2 = 1.52 +time_step_up2 = 1.66 +time_drift_down3 = 1.72 + +R_load = [] + + +def load_step_deterministic(t): + if -2 < t <= 0.1: + return 100.0 + if 0.1 < t <= 0.2: + return 50.0 + if 0.2 < t <= 0.3: + return 100.0 + if 0.3 < t <= 0.4: + return 50.0 + if 0.4 < t <= 0.5: + return 200.0 + if 0.5 < t <= 0.6: + return 50.0 + if 0.7 < t <= 0.7: + return 14.0 + if 0.7 < t <= 0.8: + return 200.0 + else: + return 14 + + +def load_step(t): + """ + Doubles the load parameters + :param t: + :param gain: device parameter + :return: Dictionary with load parameters + """ + # Defines a load step after 0.01 s + if time_to_nomPower < t <= time_to_nomPower + net.ts: + # step to p_nom + gen.proc.mean = 14 + gen.reserve = 14 + + elif time_nomPower_drift < t <= time_nomPower_drift + net.ts: + # drift + gen.proc.mean = 40 + gen.proc.speed = 40 + # gen.reserve = 40 + + + elif time_loadshading < t <= time_loadshading + net.ts: + # loadshading + gen.proc.mean = upper_bound_load + gen.reserve = upper_bound_load + gen.proc.vol = 25 + + elif time_power_ramp_up < t <= time_power_ramp_up + net.ts: + # drift + gen.proc.mean = 80 + gen.proc.speed = 10 + # gen.reserve = 40 + + + elif time_power_ramp_down < t <= time_power_ramp_down + net.ts: + gen.proc.mean = 30 + gen.proc.speed = 80 + gen.proc.vol = 10 + # gen.reserve = 40 + + elif time_power_Ramp_stop < t <= time_power_Ramp_stop + net.ts: + gen.proc.mean = 30 + gen.proc.speed = 1000 + gen.proc.vol = 100 + # gen.reserve = 40 + + elif time_drift_down2 < t <= time_drift_down2 + net.ts: + gen.proc.mean = 100 + gen.proc.speed = 100 + # gen.reserve = 40 + + elif time_step_up2 < t <= time_step_up2 + net.ts: + gen.proc.mean = 20 + gen.proc.speed = 1000 + gen.reserve = 20 + + elif time_drift_down3 < t <= time_drift_down3 + net.ts: + gen.proc.mean = 50 + gen.proc.speed = 60 + gen.proc.vol = 2 + # gen.reserve = 40 + + R_load_sample = gen.sample(t) + R_load.append(R_load_sample) + + return R_load_sample + + +R_L_dessca = [0.9383603849247186, 0.01370747099315972, 0.436663566297538, 0.2261261999434656, 0.6485002895059251, + 0.11839290006977787, 0.5463985295511345, 0.7530789892142805, 0.32964132905168747, 0.9944504372633558, + 0.49206163189268537, 
0.8079144275290111, 0.3794638365771582, 0.17277441320360834, 0.0658824263134536, + 0.7017633533405172, 0.2733925217683726, 0.5982364701739138, 0.9008471432389613, 0.0034681769531965667, + 0.9875802744573191] + + +class Load_runner(): + def __init__(self): + self.count = 0 + self.cc = 0 + + def load_step_dessca(self, t): + self.cc += 1 + + if self.cc % 500 == 0: + self.count += 1 + + return R_L_dessca[self.count] * (200 - 14) + 14 + + def give_val(self, t): + return R_L_dessca[self.count] * (200 - 14) + 14 + + +Load_runner_dessca = Load_runner() + +if __name__ == '__main__': + # gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=1000, vol=10, mean=load), initial=load, + # bounds=(lower_bound_load, upper_bound_load)) + + # rand_load = RandomLoad(max_episode_steps, net.ts, gen) + + rand_load = RandomLoad(round(cfg['train_episode_length'] / 10), net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + + rand_load_train = RandomLoad(cfg['train_episode_length'], net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + + cb = CallbackList() + # set initial = None to reset load random in range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + cb.append(rand_load_train.reset) + + + def xylables(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{load}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + # plt.title('Load example drawn from Ornstein-Uhlenbeck process \n- Clipping outside the shown y-range') + plt.legend() + fig.show() + + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + net=net, + # model_params={'r_load.resistor1.R': rand_load.random_load_step, # For use upper function + # model_params={'r_load.resistor1.R': rand_load_train.one_random_loadstep_per_episode, + # model_params={'r_load.resistor1.R': rand_load_train.random_load_step, + # # For use upper function + # 'r_load.resistor2.R': rand_load.clipped_step, + # 'r_load.resistor3.R': rand_load.clipped_step}, + # model_params={'r_load.resistor1.R': load_step_deterministic, # for check train-random + # 'r_load.resistor2.R': load_step_deterministic, # loadstep + # 'r_load.resistor3.R': load_step_deterministic}, + # model_params={'r_load.resistor1.R': 25, # for check train-random + # 'r_load.resistor2.R': 25, # loadstep + # 'r_load.resistor3.R': 25}, + model_params={'r_load.resistor1.R': Load_runner_dessca.load_step_dessca, # for check train-random + 'r_load.resistor2.R': Load_runner_dessca.give_val, # loadstep + 'r_load.resistor3.R': Load_runner_dessca.give_val}, + viz_cols=[ + PlotTmpl([f'r_load.resistor{i}.R' for i in '123'], + callback=xylables + )], + model_path='omg_grid/grid.paper_loadstep.fmu', + max_episode_steps=max_episode_steps, + on_episode_reset_callback=cb.fire, ) + + env.reset() + R_load1 = [] + R_load2 = [] + R_load3 = [] + # for _ in range(max_episode_steps): + for current_step in tqdm(range(max_episode_steps), desc='steps', unit='step', leave=False): + env.render() + + obs, rew, done, info = env.step(env.action_space.sample()) # take a random action + + # If env is reset for several loadsteps, store env.df + """ + if current_step % round(cfg['train_episode_length'] / 10) == 0 and current_step != 0: + 
R_load1.extend(env.history.df['r_load.resistor1.R'].copy().values.tolist()) + R_load2.extend(env.history.df['r_load.resistor2.R'].copy().values.tolist()) + R_load3.extend(env.history.df['r_load.resistor3.R'].copy().values.tolist()) + + # obs = env.reset() + env.on_episode_reset_callback() + """ + if done: + break + env.close() + R_load1.extend(env.history.df['r_load.resistor1.R'].copy().values.tolist()) + R_load2.extend(env.history.df['r_load.resistor2.R'].copy().values.tolist()) + R_load3.extend(env.history.df['r_load.resistor3.R'].copy().values.tolist()) + + df_store = pd.DataFrame(list(zip(R_load1, R_load2, R_load3)), + columns=['r_load.resistor1.R', 'r_load.resistor2.R', 'r_load.resistor3.R']) + + # df_store = env.history.df[['r_load.resistor1.R', 'r_load.resistor2.R', 'r_load.resistor3.R']] + # df_store.to_pickle('R_load_tenLoadstepPerEpisode2881Len_test_case_10_seconds.pkl') + # df_store.to_pickle('R_load_deterministic_test_case_25_ohm_1_seconds.pkl') + df_store.to_pickle('R_load_dess' + ',322' + ',23,,' + 'ca.pkl') diff --git a/experiments/hp_tune/examples/reset_loop.py b/experiments/hp_tune/examples/reset_loop.py new file mode 100644 index 00000000..88560916 --- /dev/null +++ b/experiments/hp_tune/examples/reset_loop.py @@ -0,0 +1,238 @@ +import time +from typing import Union + +import gym +import matplotlib.pyplot as plt +import numpy as np +from pymongo import MongoClient +from stable_baselines3.common.callbacks import BaseCallback +from stable_baselines3.common.monitor import Monitor +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.type_aliases import GymStepReturn + +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net, folder_name +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.util import abc_to_alpha_beta + +np.random.seed(0) + +# toDo: what to store: +""" +Alle importieren vom Recorder der in DB speichert und interagieren an den richtungen stellen mit dem env/agent... + +after training: -> like: SaveOnBestTrainingRewardCallback(BaseCallback): after training + hyperopt-data + weights + model / net-architecture + +Each step: -> StepRecorder (ggf. StepMonitor?) + training_reward + messdaten? (aus der net.yaml die outs?) 
+ + training_return -> if episode done: store return(-> sollte der Monitor kennen) + +config +skriptname +start- und endzeit stempel +Computername +Architektur des Netzes (mit model.to_json() ) +Gewichte des Netzes (mit model.get_layer('layer_name').weights) +Prädiktion (für jede Zielgröße eine längere Liste) +Testset (profilnummern von den messschrieben die prädiziert wurden) + +""" + + +class Recorder: + + def __init__(self, URI: str = 'mongodb://localhost:27017/', database_name: str = 'OMG', ): + self.client = MongoClient(URI) + self.db = self.client[database_name] + + def save_to_mongodb(self, col: str = ' trails', data=None): + trial_coll = self.db[col] # get collection named col + if data is None: + raise ValueError('No data given to store in database!') + trial_coll.insert_one(data) + + +class FeatureWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = np.inf): + """ + Env Wrapper to add features to the env-observations and adds information to env.step output which can be used in + case of an continuing (non-episodic) task to reset the environment without being terminated by done + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) + : + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + self.training_episode_length = training_episode_length + self._n_training_steps = 0 + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + + obs, reward, done, info = super().step(action) + + self._n_training_steps += 1 + + if self._n_training_steps % self.training_episode_length == 0: + info["timelimit_reached"] = True + + # log measurement here? + + # add wanted features here (add appropriate self.observation in init!!) + # calculate magnitude of current phasor abc + feature_diff_imax_iphasor = self.cal_phasor_magnitude(obs[0:3]) + + obs = np.append(obs, feature_diff_imax_iphasor) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + obs = super().reset() + self._n_training_steps = 0 + + # reset timelimit_reached flag + # self.info["timelimit_reached"] = False + + feature_diff_imax_iphasor = self.cal_phasor_magnitude(obs[0:3]) + obs = np.append(obs, feature_diff_imax_iphasor) + + return obs + + def cal_phasor_magnitude(self, abc: np.array) -> float: + """ + Calculated the magnitude of a phasor in a three phase system. Maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (1 - phasor_mag) - 0.5. 
-0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + + :param abc: Due to limit normed currents or voltages in abc frame + :return: magnitude of the current or voltage phasor + """ + # calculate magnitude of current phasor abc-> alpha,beta ->|sqrt(alpha² + beta²)| + i_alpha_beta = abc_to_alpha_beta(abc) + i_phasor_mag = np.sqrt(i_alpha_beta[0] ** 2 + i_alpha_beta[1] ** 2) + + # mapping [0,1+] + # feature_diff_imax_iphasor = 1 - (1 - i_phasor_mag) + + # mapping [-0.5 -,0.5] (can be < 0.5 if phasor exceeds lim) + feature_diff_imax_iphasor = (1 - i_phasor_mag) - 0.5 + + return feature_diff_imax_iphasor + + +class TrainRecorder(BaseCallback): + + def __init__(self, verbose=1): + super(TrainRecorder, self).__init__(verbose) + + def _on_training_end(self) -> None: + """ + This event is triggered before exiting the `learn()` method. + """ + # asd = 1 + # ads = 2 + pass + + def _on_step(self) -> bool: + asd = 1 + + # nach env.step() + + return True + + def _on_rollout_end(self) -> None: + # asd = 1 + pass + + +mongo_recorder = Recorder(database_name=folder_name) + +rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, 1, + use_gamma_normalization=1, error_exponent=1, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + +def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + fig.savefig( + f'{folder_name}/{n_trail}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + +def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + ts = time.gmtime() + fig.savefig( + f'{folder_name}/{n_trail}/Inductor_currents{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + +def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + fig.savefig(f'{folder_name}/{n_trail}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + +env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + # reward_fun=rew.rew_fun, + reward_fun=rew.rew_fun_include_current, + # reward_fun=rew.rew_fun, + abort_reward=-(1 - rew.gamma), + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + +env = FeatureWrapper(env, number_of_features=1, training_episode_length=1000) + +while True: + obs = env.reset() + + asd = 1 diff --git a/experiments/hp_tune/examples/single_inverter_voltage_current_control_PIPI.py b/experiments/hp_tune/examples/single_inverter_voltage_current_control_PIPI.py 
new file mode 100644 index 00000000..8ca6cacf --- /dev/null +++ b/experiments/hp_tune/examples/single_inverter_voltage_current_control_PIPI.py @@ -0,0 +1,392 @@ +##################################### +# Experiment : Single voltage forming inverter supplying an RL-load via an LC-filter +# Controller: Cascaded PI-PI voltage and current controller gain parameters are optimized by SafeOpt +# a) FMU by OpenModelica and SafeOpt algorithm to find optimal controller parameters +# b) connecting via ssh to a testbench to perform real-world measurement +import time +import logging +import os +from functools import partial +from itertools import tee + +import GPy +import gym +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from pymongo import MongoClient +from stochastic.processes import VasicekProcess +from tqdm import tqdm + +from experiments.hp_tune.env.random_load import RandomLoad +from experiments.model_validation.env.testbench_voltage_ctrl import TestbenchEnvVoltage +from experiments.model_validation.execution.monte_carlo_runner import MonteCarloRunner +from experiments.model_validation.execution.runner_hardware import RunnerHardwareGradient +from openmodelica_microgrid_gym.agents import SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from experiments.hp_tune.env.rewards import Reward +from experiments.model_validation.env.stochastic_components import Load +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + +# Plot setting +params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 8, # fontsize for x and y labels (was 10) + 'axes.titlesize': 8, + 'font.size': 8, # was 10 + 'legend.fontsize': 8, # was 10 + 'xtick.labelsize': 8, + 'ytick.labelsize': 8, + 'text.usetex': True, + 'figure.figsize': [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } +matplotlib.rcParams.update(params) + +include_simulate = True +show_plots = True +balanced_load = False +do_measurement = False +save_results = True + +# Files saves results and resulting plots to the folder saves_VI_control_safeopt in the current directory +current_directory = os.getcwd() +folder_name = 'Pipi_safeopt_best_run4d' +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +np.random.seed(1) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_vctrl_single_inv.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +max_episode_steps = 2000 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. 
L,R, noise) from +v_DC = 600 # DC-link voltage / V; will be set as model parameter in the FMU +nomFreq = 60 # nominal grid frequency / Hz +nomVoltPeak = 169.7 # 230 * 1.414 # nominal grid voltage / V +iLimit = 16 # inverter current limit / A +iNominal = 12 # nominal inverter current / A +vNominal = 190 # nominal inverter current / A +vLimit = vNominal * 1.5 # inverter current limit / A +funnelFactor = 0.02 +vFunnel = np.array([vNominal * funnelFactor, vNominal * funnelFactor, vNominal * funnelFactor]) +mu = 400 # factor for barrier function (see below) +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + + +class Recorder: + + def __init__(self, URI: str = 'mongodb://localhost:27017/', database_name: str = 'OMG', ): + self.client = MongoClient(URI) + self.db = self.client[database_name] + + def save_to_mongodb(self, col: str = ' trails', data=None): + trial_coll = self.db[col] # get collection named col + if data is None: + raise ValueError('No data given to store in database!') + trial_coll.insert_one(data) + + +def run_experiment(): + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma=0, + use_gamma_normalization=1, error_exponent=0.5, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + ##################################### + # Definitions for the GP + prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set + noise_var = 0.001 # ** 2 # measurement noise sigma_omega + prior_var = 2 # prior variance of the GP + + # Choose Kp and Ki (current and voltage controller) as mutable parameters (below) and define bounds and lengthscale + # for both of them + bounds = [(0.000, 0.045), (4, 450)] # bounds on the input variable current-Ki&Kp and voltage-Ki&Kp + lengthscale = [.003, 50.] # length scale for the parameter variation [current-Ki&Kp and voltage-Ki&Kp] for the GP + + # The performance should not drop below the safe threshold, which is defined by the factor safe_threshold times + # the initial performance: safe_threshold = 1.2 means: performance measurement for optimization are seen as + # unsafe, if the new measured performance drops below 20 % of the initial performance of the initial safe (!) + # parameter set + safe_threshold = 0 + j_min = -5 # cal min allowed performance + + # The algorithm will not try to expand any points that are below this threshold. This makes the algorithm stop + # expanding points eventually. 
+ # The following variable is multiplied with the first performance of the initial set by the factor below: + explore_threshold = 0 + + # Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of + # limit exceeded + abort_reward = 100 * j_min + + # Definition of the kernel + kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + + ##################################### + # Definition of the controllers + # Choose Kp and Ki for the current and voltage controller as mutable parameters + mutable_params = dict(voltageP=MutableFloat(0.002), voltageI=MutableFloat(143)) # 300Hz + # mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz + voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-iLimit, iLimit)) + + kp_c = 0.033 + ki_c = 17.4 # 11.8 + current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) # Current controller values + + # Define the droop parameters for the inverter of the active power Watt/Hz (DroopGain), delta_t (0.005) used for the + # filter and the nominal frequency + # Droop controller used to calculate the virtual frequency drop due to load changes + droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) + + # Define the Q-droop parameters for the inverter of the reactive power VAR/Volt, delta_t (0.002) used for the + # filter and the nominal voltage + qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + + # Define a voltage forming inverter using the PIPI and droop parameters from above + + # Controller with observer + # ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, delta_t, droop_param, qdroop_param, + # observer=[Lueneberger(*params) for params in + # repeat((A, B, C, L, delta_t * undersample, v_DC / 2), 3)], undersampling=undersample, + # name='master') + + # Controller without observer + ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + + ##################################### + # Definition of the optimization agent + # The agent is using the SafeOpt algorithm by F. Berkenkamp (https://arxiv.org/abs/1509.01066) in this example + # Arguments described above + # History is used to store results + agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + + if include_simulate: + + i_lim = net['inverter1'].i_lim # inverter current limit / A + i_nom = net['inverter1'].i_nom # nominal inverter current / A + v_nom = net.v_nom + v_lim = net['inverter1'].v_lim + v_DC = net['inverter1'].v_DC + # plant + + # toDo: shift this to net?! + L_filter = 2.3e-3 # / H + R_filter = 400e-3 # / Ohm + C_filter = 10e-6 # / F + + lower_bound_load = 11 # to allow maximal load that draws i_limit (toDo: let exceed?) 
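        # Note: the 11 Ohm lower bound corresponds roughly to nomVoltPeak / iLimit = 169.7 V / 16 A ~ 10.6 Ohm,
        # i.e. the smallest resistive load for which the drawn phase current stays just below the inverter
        # current limit defined above.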
+ upper_bound_load = 160 # to apply symmetrical load bounds + + R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + + loadstep_timestep = max_episode_steps / 2 + + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + + rand_load_test = RandomLoad(max_episode_steps, net.ts, gen, + load_curve=pd.read_pickle( + 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + # PlotTmpl([[f'master.I_hat{i}' for i in 'abc'], [f'r_load.resistor{i}.i' for i in '123'], ], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$i_{\mathrm{o estimate,abc}}\,/\,\mathrm{A}$'), + # color=[['b', 'r', 'g'], ['b', 'r', 'g']], + # style=[['-*'], ['--*']] + # ), + # PlotTmpl([[f'master.m{i}' for i in 'dq0']], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$m_{\mathrm{dq0}}\,/\,\mathrm{}$', + # filename='Sim_m_dq0') + # ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + log_level=logging.INFO, + viz_mode='episode', + max_episode_steps=20000, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R') + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + history=FullHistory(), + action_time_delay=1 * undersample + ) + + return_sum = 0.0 + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + + """ + # 
toDo: - Use other Test-episode + # - Rückgabewert = (Summe der üblichen Rewards) / (Anzahl steps Validierung) + (Penalty i.H.v. -1) + while True: + action = agent.act(obs) + obs, rewards, done, info = env_test.step(action) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + """ + + agent.reset() + agent.obs_varnames = env.history.cols + env.history.cols = env.history.structured_cols(None) + agent.measurement_cols + env.measure = agent.measure + + reward_list = [] + + agent_fig = None + obs = env.reset() + for _ in tqdm(range(env.max_episode_steps), desc='steps', unit='step', leave=False): + # for _ in tqdm(range(1000), desc='steps', unit='step', leave=False): + + done, r = False, None + + if len(reward_list) > 10000: + asd = 1 + + agent.observe(r, done) + act = agent.act(obs) + obs, r, done, info = env.step(act) + reward_list.append(r) + env.render() + return_sum += r + if r == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + + # if done: + # break + # close env before calling final agent observe to see plots even if agent crashes + _, env_fig = env.close() + agent.observe(r, done) + print(limit_exceeded_in_test) + ret = return_sum / env.max_episode_steps + limit_exceeded_penalty + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": reward_list} + + # Add v-measurements + test_after_training.update({env.viz_col_tmpls[j].vars[i].replace(".", "_"): env.history[ + env.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + + test_after_training.update({env.viz_col_tmpls[2].vars[i].replace(".", "_"): env.history[ + env.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + # va = self.env.env.history[self.env.env.viz_col_tmpls[0].vars[0]].copy() + # mongo_recorder = Recorder(database_name=folder_name) + # mongo_recorder.save_to_mongodb('Trail_number_3', test_after_training) + + return (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + +ret = run_experiment() + +print(ret) diff --git a/experiments/hp_tune/execution/reset_runner.py b/experiments/hp_tune/execution/reset_runner.py new file mode 100644 index 00000000..6d5be09b --- /dev/null +++ b/experiments/hp_tune/execution/reset_runner.py @@ -0,0 +1,168 @@ +import numpy as np +from typing import Dict, Any +from tqdm import tqdm +from openmodelica_microgrid_gym.agents.episodic import EpisodicLearnerAgent +from openmodelica_microgrid_gym.env import ModelicaEnv + + +class MonteCarloRunner: + """ + This class will execute an agent on the environment. + It handles communication between agent and environment and handles the execution of multiple epochs + Additionally to runner, the Monte-Carlo runner has an additional loop to perform n_MC experiments using one + (controller) parameter set before update the (controller) parameters. + Therefore, the agent.observe function is used. + Inside the MC-loop the observe function is called with terminated = False to only update the return. + The return is stored in an array at the end of the MC-loop. 
+ After finishing the MC-loop, the average of the return-array is used to update the (controller) parameters. + Therefore, the agent-observe function is called with terminated = True + """ + + def __init__(self, agent: EpisodicLearnerAgent, env: ModelicaEnv): + """ + + :param agent: Agent that acts on the environment + :param env: Environment tha Agent acts on + """ + self.env = env + self.agent = agent + self.agent.env = env + self.run_data = dict() # type: Dict[str,Any] + """ + Dictionary storing information about the experiment. + + - "best_env_plt": environment best plots + - "best_episode_idx": index of best episode + - "agent_plt": last agent plot + """ + + def run(self, n_episodes: int = 10, n_mc: int = 5, visualise: bool = False, prepare_mc_experiment=lambda: True, + return_gradient_extend: bool = False): + """ + Trains/executes the agent on the environment for a number of epochs + + :param n_episodes: number of epochs to play + :param n_mc: number of Monte-Carlo experiments using the same parameter set before updating the latter + :param visualise: turns on visualization of the environment + :param prepare_mc_experiment: prepares experiment by resetting stochastic components + :param return_gradient_extend: calculates gradient extension for return if return_gradient_extend + """ + t = np.linspace(0, self.env.max_episode_steps * self.env.net.ts, self.env.max_episode_steps + 1) + self.agent.reset() + self.env.history.cols = self.env.history.structured_cols(None) + self.agent.measurement_cols + self.agent.obs_varnames = self.env.history.cols + self.env.measure = self.agent.measure + + initial_performance_mc = np.zeros(n_mc) + performance_mc = np.zeros(n_mc) + + if not visualise: + self.env.viz_mode = None + agent_fig = None + + for i in tqdm(range(n_episodes), desc='episodes', unit='epoch'): + done, r = False, None + np.random.seed(0) + for m in tqdm(range(n_mc), desc='monte_carlo_run', unit='epoch', leave=False): + prepare_mc_experiment() # reset stoch components + + r_vec = np.zeros(self.env.max_episode_steps + 1) + + obs = self.env.reset() + + for p in tqdm(range(self.env.max_episode_steps + 1), desc='steps', unit='step', leave=False): + self.agent.observe(r, False) + act = self.agent.act(obs) + if p % 1000 == 0 and p > 0: + asd = 1 + obs = self.env.reset() + self.agent.controllers['master'].reset() + obs, r, done, info = self.env.step(act) + r_vec[p] = r + self.env.render() + if p == self.env.max_episode_steps: + self.agent.observe(r, False) + + if return_gradient_extend: + w = self.env.history['master.CVVd'].values + w1 = self.env.history['master.CVVq'].values + w2 = self.env.history['master.CVV0'].values + v = self.env.history['master.SPVd'].values + + SP_sattle = (abs(w - v) < v * 0.12).astype(int) # 0.12 -> +-20V setpoint + + dw = np.gradient(w) + dw1 = np.gradient(w1) + dw2 = np.gradient(w2) + + dev_return = (np.mean(abs(SP_sattle * dw)) + np.mean(abs(SP_sattle * dw1)) + np.mean( + abs(SP_sattle * dw2))) + else: + dev_return = 0 + print('NO DEV RETURN!!!!') + + dev_fac = 0.5 # 3 + + print(self.agent.episode_return) + print(dev_return) + + self.agent.performance = (( + self.agent.episode_return - dev_return * dev_fac) - self.agent.min_performance) \ + / (self.agent.initial_performance - self.agent.min_performance) + + if m == 0 and i == 0: + self.agent.initial_performance = self.agent.episode_return - dev_return * dev_fac + self.agent.performance = (( + self.agent.episode_return - dev_return * dev_fac) - self.agent.min_performance) \ + / ( + self.agent.initial_performance - 
self.agent.min_performance) # instead of perf/initial_perf + self.agent.last_best_performance = self.agent.performance + self.agent.last_worst_performance = self.agent.performance + + self.agent.best_episode = self.agent.history.df.shape[0] + self.agent.last_best_performance = self.agent.performance + self.agent.worst_episode = self.agent.history.df.shape[0] + self.agent.last_worst_performance = self.agent.performance + + self.agent.performance = (( + self.agent.episode_return - dev_return * dev_fac) - self.agent.min_performance) \ + / (self.agent.initial_performance - self.agent.min_performance) + + performance_mc[m] = self.agent.performance + initial_performance_mc[m] = self.agent.episode_return + # set iterations and episode return = 0 + self.agent.prepare_episode() + + break + + _, env_fig = self.env.close() + + # vor break? + if (m == 0 and i == 0): # and self.agent.has_improved: + self.run_data['best_env_plt'] = env_fig + self.run_data['best_episode_idx'] = i + self.agent.last_best_performance = self.agent.performance + + if (m == 0 and i == 0): # and self.agent.has_worsened: + self.run_data['worst_env_plt'] = env_fig + self.run_data['worst_episode_idx'] = i + self.agent.last_worst_performance = self.agent.performance + + if i == 0: + # performance was normalized to first run -> use average of first episode so that J_initial for first + # is 1 + eps_ret = performance_mc * ( + self.agent.initial_performance - self.agent.min_performance) + self.agent.min_performance + self.agent.initial_performance = np.mean(eps_ret) + performance_mc = (eps_ret - self.agent.min_performance) \ + / (self.agent.initial_performance - self.agent.min_performance) + + self.agent.performance = np.mean(performance_mc) + if self.agent.performance > 1: + asd = 1 + self.agent.update_params() + + if visualise: + agent_fig = self.agent.render() + + self.run_data['last_agent_plt'] = agent_fig diff --git a/experiments/hp_tune/execution/runner.py b/experiments/hp_tune/execution/runner.py new file mode 100644 index 00000000..55cf1736 --- /dev/null +++ b/experiments/hp_tune/execution/runner.py @@ -0,0 +1,84 @@ +from typing import Dict, Any, Optional + +from tqdm import tqdm + +from openmodelica_microgrid_gym.agents import Agent +from openmodelica_microgrid_gym.env import ModelicaEnv +from openmodelica_microgrid_gym.execution.callbacks import Callback + + +class Runner: + """ + This class will execute an agent on the environment. + It handles communication between agent and environment and handles the execution of multiple epochs + """ + + def __init__(self, agent: Agent, env: ModelicaEnv, callback: Optional[Callback] = None): + """ + + :param agent: Agent that acts on the environment + :param env: Environment tha Agent acts on + """ + self.env = env + self.agent = agent + self.agent.env = env + self.run_data = dict() # type: Dict[str,Any] + self.callback = callback + """ + Dictionary storing information about the experiment. 
+ + - "best_env_plt": environment best plots + - "best_episode_idx": index of best episode + - "agent_plt": last agent plot + """ + + def run(self, n_episodes: int = 10, visualise: bool = False): + """ + Trains/executes the agent on the environment for a number of epochs + + :param n_episodes: number of epochs to play + :param visualise: turns on visualization of the environment + """ + self.agent.reset() + self.agent.obs_varnames = self.env.history.cols + self.env.history.cols = self.env.history.structured_cols(None) + self.agent.measurement_cols + self.env.measure = self.agent.measure + + reward_list = [] + + agent_fig = None + + for i in tqdm(range(n_episodes), desc='episodes', unit='epoch'): + obs = self.env.reset() + if self.callback is not None: + self.callback.reset() + done, r = False, None + for _ in tqdm(range(self.env.max_episode_steps), desc='steps', unit='step', leave=False): + self.agent.observe(r, done) + act = self.agent.act(obs) + obs, r, done, info = self.env.step(act) + reward_list.append(r) + if self.callback is not None: + self.callback(self.env.history.cols, self.env.history.last()) + if visualise: + self.env.render() + if done: + break + # close env before calling final agent observe to see plots even if agent crashes + _, env_fig = self.env.close() + self.agent.observe(r, done) + + if visualise: + agent_fig = self.agent.render() + + self.run_data['last_agent_plt'] = agent_fig + + if i == 0 or self.agent.has_improved: + self.run_data['best_env_plt'] = env_fig + self.run_data['best_episode_idx'] = i + + if i == 0 or self.agent.has_worsened: + self.run_data['worst_env_plt'] = env_fig + self.run_data['worst_episode_idx'] = i + + return reward_list diff --git a/experiments/hp_tune/experiement_custom_td3.py b/experiments/hp_tune/experiement_custom_td3.py new file mode 100644 index 00000000..24955690 --- /dev/null +++ b/experiments/hp_tune/experiement_custom_td3.py @@ -0,0 +1,273 @@ +import platform +import time +from collections import OrderedDict + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG, TD3 +from experiments.hp_tune.policies.split_actor import CustomTD3Policy +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.configTD3 import cfg +from experiments.hp_tune.util.recorder import Recorder + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] + + +def experiment_fit_Custom_TD3(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, antiwindup_weight, + penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, 
n_trail, + policy_delay, target_policy_noise, target_noise_clip + ): + if node not in cfg['lea_vpn_nodes']: + # assume we are on pc2 + log_path = f'/scratch/hpc-prf-reinfl/weber/OMG/{folder_name}/{n_trail}/' + else: + log_path = f'{folder_name}/{n_trail}/' + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + + env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps) + + # todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... + env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + # action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, + # sigma_min=noise_var * np.ones(n_actions) * noise_var_min, + # mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + # sigma=noise_var * np.ones(n_actions), dt=net.ts) + print(optimizer) + if optimizer == 'SGD': + used_optimzer = th.optim.SGD + elif optimizer == 'RMSprop': + used_optimzer = th.optim.RMSprop + # elif optimizer == 'LBFGS': + # needs in step additional argument + # used_optimzer = th.optim.LBFGS + else: + used_optimzer = th.optim.Adam + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers), + optimizer_class=used_optimzer) + + model = TD3(policy='CustomTD3Policy', + env=env, + learning_rate=learning_rate, + buffer_size=buffer_size, + learning_starts=100, + batch_size=batch_size, + tau=tau, + gamma=gamma, + train_freq=(train_freq, train_freq_type), + gradient_steps=-1, + action_noise=action_noise, + optimize_memory_usage=False, + policy_delay=policy_delay, + target_policy_noise=target_policy_noise, + target_noise_clip=target_noise_clip, + tensorboard_log=log_path, + create_eval_env=False, + policy_kwargs=policy_kwargs, + verbose=0, + seed=None, + device="auto", + _init_setup_model=True + ) + + # Adjust network -> maybe change to Costume net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html + # adn scale weights and biases + param_dict_scaled = OrderedDict() + param_dict = model.get_parameters()['policy'] + for param in param_dict.items(): + print(param) + param_dict_scaled[param[0]] = param[1] * weight_scale + + new_param_dict = {'policy': param_dict_scaled, + 'actor.optimizer': model.get_parameters()['actor.optimizer'], + 'critic.optimizer': model.get_parameters()['critic.optimizer']} + + # 
new_param_dict = OrderedDict() + + model.set_parameters(new_param_dict) + + # print('alphaRelu fehlt noch!') + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor.I._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.I._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic.qf1._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf1._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + # todo: Downscale actionspace - lessulgy possible? Interaction pytorch... + env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Trial number": n_trail, + "Database name": folder_name, + "Sum_eps_reward": env.get_episode_rewards() + } + mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + + model.save(log_path + f'model.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? 
+ obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, penalty_P_weight=0) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + + while True: + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! 
(risk=0 manullay in env); only voltage taken into account in reward!"} + + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) diff --git a/experiments/hp_tune/experiment_fit_DDPG_custom.py b/experiments/hp_tune/experiment_fit_DDPG_custom.py new file mode 100644 index 00000000..06c496c3 --- /dev/null +++ b/experiments/hp_tune/experiment_fit_DDPG_custom.py @@ -0,0 +1,272 @@ +import platform +import time +from collections import OrderedDict + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG, TD3 +from experiments.hp_tune.policies.split_actor import CustomTD3Policy +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] + + +def experiment_fit_DDPG_custom(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, antiwindup_weight, + penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, n_trail + ): + if node not in cfg['lea_vpn_nodes']: + # assume we are on pc2 + log_path = f'/scratch/hpc-prf-reinfl/weber/OMG/{folder_name}/{n_trail}/' + else: + log_path = f'{folder_name}/{n_trail}/' + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + + env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + 
t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps) + + # todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... + env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + # action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, + # sigma_min=noise_var * np.ones(n_actions) * noise_var_min, + # mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + # sigma=noise_var * np.ones(n_actions), dt=net.ts) + print(optimizer) + if optimizer == 'SGD': + used_optimzer = th.optim.SGD + elif optimizer == 'RMSprop': + used_optimzer = th.optim.RMSprop + # elif optimizer == 'LBFGS': + # needs in step additional argument + # used_optimzer = th.optim.LBFGS + else: + used_optimzer = th.optim.Adam + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers), + optimizer_class=used_optimzer) + + model = DDPG(policy='CustomTD3Policy', + env=env, + learning_rate=learning_rate, + buffer_size=buffer_size, + learning_starts=100, + batch_size=batch_size, + tau=tau, + gamma=gamma, + train_freq=(train_freq, train_freq_type), + gradient_steps=-1, + action_noise=action_noise, + optimize_memory_usage=False, + # policy_delay=policy_delay, + # target_policy_noise=target_policy_noise, + # target_noise_clip=target_noise_clip, + tensorboard_log=log_path, + create_eval_env=False, + policy_kwargs=policy_kwargs, + verbose=0, + seed=None, + device="auto", + _init_setup_model=True + ) + + # Adjust network -> maybe change to Costume net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html + # adn scale weights and biases + param_dict_scaled = OrderedDict() + param_dict = model.get_parameters()['policy'] + for param in param_dict.items(): + print(param) + param_dict_scaled[param[0]] = param[1] * weight_scale + + new_param_dict = {'policy': param_dict_scaled, + 'actor.optimizer': model.get_parameters()['actor.optimizer'], + 'critic.optimizer': model.get_parameters()['critic.optimizer']} + + # new_param_dict = OrderedDict() + + model.set_parameters(new_param_dict) + + # print('alphaRelu fehlt noch!') + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor.I._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.I._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + # model.critic.qf1._modules[str(count + 1)].negative_slope = alpha_relu_critic + # model.critic_target.qf1._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + # todo: Downscale actionspace - lessulgy possible? Interaction pytorch... 
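    # The FeatureWrapper exposes a 6-dimensional action to the agent (a proportional and an integral part
    # per phase), while the underlying FMU environment only accepts the three modulation indices. The Box
    # is widened to 6 above so that SB3 builds the actor, critic and replay buffer for 6-dimensional
    # actions; it is set back to 3 below before interacting with the environment. A rough sketch of the
    # recombination the wrapper is assumed to perform (names illustrative, not the actual implementation):
    #
    #   action_P, action_I = action[:3], action[3:]
    #   integrator_sum += action_I * antiwindup_weight                 # integrator state, anti-windup weighted
    #   action_to_env = action_P + integrator_weight * integrator_sum  # applied P-part plus integral part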
+ env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Trial number": n_trail, + "Database name": folder_name, + "Sum_eps_reward": env.get_episode_rewards() + } + mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + + model.save(log_path + f'model.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, penalty_P_weight=0) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + + while True: + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! 
(risk=0 manullay in env); only voltage taken into account in reward!"} + + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) diff --git a/experiments/hp_tune/experiment_vctrl_single_inv.py b/experiments/hp_tune/experiment_vctrl_single_inv.py new file mode 100644 index 00000000..03ea17b7 --- /dev/null +++ b/experiments/hp_tune/experiment_vctrl_single_inv.py @@ -0,0 +1,375 @@ +import platform +import time + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper, FeatureWrapper_pastVals, FeatureWrapper_futureVals, \ + FeatureWrapper_I_controller, BaseWrapper +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] +Ki_ddpg_combi = 182 + +def experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, antiwindup_weight, + penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, n_trail, + number_past_vals=0): + if node not in cfg['lea_vpn_nodes']: + # assume we are on pc2 + log_path = f'/scratch/hpc-prf-reinfl/weber/OMG/{folder_name}/{n_trail}/' + else: + log_path = f'{folder_name}/{n_trail}/' + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + # , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'] + ) + + if cfg['env_wrapper'] == 'past': + env = FeatureWrapper_pastVals(env, number_of_features=9 + number_past_vals * 3, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, 
integrator_weight=integrator_weight,
+                                      antiwindup_weight=antiwindup_weight, gamma=gamma,
+                                      penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight,
+                                      t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P,
+                                      number_learing_steps=number_learning_steps, number_past_vals=number_past_vals)
+
+    elif cfg['env_wrapper'] == 'future':
+        env = FeatureWrapper_futureVals(env, number_of_features=9, training_episode_length=training_episode_length,
+                                        recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight,
+                                        antiwindup_weight=antiwindup_weight, gamma=gamma,
+                                        penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight,
+                                        t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P,
+                                        number_learing_steps=number_learning_steps, number_future_vals=10)
+
+    elif cfg['env_wrapper'] == 'I-controller':
+        env = FeatureWrapper_I_controller(env, number_of_features=12 + number_past_vals * 3,  # including integrator_sum
+                                          training_episode_length=training_episode_length,
+                                          recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight,
+                                          antiwindup_weight=antiwindup_weight, gamma=gamma,
+                                          penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight,
+                                          t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P,
+                                          number_learing_steps=number_learning_steps, Ki=Ki_ddpg_combi,
+                                          number_past_vals=number_past_vals)
+
+    elif cfg['env_wrapper'] == 'no-I-term':
+        env = BaseWrapper(env, number_of_features=6 + number_past_vals * 3,
+                          training_episode_length=training_episode_length,
+                          recorder=mongo_recorder, n_trail=n_trail, gamma=gamma,
+                          number_learing_steps=number_learning_steps, number_past_vals=number_past_vals)
+
+    else:
+        env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length,
+                             recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight,
+                             antiwindup_weight=antiwindup_weight, gamma=gamma,
+                             penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight,
+                             t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P,
+                             number_learing_steps=number_learning_steps)  # , use_past_vals=True, number_past_vals=30)
+
+    # todo: upscale action space - is a less ugly way possible? Interaction with PyTorch...
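+    # Descriptive note (added for clarity): for every wrapper variant except
+    # 'no-I-term' and 'I-controller' the agent outputs a P part and an I part per
+    # phase (2 * 3 = 6 values), while the wrapped env itself only accepts the 3
+    # modulation indices the wrapper computes from them. The Box is therefore
+    # widened to 6 dimensions before the DDPG model is created, so that
+    # stable-baselines3 builds the actor/critic with 6 action outputs; it is set
+    # back to 3 dimensions after model creation (see the matching block below).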
+ if cfg['env_wrapper'] not in ['no-I-term', 'I-controller']: + env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + # action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, + # sigma_min=noise_var * np.ones(n_actions) * noise_var_min, + # mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + # sigma=noise_var * np.ones(n_actions), dt=net.ts) + print(optimizer) + if optimizer == 'SGD': + used_optimzer = th.optim.SGD + elif optimizer == 'RMSprop': + used_optimzer = th.optim.RMSprop + # elif optimizer == 'LBFGS': + # needs in step additional argument + # used_optimzer = th.optim.LBFGS + else: + used_optimzer = th.optim.Adam + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers), + optimizer_class=used_optimzer) + + model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=log_path, + # model = myDDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name}/{n_trail}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=buffer_size, + # learning_starts=int(learning_starts * training_episode_length), + batch_size=batch_size, tau=tau, gamma=gamma, action_noise=action_noise, + train_freq=(train_freq, train_freq_type), gradient_steps=- 1, + optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + # Adjust network -> maybe change to Costume net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html + # adn scale weights and biases + count = 0 + for kk in range(actor_number_layers + 1): + + model.actor.mu._modules[str(count)].weight.data = model.actor.mu._modules[str(count)].weight.data * weight_scale + model.actor_target.mu._modules[str(count)].weight.data = model.actor_target.mu._modules[ + str(count)].weight.data * weight_scale + + model.actor.mu._modules[str(count)].bias.data = model.actor.mu._modules[str(count)].bias.data * bias_scale + model.actor_target.mu._modules[str(count)].bias.data = model.actor.mu._modules[ + str(count)].bias.data * bias_scale + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + # todo: Downscale actionspace - lessulgy possible? Interaction pytorch... 
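+    # Counterpart to the upscaling above: now that the networks have been built
+    # with 6 outputs, the Box is reset to the 3 actions the wrapped env actually
+    # consumes. Rough sketch of what the feature wrapper is assumed to do with the
+    # 6-dimensional agent action (its implementation is not part of this diff,
+    # names are illustrative):
+    #   action_P, action_I = action[:3], action[3:]
+    #   integrator_sum += integrator_weight * action_I  # plus an anti-windup correction
+    #   env_action = action_P + integrator_sum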
+ if cfg['env_wrapper'] not in ['no-I-term', 'I-controller']: + env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Trial number": n_trail, + "Database name": folder_name, + "Sum_eps_reward": env.get_episode_rewards() + } + mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + + model.save(log_path + f'model.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + # , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'] + ) + + if cfg['env_wrapper'] == 'past': + env_test = FeatureWrapper_pastVals(env_test, number_of_features=9 + number_past_vals * 3, + integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, + penalty_P_weight=0, number_past_vals=number_past_vals, + training_episode_length=training_episode_length, ) + elif cfg['env_wrapper'] == 'future': + env_test = FeatureWrapper_futureVals(env_test, number_of_features=9, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps, number_future_vals=10) + elif cfg['env_wrapper'] == 'I-controller': + env_test = FeatureWrapper_I_controller(env_test, number_of_features=12 + number_past_vals * 3, + # including integrator_sum + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps, Ki=Ki_ddpg_combi, + number_past_vals=number_past_vals) + + elif cfg['env_wrapper'] == 'no-I-term': + env_test = BaseWrapper(env_test, number_of_features=6 + number_past_vals * 3, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, gamma=gamma, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + + else: + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, + penalty_P_weight=0, + training_episode_length=training_episode_length, ) # , use_past_vals=True, number_past_vals=30) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + phase_list = [] + 
phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + va = [] + vb = [] + vc = [] + v_ref0 = [] + v_ref1 = [] + v_ref2 = [] + ia = [] + ib = [] + ic = [] + R_load = [] + + for step in range(env_test.max_episode_steps): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + if cfg['env_wrapper'] not in ['no-I-term']: + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + + if step % 1000 == 0 and step != 0: + # if step % cfg['train_episode_length'] == 0 and step != 0: + va.extend(env_test.history[env_test.viz_col_tmpls[0].vars[0]].copy().values.tolist()) + vb.extend(env_test.history[env_test.viz_col_tmpls[0].vars[1]].copy().values.tolist()) + vc.extend(env_test.history[env_test.viz_col_tmpls[0].vars[2]].copy().values.tolist()) + v_ref0.extend(env_test.history[env_test.viz_col_tmpls[0].vars[3]].copy().values.tolist()) + v_ref1.extend(env_test.history[env_test.viz_col_tmpls[0].vars[4]].copy().values.tolist()) + v_ref2.extend(env_test.history[env_test.viz_col_tmpls[0].vars[5]].copy().values.tolist()) + ia.extend(env_test.history[env_test.viz_col_tmpls[1].vars[0]].copy().values.tolist()) + ib.extend(env_test.history[env_test.viz_col_tmpls[1].vars[1]].copy().values.tolist()) + ic.extend(env_test.history[env_test.viz_col_tmpls[1].vars[2]].copy().values.tolist()) + R_load.extend(env_test.history[env_test.viz_col_tmpls[2].vars[1]].copy().values.tolist()) + + env_test.close() + obs = env_test.reset() + phase_list.append(env_test.env.net.components[0].phase) + + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + reward_test_after_training = {"Name": "Test_Reward", + "time": ts, + "Reward": rew_list, + "Return": (return_sum / env_test.max_episode_steps + limit_exceeded_penalty), + "Trial number": n_trail, + "Database name": folder_name, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime())} + + mongo_recorder.save_to_json('Trial_number_' + n_trail, reward_test_after_training) + + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "lc_capacitor1_v": va, + "lc_capacitor2_v": vb, + "lc_capacitor3_v": vc, + "inverter1_v_ref_0": v_ref0, + "inverter1_v_ref_1": v_ref1, + "inverter1_v_ref_2": v_ref2, + "lc_inductor1_i": ia, + "lc_inductor2_i": ib, + "lc_inductor3_i": ic, + "r_load_resistor1_R": R_load, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": 
time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! (risk=0 manullay in env); only voltage taken into account in reward!"} + + """ + In new testenv not used, because then only the last episode is stored + """ + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + # mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) diff --git a/experiments/hp_tune/experiment_vctrl_single_inv_TD3.py b/experiments/hp_tune/experiment_vctrl_single_inv_TD3.py new file mode 100644 index 00000000..aa5cd210 --- /dev/null +++ b/experiments/hp_tune/experiment_vctrl_single_inv_TD3.py @@ -0,0 +1,315 @@ +import platform +import time +from collections import OrderedDict + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG, TD3 +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.configTD3 import cfg +from experiments.hp_tune.util.recorder import Recorder + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] + + +def experiment_fit_TD3(learning_rate, gamma, use_gamma_in_rew, weight_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, antiwindup_weight, + penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, n_trail, + policy_delay, target_policy_noise, target_noise_clip + ): + if node not in cfg['lea_vpn_nodes']: + # assume we are on pc2 + log_path = f'/scratch/hpc-prf-reinfl/weber/OMG/{folder_name}/{n_trail}/' + else: + log_path = f'{folder_name}/{n_trail}/' + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 
'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + + env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps) + + # todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... + env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + # action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, + # sigma_min=noise_var * np.ones(n_actions) * noise_var_min, + # mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + # sigma=noise_var * np.ones(n_actions), dt=net.ts) + print(optimizer) + if optimizer == 'SGD': + used_optimzer = th.optim.SGD + elif optimizer == 'RMSprop': + used_optimzer = th.optim.RMSprop + # elif optimizer == 'LBFGS': + # needs in step additional argument + # used_optimzer = th.optim.LBFGS + else: + used_optimzer = th.optim.Adam + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers), + optimizer_class=used_optimzer) + + model = TD3(policy='MlpPolicy', + env=env, + learning_rate=learning_rate, + buffer_size=buffer_size, + learning_starts=100, + batch_size=batch_size, + tau=tau, + gamma=gamma, + train_freq=(train_freq, train_freq_type), + gradient_steps=-1, + action_noise=action_noise, + optimize_memory_usage=False, + policy_delay=policy_delay, + target_policy_noise=target_policy_noise, + target_noise_clip=target_noise_clip, + tensorboard_log=log_path, + create_eval_env=False, + policy_kwargs=policy_kwargs, + verbose=0, + seed=None, + device="auto", + _init_setup_model=True + ) + + # Adjust network -> maybe change to Costume net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html + # adn scale weights and biases + param_dict_scaled = OrderedDict() + param_dict = model.get_parameters()['policy'] + for param in param_dict.items(): + # print(param) + param_dict_scaled[param[0]] = param[1] * weight_scale + + new_param_dict = {'policy': param_dict_scaled, + 'actor.optimizer': model.get_parameters()['actor.optimizer'], + 'critic.optimizer': model.get_parameters()['critic.optimizer']} + + # new_param_dict = OrderedDict() + """ + new_param_dict["policy"] = param_dict_scaled + new_param_dict["actor.optimizer"] = model.get_parameters()['actor.optimizer'] + new_param_dict["critic.optimizer"] = model.get_parameters()['critic.optimizer'] + """ + + model.set_parameters(new_param_dict) + + # print('alphaRelu fehlt noch!') + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < 
critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic.qf1._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf1._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + # todo: Downscale actionspace - lessulgy possible? Interaction pytorch... + env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Trial number": n_trail, + "Database name": folder_name, + "Sum_eps_reward": env.get_episode_rewards() + } + mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + + model.save(log_path + f'model.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, penalty_P_weight=0) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + va = [] + vb = [] + vc = [] + v_ref0 = [] + v_ref1 = [] + v_ref2 = [] + ia = [] + ib = [] + ic = [] + R_load = [] + + for step in range(env_test.max_episode_steps): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + + if step % 1000 == 0 and step != 0: + va.extend(env_test.history[env_test.viz_col_tmpls[0].vars[0]].copy().values.tolist()) + vb.extend(env_test.history[env_test.viz_col_tmpls[0].vars[1]].copy().values.tolist()) + vc.extend(env_test.history[env_test.viz_col_tmpls[0].vars[2]].copy().values.tolist()) + 
v_ref0.extend(env_test.history[env_test.viz_col_tmpls[0].vars[3]].copy().values.tolist()) + v_ref1.extend(env_test.history[env_test.viz_col_tmpls[0].vars[4]].copy().values.tolist()) + v_ref2.extend(env_test.history[env_test.viz_col_tmpls[0].vars[5]].copy().values.tolist()) + ia.extend(env_test.history[env_test.viz_col_tmpls[1].vars[0]].copy().values.tolist()) + ib.extend(env_test.history[env_test.viz_col_tmpls[1].vars[1]].copy().values.tolist()) + ic.extend(env_test.history[env_test.viz_col_tmpls[1].vars[2]].copy().values.tolist()) + R_load.extend(env_test.history[env_test.viz_col_tmpls[2].vars[1]].copy().values.tolist()) + + env_test.close() + obs = env_test.reset() + phase_list.append(env_test.env.net.components[0].phase) + + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "lc_capacitor1_v": va, + "lc_capacitor2_v": vb, + "lc_capacitor3_v": vc, + "inverter1_v_ref_0": v_ref0, + "inverter1_v_ref_1": v_ref1, + "inverter1_v_ref_2": v_ref2, + "lc_inductor1_i": ia, + "lc_inductor2_i": ib, + "lc_inductor3_i": ic, + "r_load_resistor1_R": R_load, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! 
(risk=0 manullay in env); only voltage taken into account in reward!"} + + """ + In new testenv not used, because then only the last episode is stored + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + """ + + mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) diff --git a/experiments/hp_tune/hp_tune_ddpg_objective.py b/experiments/hp_tune/hp_tune_ddpg_objective.py new file mode 100644 index 00000000..96f0e688 --- /dev/null +++ b/experiments/hp_tune/hp_tune_ddpg_objective.py @@ -0,0 +1,470 @@ +import json +import os +import time + +import sqlalchemy +from optuna.samplers import TPESampler + + +os.environ['PGOPTIONS'] = '-c statement_timeout=1000' + +import optuna +import platform +import argparse +import sshtunnel +import numpy as np +# np.random.seed(0) +from experiments.hp_tune.util.config import cfg + +from experiments.hp_tune.experiment_vctrl_single_inv import mongo_recorder, experiment_fit_DDPG +from experiments.hp_tune.util.scheduler import linear_schedule + +model_path = 'experiments/hp_tune/trained_models/study_22_run_11534/' + +PC2_LOCAL_PORT2PSQL = 11999 +SERVER_LOCAL_PORT2PSQL = 6432 +DB_NAME = 'optuna' +PC2_LOCAL_PORT2MYSQL = 11998 +SERVER_LOCAL_PORT2MYSQL = 3306 +STUDY_NAME = cfg['STUDY_NAME'] # 'DDPG_MRE_sqlite_PC2' + +node = platform.uname().node + + +def ddpg_objective_fix_params(trial): + file_congfig = open(model_path + + 'PC2_DDPG_Vctrl_single_inv_22_newTestcase_Trial_number_11534_0.json', ) + trial_config = json.load(file_congfig) + + number_learning_steps = 500000 # trial.suggest_int("number_learning_steps", 100000, 1000000) + # rew_weigth = trial.suggest_float("rew_weigth", 0.1, 5) + # rew_penalty_distribution = trial.suggest_float("antiwindup_weight", 0.1, 5) + penalty_I_weight = trial_config["penalty_I_weight"] # trial.suggest_float("penalty_I_weight", 100e-6, 2) + penalty_P_weight = trial_config["penalty_P_weight"] # trial.suggest_float("penalty_P_weight", 100e-6, 2) + + penalty_I_decay_start = trial_config[ + "penalty_I_decay_start"] # trial.suggest_float("penalty_I_decay_start", 0.00001, 1) + penalty_P_decay_start = trial_config[ + "penalty_P_decay_start"] # trial.suggest_float("penalty_P_decay_start", 0.00001, 1) + + t_start_penalty_I = int(penalty_I_decay_start * number_learning_steps) + t_start_penalty_P = int(penalty_P_decay_start * number_learning_steps) + + integrator_weight = trial_config["integrator_weight"] # trial.suggest_float("integrator_weight", 1 / 200, 2) + # integrator_weight = trial.suggest_loguniform("integrator_weight", 1e-6, 1e-0) + # antiwindup_weight = trial.suggest_loguniform("antiwindup_weight", 50e-6, 50e-3) + antiwindup_weight = trial_config["antiwindup_weight"] # trial.suggest_float("antiwindup_weight", 0.00001, 1) + + learning_rate = trial_config["learning_rate"] # trial.suggest_loguniform("learning_rate", 1e-6, 1e-1) # 0.0002# + + lr_decay_start = trial_config[ + "lr_decay_start"] # trial.suggest_float("lr_decay_start", 0.00001, 1) # 3000 # 0.2 * number_learning_steps? 
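+    # Both decay parameters are fractions of number_learning_steps: lr_decay_start
+    # marks where the linear learning-rate decay begins and lr_decay_duration how
+    # long it lasts; they are converted to absolute step counts (t_start, t_end)
+    # further below. In this fixed-parameter objective the values are read from the
+    # best-trial JSON loaded above, and the commented-out suggest calls only
+    # document the ranges that were searched originally.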
+ lr_decay_duration = trial_config["lr_decay_duration"] # trial.suggest_float("lr_decay_duration", 0.00001, + # 1) # 3000 # 0.2 * number_learning_steps? + t_start = int(lr_decay_start * number_learning_steps) + t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps, + number_learning_steps)) + final_lr = trial_config["final_lr"] # trial.suggest_float("final_lr", 0.00001, 1) + + gamma = trial_config["gamma"] # trial.suggest_float("gamma", 0.5, 0.9999) + weight_scale = trial_config["weight_scale"] # trial.suggest_loguniform("weight_scale", 5e-5, 0.2) # 0.005 + + bias_scale = trial_config["bias_scale"] # trial.suggest_loguniform("bias_scale", 5e-4, 0.1) # 0.005 + alpha_relu_actor = trial_config[ + "alpha_relu_actor"] # trial.suggest_loguniform("alpha_relu_actor", 0.0001, 0.5) # 0.005 + alpha_relu_critic = trial_config[ + "alpha_relu_critic"] # trial.suggest_loguniform("alpha_relu_critic", 0.0001, 0.5) # 0.005 + + batch_size = trial_config["batch_size"] # trial.suggest_int("batch_size", 16, 1024) # 128 + buffer_size = trial_config[ + "buffer_size"] # trial.suggest_int("buffer_size", int(1e4), number_learning_steps) # 128 + + actor_hidden_size = trial_config[ + "actor_hidden_size"] # trial.suggest_int("actor_hidden_size", 10, 200) # 100 # Using LeakyReLU + actor_number_layers = trial_config["actor_number_layers"] # trial.suggest_int("actor_number_layers", 1, 4) + + critic_hidden_size = trial_config["critic_hidden_size"] # trial.suggest_int("critic_hidden_size", 10, 300) # 100 + critic_number_layers = trial_config["critic_number_layers"] # trial.suggest_int("critic_number_layers", 1, 4) + + n_trail = str(trial.number) + use_gamma_in_rew = 1 + noise_var = trial_config["noise_var"] # trial.suggest_loguniform("noise_var", 0.01, 1) # 2 + # min var, action noise is reduced to (depends on noise_var) + noise_var_min = 0.0013 # trial.suggest_loguniform("noise_var_min", 0.0000001, 2) + # min var, action noise is reduced to (depends on training_episode_length) + noise_steps_annealing = int( + 0.25 * number_learning_steps) # trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), + # number_learning_steps) + noise_theta = trial_config["noise_theta"] # trial.suggest_loguniform("noise_theta", 1, 50) # 25 # stiffness of OU + error_exponent = 0.5 # trial.suggest_loguniform("error_exponent", 0.001, 4) + + training_episode_length = trial_config[ + "training_episode_length"] # trial.suggest_int("training_episode_length", 500, 5000) # 128 + # learning_starts = 0.32 # trial.suggest_loguniform("learning_starts", 0.1, 2) # 128 + tau = trial_config["tau"] # trial.suggest_loguniform("tau", 0.0001, 0.3) # 2 + + train_freq_type = "step" # trial.suggest_categorical("train_freq_type", ["episode", "step"]) + train_freq = trial_config["train_freq"] # trial.suggest_int("train_freq", 1, 15000) + + optimizer = trial_config[ + "optimizer"] # trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"]) # , "LBFGS"]) + + number_past_vals = 5 # trial.suggest_int("number_past_vals", 0, 15) + + learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr, + t_start=t_start, + t_end=t_end, + total_timesteps=number_learning_steps) + + trail_config_mongo = {"Name": "Config", + "Node": node, + "Agent": "DDPG", + "Number_learning_Steps": number_learning_steps, + "Trial number": n_trail, + "Database name": cfg['STUDY_NAME'], + "Start time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Info": "AltesTestcase setting 
mit Integrator-Actor; 50 runs mit bestem HP-setting", + } + trail_config_mongo.update(trial.params) + # mongo_recorder.save_to_mongodb('Trial_number_' + n_trail, trail_config_mongo) + mongo_recorder.save_to_json('Trial_number_' + n_trail, trail_config_mongo) + + loss = experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + # loss = experiment_fit_DDPG_custom(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, + integrator_weight * antiwindup_weight, penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, + n_trail, number_past_vals) + + return loss + +def ddpg_objective(trial): + number_learning_steps = 500000 # trial.suggest_int("number_learning_steps", 100000, 1000000) + # rew_weigth = trial.suggest_float("rew_weigth", 0.1, 5) + # rew_penalty_distribution = trial.suggest_float("antiwindup_weight", 0.1, 5) + penalty_I_weight = 1 # trial.suggest_float("penalty_I_weight", 100e-6, 2) + penalty_P_weight = 1 # trial.suggest_float("penalty_P_weight", 100e-6, 2) + + penalty_I_decay_start = 0.5 # trial.suggest_float("penalty_I_decay_start", 0.00001, 1) + penalty_P_decay_start = 0.5 # trial.suggest_float("penalty_P_decay_start", 0.00001, 1) + + t_start_penalty_I = int(penalty_I_decay_start * number_learning_steps) + t_start_penalty_P = int(penalty_P_decay_start * number_learning_steps) + + integrator_weight = 0.1 # trial.suggest_float("integrator_weight", 1 / 200, 0.5) + # integrator_weight = trial.suggest_loguniform("integrator_weight", 1e-6, 1e-0) + # antiwindup_weight = trial.suggest_loguniform("antiwindup_weight", 50e-6, 50e-3) + antiwindup_weight = 0.1 # trial.suggest_float("antiwindup_weight", 0.00001, 1) + + learning_rate = trial.suggest_loguniform("learning_rate", 1e-7, 1e-2) # 0.0002# + + lr_decay_start = trial.suggest_float("lr_decay_start", 0.00001, 1) # 3000 # 0.2 * number_learning_steps? + lr_decay_duration = trial.suggest_float("lr_decay_duration", 0.00001, + 1) # 3000 # 0.2 * number_learning_steps? 
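+    # The two fractions above parameterize the linear_schedule constructed further
+    # below. Assumed behaviour of the scheduler (its implementation lives in
+    # experiments/hp_tune/util/scheduler.py and is not shown in this diff):
+    #   lr(t) = learning_rate                              for t <= t_start
+    #   lr(t) decays linearly to learning_rate * final_lr  for t_start < t < t_end
+    #   lr(t) = learning_rate * final_lr                   for t >= t_end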
+ t_start = int(lr_decay_start * number_learning_steps) + t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps, + number_learning_steps)) + final_lr = trial.suggest_float("final_lr", 0.00001, 1) + + gamma = trial.suggest_float("gamma", 0.8, 0.9999) + weight_scale = trial.suggest_loguniform("weight_scale", 5e-5, 0.2) # 0.005 + + bias_scale = trial.suggest_loguniform("bias_scale", 0.01, 0.1) # 0.005 + alpha_relu_actor = trial.suggest_loguniform("alpha_relu_actor", 0.001, 0.5) # 0.005 + alpha_relu_critic = trial.suggest_loguniform("alpha_relu_critic", 0.001, 0.5) # 0.005 + + batch_size = trial.suggest_int("batch_size", 16, 512) # 128 + buffer_size = trial.suggest_int("buffer_size", int(20e4), number_learning_steps) # 128 + + actor_hidden_size = trial.suggest_int("actor_hidden_size", 10, 75) # 100 # Using LeakyReLU + actor_number_layers = trial.suggest_int("actor_number_layers", 1, 3) + + critic_hidden_size = trial.suggest_int("critic_hidden_size", 10, 300) # 100 + critic_number_layers = trial.suggest_int("critic_number_layers", 1, 4) + + n_trail = str(trial.number) + use_gamma_in_rew = 1 + noise_var = trial.suggest_loguniform("noise_var", 0.01, 1) # 2 + # min var, action noise is reduced to (depends on noise_var) + noise_var_min = 0.0013 # trial.suggest_loguniform("noise_var_min", 0.0000001, 2) + # min var, action noise is reduced to (depends on training_episode_length) + noise_steps_annealing = int( + 0.25 * number_learning_steps) # trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), + # number_learning_steps) + noise_theta = trial.suggest_loguniform("noise_theta", 1, 50) # 25 # stiffness of OU + error_exponent = 0.5 # 0.5 # trial.suggest_loguniform("error_exponent", 0.001, 4) + + training_episode_length = trial.suggest_int("training_episode_length", 1000, 4000) # 128 + # learning_starts = 0.32 # trial.suggest_loguniform("learning_starts", 0.1, 2) # 128 + tau = trial.suggest_loguniform("tau", 0.0001, 0.3) # 2 + + train_freq_type = "step" # trial.suggest_categorical("train_freq_type", ["episode", "step"]) + train_freq = trial.suggest_int("train_freq", 1, 5000) + + optimizer = trial.suggest_categorical("optimizer", ["Adam"]) # ["Adam", "SGD", "RMSprop"]) # , "LBFGS"]) + + learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr, + t_start=t_start, + t_end=t_end, + total_timesteps=number_learning_steps) + number_past_vals = trial.suggest_int("number_past_vals", 0, 15) + + trail_config_mongo = {"Name": "Config", + "Node": node, + "Agent": "DDPG", + "Number_learning_Steps": number_learning_steps, + "Trial number": n_trail, + "Database name": cfg['STUDY_NAME'], + "Start time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Optimierer/ Setting stuff": "DDPG HPO ohne Integrator, alle HPs fuer den I-Anteil " + "wurden daher fix gesetzt. Vgl. 
zu DDPG+I-Anteil" + } + trail_config_mongo.update(trial.params) + # mongo_recorder.save_to_mongodb('Trial_number_' + n_trail, trail_config_mongo) + mongo_recorder.save_to_json('Trial_number_' + n_trail, trail_config_mongo) + + loss = experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + # loss = experiment_fit_DDPG_custom(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, + integrator_weight * antiwindup_weight, penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, + n_trail, number_past_vals) + + return loss + + +def get_storage(url, storage_kws): + successfull = False + retry_counter = 0 + + while not successfull: + try: + storage = optuna.storages.RDBStorage( + url=url, **storage_kws) + successfull = True + except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.DatabaseError) as e: + wait_time = np.random.randint(60, 300) + retry_counter += 1 + if retry_counter > 10: + print('Stopped after 10 connection attempts!') + raise e + print(f'Could not connect, retry in {wait_time} s') + time.sleep(wait_time) + + return storage + + +def optuna_optimize_mysql_lea35(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + if node in ('LEA-WORK35', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2MYSQL + else: + port = SERVER_LOCAL_PORT2MYSQL + + storage = get_storage(f'mysql://{optuna_creds}@localhost:{port}/{DB_NAME}') + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2MYSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2MYSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + study = optuna.create_study( + storage=f"mysql+pymysql://{optuna_creds}@127.0.0.1:{tun.local_bind_port}/{DB_NAME}", + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_mysql(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f"mysql://{optuna_creds}@localhost/{DB_NAME}", + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_sqlite(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 100 + + print(n_trials) + print('Local optimization is run but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in cfg['lea_vpn_nodes']: + optuna_path = './optuna/' + else: + # assume we are on not of pc2 -> store to project folder + optuna_path = '/scratch/hpc-prf-reinfl/weber/OMG/optuna/' + + os.makedirs(optuna_path, exist_ok=True) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f'sqlite:///{optuna_path}optuna.sqlite', + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_psql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_psql' + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + # set trial to failed if it seems dead for 20 minutes + storage_kws = dict(engine_kwargs={"pool_timeout": 600}) + if node in ('lea-cyberdyne', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2PSQL + else: + port = SERVER_LOCAL_PORT2PSQL + + storage = get_storage(f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', storage_kws=storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2PSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2PSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + storage = get_storage(url=f'postgresql://{optuna_creds}' + f'@localhost:{tun.local_bind_port}/{DB_NAME}', storage_kws=storage_kws) + + # storage = optuna.storages.RDBStorage( + # url=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + # **storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +if __name__ == "__main__": + # learning_rate = list(itertools.chain(*[[1e-9] * 1])) + # search_space = {'learning_rate': learning_rate} # , 'number_learning_steps': number_learning_steps} + + TPE_sampler = TPESampler(n_startup_trials=400) # , constant_liar=True) + # TPE_sampler = TPESampler(n_startup_trials=2500) # , constant_liar=True) + + # optuna_optimize_mysql_lea35(ddpg_objective, study_name=STUDY_NAME, sampler=TPE_sampler) + + optuna_optimize_mysql_lea35(ddpg_objective_fix_params, study_name=STUDY_NAME, sampler=TPE_sampler) + # 
optuna_optimize_sqlite(ddpg_objective_fix_params, study_name=STUDY_NAME, sampler=TPE_sampler) + + # optuna_optimize(ddpg_objective, study_name=STUDY_NAME, + # sampler=TPE_sampler) #, sampler=optuna.samplers.GridSampler(search_space)) diff --git a/experiments/hp_tune/hp_tune_td3_objective.py b/experiments/hp_tune/hp_tune_td3_objective.py new file mode 100644 index 00000000..d7db77eb --- /dev/null +++ b/experiments/hp_tune/hp_tune_td3_objective.py @@ -0,0 +1,360 @@ +import os +import time + +import sqlalchemy +from optuna.samplers import TPESampler + +from experiments.hp_tune.experiment_vctrl_single_inv_TD3 import experiment_fit_TD3 + +os.environ['PGOPTIONS'] = '-c statement_timeout=1000' + +import optuna +import platform +import argparse +import sshtunnel +import numpy as np +# np.random.seed(0) +from experiments.hp_tune.util.configTD3 import cfg + +# from experiments.hp_tune.experiment_vctrl_single_inv import experiment_fit_DDPG, mongo_recorder +from experiments.hp_tune.experiment_vctrl_single_inv_TD3 import mongo_recorder +from experiments.hp_tune.util.scheduler import linear_schedule + +PC2_LOCAL_PORT2PSQL = 11999 +SERVER_LOCAL_PORT2PSQL = 6432 +DB_NAME = 'optuna' +PC2_LOCAL_PORT2MYSQL = 11998 +SERVER_LOCAL_PORT2MYSQL = 3306 +STUDY_NAME = cfg['STUDY_NAME'] # 'DDPG_MRE_sqlite_PC2' + +node = platform.uname().node + + +def td3_objective(trial): + number_learning_steps = 500000 # trial.suggest_int("number_learning_steps", 100000, 1000000) + # rew_weigth = trial.suggest_float("rew_weigth", 0.1, 5) + # rew_penalty_distribution = trial.suggest_float("antiwindup_weight", 0.1, 5) + penalty_I_weight = trial.suggest_float("penalty_I_weight", 100e-6, 2) + penalty_P_weight = trial.suggest_float("penalty_P_weight", 100e-6, 2) + + penalty_I_decay_start = trial.suggest_float("penalty_I_decay_start", 0.00001, 1) + penalty_P_decay_start = trial.suggest_float("penalty_P_decay_start", 0.00001, 1) + + t_start_penalty_I = int(penalty_I_decay_start * number_learning_steps) + t_start_penalty_P = int(penalty_P_decay_start * number_learning_steps) + + integrator_weight = trial.suggest_float("integrator_weight", 1 / 200, 2) + # integrator_weight = trial.suggest_loguniform("integrator_weight", 1e-6, 1e-0) + # antiwindup_weight = trial.suggest_loguniform("antiwindup_weight", 50e-6, 50e-3) + antiwindup_weight = trial.suggest_float("antiwindup_weight", 0.00001, 1) + + learning_rate = trial.suggest_loguniform("learning_rate", 1e-6, 1e-1) # 0.0002# + + lr_decay_start = trial.suggest_float("lr_decay_start", 0.00001, 1) # 3000 # 0.2 * number_learning_steps? + lr_decay_duration = trial.suggest_float("lr_decay_duration", 0.00001, + 1) # 3000 # 0.2 * number_learning_steps? 
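+    # Same learning-rate decay scheme as in the DDPG objective: the two fractions
+    # above are mapped to absolute step indices (with the end clamped to the
+    # training horizon) and passed to linear_schedule further below.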
+    t_start = int(lr_decay_start * number_learning_steps)
+    t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps,
+                           number_learning_steps))
+    final_lr = trial.suggest_float("final_lr", 0.00001, 1)
+
+    gamma = trial.suggest_float("gamma", 0.5, 0.9999)
+    weight_scale = trial.suggest_loguniform("weight_scale", 5e-5, 0.2)  # 0.005
+
+    # bias_scale = trial.suggest_loguniform("bias_scale", 5e-4, 0.1)  # 0.005
+    alpha_relu_actor = trial.suggest_loguniform("alpha_relu_actor", 0.0001, 0.5)  # 0.005
+    alpha_relu_critic = trial.suggest_loguniform("alpha_relu_critic", 0.0001, 0.5)  # 0.005
+
+    batch_size = trial.suggest_int("batch_size", 16, 1024)  # 128
+    buffer_size = trial.suggest_int("buffer_size", int(1e4), number_learning_steps)  # 128
+
+    actor_hidden_size = trial.suggest_int("actor_hidden_size", 10, 250)  # 100 # Using LeakyReLU
+    actor_number_layers = trial.suggest_int("actor_number_layers", 1, 5)
+
+    critic_hidden_size = trial.suggest_int("critic_hidden_size", 10, 300)  # 100
+    critic_number_layers = trial.suggest_int("critic_number_layers", 1, 4)
+
+    n_trail = str(trial.number)
+    use_gamma_in_rew = 1
+    noise_var = trial.suggest_loguniform("noise_var", 0.01, 1)  # 2
+    # min var, action noise is reduced to (depends on noise_var)
+    noise_var_min = 0.0013  # trial.suggest_loguniform("noise_var_min", 0.0000001, 2)
+    # min var, action noise is reduced to (depends on training_episode_length)
+    noise_steps_annealing = int(
+        0.25 * number_learning_steps)  # trial.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps),
+    # number_learning_steps)
+    noise_theta = trial.suggest_loguniform("noise_theta", 1, 50)  # 25 # stiffness of OU
+    error_exponent = 0.5  # trial.suggest_loguniform("error_exponent", 0.001, 4)
+
+    training_episode_length = trial.suggest_int("training_episode_length", 500, 5000)  # 128
+    # learning_starts = 0.32  # trial.suggest_loguniform("learning_starts", 0.1, 2)  # 128
+    tau = trial.suggest_loguniform("tau", 0.0001, 0.2)  # 2
+
+    train_freq_type = "step"  # trial.suggest_categorical("train_freq_type", ["episode", "step"])
+    train_freq = trial.suggest_int("train_freq", 1, 15000)
+
+    optimizer = trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"])  # , "LBFGS"])
+
+    learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr,
+                                    t_start=t_start,
+                                    t_end=t_end,
+                                    total_timesteps=number_learning_steps)
+
+    policy_delay = trial.suggest_int("policy_delay", 1, 50)
+    target_policy_noise = trial.suggest_float("target_policy_noise", 0.0001, 1)
+    target_noise_clip = trial.suggest_float("target_noise_clip", 0.0001, 2)
+
+    trail_config_mongo = {"Name": "Config",
+                          "Node": node,
+                          "Agent": "TD3",
+                          "Number_learning_Steps": number_learning_steps,
+                          "Trial number": n_trail,
+                          "Database name": cfg['STUDY_NAME'],
+                          "Start time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()),
+                          "Optimierer/ Setting stuff": "No constant-liar feature, wider bounds, integrator weight as HP; "
+                                                       "action space = 6, since the P and I parts are separate outputs and are added up in the wrapper; "
+                                                       "integrator state + used_P_Action (each delayed by one step) are passed along as features; "
+                                                       "penalties for the P and the I action part; "
+                                                       "more HPs: train_freq, batch/buffer_size, a_relu ",
+                          "additionalInfo": "Long Holiday run"
+                          }
+    trail_config_mongo.update(trial.params)
+    # mongo_recorder.save_to_mongodb('Trial_number_' + n_trail, trail_config_mongo)
+    mongo_recorder.save_to_json('Trial_number_' + n_trail, trail_config_mongo)
+
+    loss = 
experiment_fit_TD3(learning_rate, gamma, use_gamma_in_rew, weight_scale, alpha_relu_actor, + # loss = experiment_fit_Custom_TD3(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, + integrator_weight * antiwindup_weight, penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, + n_trail, + policy_delay, target_policy_noise, target_noise_clip) + + return loss + + +def get_storage(url, storage_kws): + successfull = False + retry_counter = 0 + + while not successfull: + try: + storage = optuna.storages.RDBStorage( + url=url, **storage_kws) + successfull = True + except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.DatabaseError) as e: + wait_time = np.random.randint(60, 300) + retry_counter += 1 + if retry_counter > 10: + print('Stopped after 10 connection attempts!') + raise e + print(f'Could not connect, retry in {wait_time} s') + time.sleep(wait_time) + + return storage + + +def optuna_optimize_mysql_lea35(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + if node in ('LEA-WORK35', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2MYSQL + else: + port = SERVER_LOCAL_PORT2MYSQL + + storage = get_storage(f'mysql://{optuna_creds}@localhost:{port}/{DB_NAME}') + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2MYSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2MYSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + study = optuna.create_study( + storage=f"mysql+pymysql://{optuna_creds}@127.0.0.1:{tun.local_bind_port}/{DB_NAME}", + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_mysql(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f"mysql://{optuna_creds}@localhost/{DB_NAME}", + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_sqlite(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in cfg['lea_vpn_nodes']: + optuna_path = './optuna/' + else: + # assume we are on not of pc2 -> store to project folder + optuna_path = '/scratch/hpc-prf-reinfl/weber/OMG/optuna/' + + os.makedirs(optuna_path, exist_ok=True) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f'sqlite:///{optuna_path}optuna.sqlite', + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_psql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_psql' + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + # set trial to failed if it seems dead for 20 minutes + storage_kws = dict(engine_kwargs={"pool_timeout": 600}) + if node in ('lea-cyberdyne', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2PSQL + else: + port = SERVER_LOCAL_PORT2PSQL + + storage = get_storage(f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', storage_kws=storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2PSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2PSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + storage = get_storage(url=f'postgresql://{optuna_creds}' + f'@localhost:{tun.local_bind_port}/{DB_NAME}', storage_kws=storage_kws) + + # storage = optuna.storages.RDBStorage( + # url=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + # **storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +if __name__ == "__main__": + # learning_rate = list(itertools.chain(*[[1e-9] * 1])) + # search_space = {'learning_rate': learning_rate} # , 'number_learning_steps': number_learning_steps} + + TPE_sampler = TPESampler(n_startup_trials=2500) # , constant_liar=True) + + optuna_optimize_mysql_lea35(td3_objective, study_name=STUDY_NAME, sampler=TPE_sampler) + + # optuna_optimize_sqlite(td3_objective, study_name=STUDY_NAME, sampler=TPE_sampler) + # optuna_optimize(td3_objective, study_name=STUDY_NAME, + # sampler=TPE_sampler) #, 
sampler=optuna.samplers.GridSampler(search_space)) diff --git a/experiments/hp_tune/pc2_run_allowed_workers_once_ddpg.py b/experiments/hp_tune/pc2_run_allowed_workers_once_ddpg.py new file mode 100644 index 00000000..12a1ccdd --- /dev/null +++ b/experiments/hp_tune/pc2_run_allowed_workers_once_ddpg.py @@ -0,0 +1,81 @@ +"""Allocate jobs executing a certain cmd endlessly. Make sure not to surpass +the allowed cpu core limit""" + +import os +import pathlib +import time +import uuid + +from experiments.hp_tune.util import pc2 +# config +from experiments.hp_tune.util.config import cfg + +USER = os.getenv('USER') +ALLOWED_MAX_CPU_CORES = 512 + +# resources request +job_resource_plan = { + 'duration': 24, # in hours + 'ncpus': 2, + 'memory': 12, + 'vmemory': 16, +} + +MAX_WORKERS = ALLOWED_MAX_CPU_CORES // job_resource_plan['ncpus'] +STUDY_NAME = cfg['STUDY_NAME'] +NUMBER_INTERATIONS = 1 + +def main(): + print('Start slavedriving loop..') + print('Will start MAX_WORKERS and terminate.') + old_ccsinfo_counts = None + for _ in range(MAX_WORKERS): + job_files_path = pathlib.Path( + f"/scratch/hpc-prf-reinfl/weber/OMG/ccs_job_files/{STUDY_NAME}") # SCRATCH = $PC2PFS/hpc_....re/OMG_prjecet + job_files_path.mkdir(parents=False, exist_ok=True) + + # read ccsinfo + ccsinfo = pc2.get_ccsinfo(USER) + ccsinfo_state_counts = ccsinfo.state.value_counts() + ccs_running = ccsinfo_state_counts.get('ALLOCATED', 0) + ccs_planned = ccsinfo_state_counts.get('PLANNED', 0) + total_busy = ccs_running + ccs_planned + if not ccsinfo_state_counts.equals(old_ccsinfo_counts): + print("\n## ccs summary ##") + print(f"Running: {ccs_running}") + print(f"Planned : {ccs_planned}") + print(f"Total busy workers (ccs): {total_busy}") + + if total_busy < MAX_WORKERS: + # call workers to work + # n_workers = MAX_WORKERS - total_busy + # print(f'Start {n_workers} workers:') + # for w in range(n_workers): + jobid = str(uuid.uuid4()).split('-')[0] + cluster = "oculus" + job_name = job_files_path / f"pc2_job_{jobid}.sh" + res_plan = pc2.calculate_resources(**job_resource_plan) + + execution_line = "PYTHONPATH=$HOME/openmodelica-microgrid-gym/ " \ + "python $HOME/openmodelica-microgrid-gym/experiments/hp_tune/hp_tune_ddpg_objective.py -n 3" + + print(f'Start job {jobid} ..') + pc2.create_n_run_script( + job_name, + pc2.build_shell_script_lines(job_files_path, cluster, + job_name, res_plan, + execution_line), + dry=False) + + print('sleep 10s for better DB interaction', end='\r') + time.sleep(10) + + old_ccsinfo_counts = ccsinfo_state_counts + + # print('sleep..', end='\r') + # time.sleep(120) + print('Finished, need resatart to schedule again!..', end='\r') + + +if __name__ == '__main__': + main() diff --git a/experiments/hp_tune/pc2_schedule_ddpg.py b/experiments/hp_tune/pc2_schedule_ddpg.py new file mode 100644 index 00000000..d5d691ac --- /dev/null +++ b/experiments/hp_tune/pc2_schedule_ddpg.py @@ -0,0 +1,105 @@ +"""Allocate jobs executing a certain cmd endlessly. 
Make sure not to surpass +the allowed cpu core limit""" + +import os +import pathlib +import uuid +import time + +import optuna +from optuna.samplers import TPESampler + +from experiments.hp_tune.util import pc2 +from experiments.hp_tune.util.config import cfg + +# config +USER = os.getenv('USER') +ALLOWED_MAX_CPU_CORES = 500 # 512 +STUDY_NAME = cfg['STUDY_NAME'] +DB_NAME = 'optuna' +# resources request +job_resource_plan = { + 'duration': 24, # in hours + 'ncpus': 2, + 'memory': 12, + 'vmemory': 16, +} + +MAX_WORKERS = ALLOWED_MAX_CPU_CORES // job_resource_plan['ncpus'] + +PC2_LOCAL_PORT2MYSQL = 11998 +SERVER_LOCAL_PORT2MYSQL = 3306 + + +def main(): + started_workers = 0 + print('Start slavedriving loop..') + old_ccsinfo_counts = None + while True: + + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study( + storage=f'mysql+pymysql://{optuna_creds}@localhost:{PC2_LOCAL_PORT2MYSQL}/{DB_NAME}', + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=TPESampler(n_startup_trials=2500), study_name=STUDY_NAME, + load_if_exists=True, + direction='maximize') + + complete_trials = len([t for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]) + print(f'Completed trials in study: {complete_trials}') + if complete_trials > 12000: + print('Maximal completed trials reached - STOPPING') + break + + job_files_path = pathlib.Path( + f"/scratch/hpc-prf-reinfl/weber/OMG/ccs_job_files/{STUDY_NAME}") # SCRATCH = $PC2PFS/hpc_....re/OMG_prjecet + job_files_path.mkdir(parents=False, exist_ok=True) + + # read ccsinfo + ccsinfo = pc2.get_ccsinfo(USER) + ccsinfo_state_counts = ccsinfo.state.value_counts() + ccs_running = ccsinfo_state_counts.get('ALLOCATED', 0) + ccs_planned = ccsinfo_state_counts.get('PLANNED', 0) + total_busy = ccs_running + ccs_planned + if not ccsinfo_state_counts.equals(old_ccsinfo_counts): + print("\n## ccs summary ##") + print(f"Running: {ccs_running}") + print(f"Planned : {ccs_planned}") + print(f"Total busy workers (ccs): {total_busy}") + + if total_busy < MAX_WORKERS: + # call workers to work + n_workers = MAX_WORKERS - total_busy + print(f'Start {n_workers} workers:') + for w in range(n_workers): + started_workers += 1 + jobid = str(uuid.uuid4()).split('-')[0] + cluster = "oculus" + job_name = job_files_path / f"pc2_job_{jobid}.sh" + res_plan = pc2.calculate_resources(**job_resource_plan) + + execution_line = "PYTHONPATH=$HOME/openmodelica-microgrid-gym/ " \ + "python $HOME/openmodelica-microgrid-gym/experiments/hp_tune/hp_tune_ddpg_objective.py -n 1" + + print(f'Start job {jobid} ..') + pc2.create_n_run_script( + job_name, + pc2.build_shell_script_lines(job_files_path, cluster, + job_name, res_plan, + execution_line), + dry=False) + print('sleep 10s for better DB interaction', end='\r') + time.sleep(10) + + old_ccsinfo_counts = ccsinfo_state_counts + + print('sleep..', end='\r') + time.sleep(300) + + +if __name__ == '__main__': + main() diff --git a/experiments/hp_tune/pc2_schedule_td3.py b/experiments/hp_tune/pc2_schedule_td3.py new file mode 100644 index 00000000..0fb94108 --- /dev/null +++ b/experiments/hp_tune/pc2_schedule_td3.py @@ -0,0 +1,102 @@ +"""Allocate jobs executing a certain cmd endlessly. 
Make sure not to surpass +the allowed cpu core limit""" + +import os +import pathlib +import uuid +import time + +import optuna +from optuna.samplers import TPESampler + +from experiments.hp_tune.util import pc2 +from experiments.hp_tune.util.configTD3 import cfg + +# config +USER = os.getenv('USER') +ALLOWED_MAX_CPU_CORES = 512 +STUDY_NAME = cfg['STUDY_NAME'] +DB_NAME = 'optuna' +# resources request +job_resource_plan = { + 'duration': 24, # in hours + 'ncpus': 2, + 'memory': 12, + 'vmemory': 16, +} + +MAX_WORKERS = ALLOWED_MAX_CPU_CORES // job_resource_plan['ncpus'] +PC2_LOCAL_PORT2MYSQL = 11998 + +def main(): + started_workers = 0 + print('Start slavedriving loop..') + old_ccsinfo_counts = None + while True: + + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study( + storage=f'mysql+pymysql://{optuna_creds}@localhost:{PC2_LOCAL_PORT2MYSQL}/{DB_NAME}', + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=TPESampler(n_startup_trials=2500), study_name=STUDY_NAME, + load_if_exists=True, + direction='maximize') + + complete_trials = len([t for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]) + print(f'Completed trials in study: {complete_trials}') + if complete_trials > 12000: + print('Maximal completed trials reached - STOPPING') + break + + job_files_path = pathlib.Path( + f"/scratch/hpc-prf-reinfl/weber/OMG/ccs_job_files/{STUDY_NAME}") # SCRATCH = $PC2PFS/hpc_....re/OMG_prjecet + job_files_path.mkdir(parents=False, exist_ok=True) + + # read ccsinfo + ccsinfo = pc2.get_ccsinfo(USER) + ccsinfo_state_counts = ccsinfo.state.value_counts() + ccs_running = ccsinfo_state_counts.get('ALLOCATED', 0) + ccs_planned = ccsinfo_state_counts.get('PLANNED', 0) + total_busy = ccs_running + ccs_planned + if not ccsinfo_state_counts.equals(old_ccsinfo_counts): + print("\n## ccs summary ##") + print(f"Running: {ccs_running}") + print(f"Planned : {ccs_planned}") + print(f"Total busy workers (ccs): {total_busy}") + + if total_busy < MAX_WORKERS: + # call workers to work + n_workers = MAX_WORKERS - total_busy + print(f'Start {n_workers} workers:') + for w in range(n_workers): + started_workers += 1 + jobid = str(uuid.uuid4()).split('-')[0] + cluster = "oculus" + job_name = job_files_path / f"pc2_job_{jobid}.sh" + res_plan = pc2.calculate_resources(**job_resource_plan) + + execution_line = "PYTHONPATH=$HOME/openmodelica-microgrid-gym/ " \ + "python $HOME/openmodelica-microgrid-gym/experiments/hp_tune/hp_tune_td3_objective.py -n 1" + + print(f'Start job {jobid} ..') + pc2.create_n_run_script( + job_name, + pc2.build_shell_script_lines(job_files_path, cluster, + job_name, res_plan, + execution_line), + dry=False) + print('sleep 10s for better DB interaction', end='\r') + time.sleep(10) + + old_ccsinfo_counts = ccsinfo_state_counts + + print('sleep..', end='\r') + time.sleep(300) + + +if __name__ == '__main__': + main() diff --git a/experiments/hp_tune/policies/split_actor.py b/experiments/hp_tune/policies/split_actor.py new file mode 100644 index 00000000..42a0ffb4 --- /dev/null +++ b/experiments/hp_tune/policies/split_actor.py @@ -0,0 +1,144 @@ +from typing import Optional, Tuple, List, Type + +import gym +import torch as th +from torch import nn + +from stable_baselines3.common.policies import register_policy, BaseModel +from stable_baselines3.common.preprocessing import get_action_dim +from stable_baselines3.common.torch_layers 
import BaseFeaturesExtractor +from stable_baselines3.td3.policies import TD3Policy, Actor + + +def mlp(sizes, activation, output_activation=None): + """ + Defines a multi layer perceptron using pytorch layers and activation funtions + """ + layers = [] + for j in range(len(sizes) - 1): + act = activation if j < len(sizes) - 2 else output_activation + if act is not None: + layers += [nn.Linear(sizes[j], sizes[j + 1]), act()] + else: + layers += [nn.Linear(sizes[j], sizes[j + 1])] + # layers.append(nn.Tanh()) + return layers + + +class CustomActor(Actor): + """ + Actor network (policy) for TD3. + """ + + def __init__(self, *args, **kwargs): + super(CustomActor, self).__init__(*args, **kwargs) + # Define custom network with Dropout + # WARNING: it must end with a tanh activation to squash the output + # self.mu = nn.Sequential(*mlp([20, 10, 5, 6], nn.LeakyReLU())) + + # self.mu = nn.Sequential(nn.Linear(kwargs['observation_space'].shape[0], 32), + # kwargs['activation_fn'](negative_slope=0.02), + # nn.Linear(32, 10), + # nn.LeakyReLU(negative_slope=0.02), + # nn.Linear(10, int(kwargs['action_space'].shape[0] / 2))) + # self.I = nn.Sequential(nn.Linear(kwargs['observation_space'].shape[0], 32), + # kwargs['activation_fn'](negative_slope=0.02), + # nn.Linear(32, 10), + # nn.LeakyReLU(negative_slope=0.02), + # nn.Linear(10, int(kwargs['action_space'].shape[0] / 2))) + + self.mu = nn.Sequential(*mlp([kwargs['observation_space'].shape[0], *kwargs['net_arch'], + int(kwargs['action_space'].shape[0] / 2)], + kwargs['activation_fn'], + nn.Tanh)) + + self.I = nn.Sequential(*mlp([kwargs['observation_space'].shape[0], *kwargs['net_arch'], + int(kwargs['action_space'].shape[0] / 2)], + kwargs['activation_fn'], + nn.Tanh)) + + def forward(self, obs: th.Tensor) -> th.Tensor: + # assert deterministic, 'The TD3 actor only outputs deterministic actions' + features = self.extract_features(obs) + return th.cat((self.mu(features), self.I(features)), 1) + + +class CustomContinuousCritic(BaseModel): + """ + Critic network(s) for DDPG/SAC/TD3. 
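+
+    In this variant each of the n_critics Q-networks is a plain multilayer
+    perceptron over the concatenation of the extracted features and the action,
+    built with the module-level mlp helper instead of stable-baselines3's
+    create_mlp. As an illustration (the numbers here are example values only):
+    net_arch=[32, 10] with features_dim=20 and a 6-dimensional action yields
+    Linear(26, 32) -> activation -> Linear(32, 10) -> activation -> Linear(10, 1)
+    per Q-network. q1_forward evaluates only the first of these networks, which
+    is the one used for the delayed policy update in TD3.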
+ """ + + def __init__( + self, + observation_space: gym.spaces.Space, + action_space: gym.spaces.Space, + net_arch: List[int], + features_extractor: nn.Module, + features_dim: int, + activation_fn: Type[nn.Module] = nn.ReLU, + normalize_images: bool = True, + n_critics: int = 2, + share_features_extractor: bool = True, + ): + super().__init__( + observation_space, + action_space, + features_extractor=features_extractor, + normalize_images=normalize_images, + ) + + action_dim = get_action_dim(self.action_space) + + self.share_features_extractor = share_features_extractor + self.n_critics = n_critics + self.q_networks = [] + for idx in range(n_critics): + # q_net = create_mlp(features_dim + action_dim, 1, net_arch, activation_fn) + # Define critic with Dropout here + # q_net = nn.Sequential( nn.Linear(features_dim + action_dim, 32), + # nn.ReLU(), + # nn.Linear(32, 10), + # nn.ReLU(), + # nn.Linear(10, 1) + # ) + + q_net = nn.Sequential(*mlp([features_dim + action_dim, *net_arch, 1], + activation_fn + )) + + self.add_module(f"qf{idx}", q_net) + self.q_networks.append(q_net) + + def forward(self, obs: th.Tensor, actions: th.Tensor) -> Tuple[th.Tensor, ...]: + # Learn the features extractor using the policy loss only + # when the features_extractor is shared with the actor + with th.set_grad_enabled(not self.share_features_extractor): + features = self.extract_features(obs) + qvalue_input = th.cat([features, actions], dim=1) + return tuple(q_net(qvalue_input) for q_net in self.q_networks) + + def q1_forward(self, obs: th.Tensor, actions: th.Tensor) -> th.Tensor: + """ + Only predict the Q-value using the first network. + This allows to reduce computation when all the estimates are not needed + (e.g. when updating the policy in TD3). + """ + with th.no_grad(): + features = self.extract_features(obs) + return self.q_networks[0](th.cat([features, actions], dim=1)) + + +class CustomTD3Policy(TD3Policy): + def __init__(self, *args, **kwargs): + super(CustomTD3Policy, self).__init__(*args, **kwargs) + + def make_actor(self, features_extractor: Optional[BaseFeaturesExtractor] = None) -> CustomActor: + actor_kwargs = self._update_features_extractor(self.actor_kwargs, features_extractor) + return CustomActor(**actor_kwargs).to(self.device) + + def make_critic(self, features_extractor: Optional[BaseFeaturesExtractor] = None) -> CustomContinuousCritic: + critic_kwargs = self._update_features_extractor(self.critic_kwargs, features_extractor) + return CustomContinuousCritic(**critic_kwargs).to(self.device) + + +register_policy("CustomTD3Policy", CustomTD3Policy) diff --git a/experiments/hp_tune/retrain.py b/experiments/hp_tune/retrain.py new file mode 100644 index 00000000..3c36eb02 --- /dev/null +++ b/experiments/hp_tune/retrain.py @@ -0,0 +1,190 @@ +import platform +import time + +import gym +import numpy as np +from stable_baselines3 import DDPG + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# imports net to define reward and executes script to register experiment + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to 
cyberdyne or locally as json to cfg[meas_data_folder] + + +def retrain_DDPG(learning_rate, gamma, use_gamma_in_rew, + error_exponent, + training_episode_length, + number_learning_steps, integrator_weight, antiwindup_weight, + penalty_I_weight, penalty_P_weight, + t_start_penalty_I, t_start_penalty_P, optimizer, n_trail, model_path): + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + + env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps) + + env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + used_optimzer = optimizer # th.optim.Adam, th.optim.RMSprop, th.optim.SGD + + # policy_kwargs = dict(optimizer_class=used_optimzer) + + model = DDPG.load(model_path + f'model.zip', env=env, tensorboard_log=model_path + n_trail) + # env = model.get_env() + env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Trial number": n_trail, + "Database name": folder_name, + "Sum_eps_reward": env.get_episode_rewards(), + "model_path": model_path + } + mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + + model.save(model_path + f'model_retrained.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? 
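+                        # Test setup (see the flags set a few lines above): rew.det_run = True makes
+                        # the reward function return -1 instead of aborting the episode when a limit
+                        # is exceeded, and limit_exceeded_penalty adds that -1 to the final score
+                        # only once, no matter how often a limit is violated during the test run.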
+ obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, penalty_P_weight=0) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + + while True: + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! 
(risk=0 manullay in env); only voltage taken into account in reward!"} + + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + +reward = retrain_DDPG(learning_rate=1e-4, gamma=0.8003175741091463, use_gamma_in_rew=1, + error_exponent=0.3663140388100587, + training_episode_length=3413, + number_learning_steps=200000, + integrator_weight=0.6618979905182214, + antiwindup_weight=0.9197062574269099, + penalty_I_weight=0.7813975187119389, + penalty_P_weight=1.5344102590339561, + t_start_penalty_I=0.9996190838462778, + t_start_penalty_P=0.14935820375506648, + optimizer='Adam', + n_trail='1', + model_path='experiments/hp_tune/trained_models/study_18_run_6462/' + ) + +print(reward) diff --git a/experiments/hp_tune/retrain_DDPG_singleLoadstep_per_episode.py b/experiments/hp_tune/retrain_DDPG_singleLoadstep_per_episode.py new file mode 100644 index 00000000..5b127f1b --- /dev/null +++ b/experiments/hp_tune/retrain_DDPG_singleLoadstep_per_episode.py @@ -0,0 +1,389 @@ +import json +import platform +import time +from functools import partial + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from stochastic.processes import VasicekProcess + +from experiments.hp_tune.env.env_wrapper import FeatureWrapper +from experiments.hp_tune.env.random_load import RandomLoad +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net, CallbackList # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# np.random.seed(0) +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.util import RandProcess + +model_path = 'experiments/hp_tune/trained_models/study_22_run_11534/' + +folder_name = cfg['STUDY_NAME'] + '_retrain' +node = platform.uname().node +file_congfig = open(model_path + + 'PC2_DDPG_Vctrl_single_inv_22_newTestcase_Trial_number_11534_0.json', ) +trial_config = json.load(file_congfig) +print('Config-Params:') +print(*trial_config.items(), sep='\n') +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC + + +def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.show() + + +def 
xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + plt.close() + + +def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + # ts = time.gmtime() + # fig.savefig(f'{folder_name + experiment_name}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.show() + + +# plant +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F +# R = 40 # nomVoltPeak / 7.5 # / Ohm +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) +upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 +R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + +gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + +def experiment_fit_DDPG(number_learning_steps, + gamma=trial_config['gamma'], n_trail=trial_config['Trial number'], + training_episode_length=trial_config['training_episode_length'], + integrator_weight=trial_config['integrator_weight'], + antiwindup_weight=trial_config['antiwindup_weight'], + penalty_I_weight=trial_config['penalty_I_weight'], + penalty_P_weight=trial_config['penalty_P_weight'], + t_start_penalty_I=trial_config['penalty_I_decay_start'], + t_start_penalty_P=trial_config['penalty_P_decay_start'], + actor_number_layers=trial_config['actor_number_layers'], + alpha_relu_actor=trial_config['alpha_relu_actor'], + critic_number_layers=trial_config['critic_number_layers'], + alpha_relu_critic=trial_config['alpha_relu_critic'], + + ): + rand_load_train = RandomLoad(training_episode_length, net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + + cb = CallbackList() + # set initial = None to reset load random in range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + cb.append(rand_load_train.reset) + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=1, error_exponent=0.5, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + """ + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v1', + reward_fun=rew.rew_fun_dq0, + max_episode_steps=training_episode_length, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + """ + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'], + max_episode_steps=training_episode_length, + + viz_mode='episode', + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + 
color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + # max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.one_random_loadstep_per_episode, + 'r_load.resistor2.R': rand_load_train.clipped_step, + 'r_load.resistor3.R': rand_load_train.clipped_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + + env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps) + + # todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... + env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + model = DDPG.load(model_path + f'model.zip', env=env, tensorboard_log=model_path) + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + # todo: Downscale actionspace - lessulgy possible? Interaction pytorch... 
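+    # The two loops above walk the loaded actor/critic Sequentials, in which Linear and
+    # activation modules alternate, so the activation of hidden layer kk sits at index
+    # count + 1 and count advances by 2 per layer. A possible index-free equivalent for
+    # the actor only (an illustration, not part of the original run; the original also
+    # patches actor_target as well as critic.qf0 and critic_target.qf0 the same way):
+    #
+    #     for module in model.actor.mu.modules():
+    #         if isinstance(module, th.nn.LeakyReLU):
+    #             module.negative_slope = alpha_relu_actor
+    #
+    # The Box(6) assignment above and the Box(3) assignment below bracket DDPG.load:
+    # according to the study config, the stored actor emits the P- and I-part as
+    # separate outputs (hence 6 action values) that are added in the wrapper, and the
+    # surrounding to-do comments flag this resizing as a workaround for the
+    # pytorch/SB3 action-space interaction.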
+ env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Trial number": n_trail, + "Database name": folder_name, + "Sum_eps_reward": env.get_episode_rewards() + } + mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + + model.save(model_path + f'model_retrained.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, penalty_P_weight=0) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + va = [] + vb = [] + vc = [] + v_ref0 = [] + v_ref1 = [] + v_ref2 = [] + ia = [] + ib = [] + ic = [] + R_load = [] + for step in range(env_test.max_episode_steps): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + + if step % 1000 == 0 and step != 0: + va.extend(env_test.history[env_test.viz_col_tmpls[0].vars[0]].copy().values.tolist()) + vb.extend(env_test.history[env_test.viz_col_tmpls[0].vars[1]].copy().values.tolist()) + vc.extend(env_test.history[env_test.viz_col_tmpls[0].vars[2]].copy().values.tolist()) + v_ref0.extend(env_test.history[env_test.viz_col_tmpls[0].vars[3]].copy().values.tolist()) + v_ref1.extend(env_test.history[env_test.viz_col_tmpls[0].vars[4]].copy().values.tolist()) + v_ref2.extend(env_test.history[env_test.viz_col_tmpls[0].vars[5]].copy().values.tolist()) + ia.extend(env_test.history[env_test.viz_col_tmpls[1].vars[0]].copy().values.tolist()) + ib.extend(env_test.history[env_test.viz_col_tmpls[1].vars[1]].copy().values.tolist()) + 
ic.extend(env_test.history[env_test.viz_col_tmpls[1].vars[2]].copy().values.tolist()) + R_load.extend(env_test.history[env_test.viz_col_tmpls[2].vars[1]].copy().values.tolist()) + + env_test.close() + obs = env_test.reset() + phase_list.append(env_test.env.net.components[0].phase) + + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "lc_capacitor1_v": va, + "lc_capacitor2_v": vb, + "lc_capacitor3_v": vc, + "inverter1_v_ref_0": v_ref0, + "inverter1_v_ref_1": v_ref1, + "inverter1_v_ref_2": v_ref2, + "lc_inductor1_i": ia, + "lc_inductor2_i": ib, + "lc_inductor3_i": ic, + "r_load_resistor1_R": R_load, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! (risk=0 manullay in env); only voltage taken into account in reward!"} + + """ + In new testenv not used, because then only the last episode is stored + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + """ + + mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + print(return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + +experiment_fit_DDPG(10000) diff --git a/experiments/hp_tune/run_testcase.py b/experiments/hp_tune/run_testcase.py new file mode 100644 index 00000000..38bca70e --- /dev/null +++ b/experiments/hp_tune/run_testcase.py @@ -0,0 +1,401 @@ +import platform +import time +from typing import Union + +import gym +import numpy as np +import torch as th +import matplotlib.pyplot as plt +from stable_baselines3 import DDPG +from stable_baselines3.common.monitor import Monitor +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise +from stable_baselines3.common.type_aliases import GymStepReturn + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.recorder import Recorder +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.util import abc_to_alpha_beta, dq0_to_abc, abc_to_dq0 + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or 
locally as json to cfg[meas_data_folder] + + +class FeatureWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = np.inf, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts): + """ + Env Wrapper to add features to the env-observations and adds information to env.step output which can be used in + case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) + + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self._v_pahsor = 0.0 + self.n_episode = 0 + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.reward_episode_mean = [] + self.n_trail = n_trail + self.phase = [] + self.integrator_sum = np.zeros(self.action_space.shape) + self.integrator_weight = integrator_weight + self.antiwindup_weight = antiwindup_weight + self.action_P0 = [] + self.action_P1 = [] + self.action_P2 = [] + self.action_I0 = [] + self.action_I1 = [] + self.action_I2 = [] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + self.integrator_sum += action * self.integrator_weight + + self.action_P0.append(action[0]) + self.action_P1.append(action[1]) + self.action_P2.append(action[2]) + self.action_I0.append(self.integrator_sum[0]) + self.action_I1.append(self.integrator_sum[1]) + self.action_I2.append(self.integrator_sum[2]) + + action_PI = action # + self.integrator_sum + + if cfg['is_dq0']: + # Action: dq0 -> abc + action_abc = dq0_to_abc(action_PI, self.env.net.components[0].phase) + + # check if m_abc will be clipped + if np.any(abs(action_abc) > 1): + # if, reduce integrator by clipped delta + action_delta = abc_to_dq0(np.clip(action_abc, -1, 1) - action_abc, self.env.net.components[0].phase) + self.integrator_sum += action_delta * self.antiwindup_weight + + obs, reward, done, info = super().step(action_abc) + self._n_training_steps += 1 + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + + # add wanted features here (add appropriate self.observation in init!!) 
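+        # Features appended further below in this wrapper: the dq0 control error
+        # v_setpoint - v_measured (3 values), sin and cos of the inverter phase
+        # (2 values) and the previously used action (self.used_action), which lets
+        # the agent learn the one-step actuation delay. With a 3-dimensional action
+        # this gives 3 + 2 + 3 = 8 features, matching number_of_features=8 used when
+        # wrapping env_test below. The phasor magnitudes computed next are only
+        # logged (loglevel == 'train'); the commented-out np.append lines show how
+        # they could be added to the observation as well.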
+ # calculate magnitude of current phasor abc + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": folder_name, + "Reward function": 'rew.rew_fun_dq0', + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + # mongo_recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = obs[6:9] - obs[3:6] # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 
0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + # obs = np.append(obs, delta_i_lim_i_phasor) + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_action) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + obs = super().reset() + self._n_training_steps = 0 + self.integrator_sum = np.zeros(self.action_space.shape) + + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + """ + Features + """ + error = obs[6:9] - obs[3:6] # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + # obs = np.append(obs, delta_i_lim_i_phasor) + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_action) + + return obs + + def cal_phasor_magnitude(self, abc: np.array) -> float: + """ + Calculated the magnitude of a phasor in a three phase system. 
M + + :param abc: Due to limit normed currents or voltages in abc frame + :return: magnitude of the current or voltage phasor + """ + # calculate magnitude of current phasor abc-> alpha,beta ->|sqrt(alpha² + beta²)| + i_alpha_beta = abc_to_alpha_beta(abc) + i_phasor_mag = np.sqrt(i_alpha_beta[0] ** 2 + i_alpha_beta[1] ** 2) + + return i_phasor_mag + + +def run_testcase_DDPG(gamma, integrator_weight, antiwindup_weight, model_path, error_exponent=0.5, use_gamma_in_rew=1, + n_trail=50000): + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + model = DDPG.load(model_path + f'model.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + env_test = FeatureWrapper(env_test, number_of_features=8, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight) + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + a0 = [] + a1 = [] + a2 = [] + v_d = [] + v_q = [] + v_0 = [] + + while True: + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + a0.append(np.float64(action[0])) + a1.append(np.float64(action[1])) + a2.append(np.float64(action[2])) + v_a = env_test.history.df['lc.capacitor1.v'].iloc[-1] + v_b = env_test.history.df['lc.capacitor2.v'].iloc[-1] + v_c = env_test.history.df['lc.capacitor3.v'].iloc[-1] + + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), env_test.env.net.components[0].phase) + + v_d.append(v_dq0[0]) + v_q.append(v_dq0[1]) + v_0.append(v_dq0[2]) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "Action0": a0, + "Action1": a1, + "Action2": a2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! 
(risk=0 manullay in env); only voltage taken into account in reward!"} + + # fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(10, 10)) + # ax1, ax2 = ax.flatten() + + plt.plot(env_test.action_P0) + plt.plot(env_test.action_P1) + plt.plot(env_test.action_P2) + plt.xlabel("") + plt.ylabel("action_P") + plt.title('Test') + plt.show() + + plt.plot(env_test.action_I0) + plt.plot(env_test.action_I1) + plt.plot(env_test.action_I2) + plt.xlabel("") + plt.ylabel("action_I") + plt.title('Test') + plt.show() + + plt.plot(v_d) + plt.plot(v_q) + plt.plot(v_0) + plt.xlabel("") + plt.ylabel("v_dq0") + plt.title('Test') + plt.show() + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + # mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + +run_testcase_DDPG(gamma=0.94713819942184, integrator_weight=2.3448684965845657e-06, + antiwindup_weight=0.0035792826486838116, + model_path='experiments/hp_tune/trained_models/study_10_run_954/', + error_exponent=0.5, use_gamma_in_rew=1, n_trail=50000) diff --git a/experiments/hp_tune/trained_models/Future_10Rvals/model.zip b/experiments/hp_tune/trained_models/Future_10Rvals/model.zip new file mode 100644 index 00000000..51ee2f63 Binary files /dev/null and b/experiments/hp_tune/trained_models/Future_10Rvals/model.zip differ diff --git a/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Actor.zip b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Actor.zip new file mode 100644 index 00000000..fbbf9a36 Binary files /dev/null and b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Actor.zip differ diff --git a/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals.zip b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals.zip new file mode 100644 index 00000000..2f9f1a13 Binary files /dev/null and b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals.zip differ diff --git a/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals_corr.zip b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals_corr.zip new file mode 100644 index 00000000..4d0b0207 Binary files /dev/null and b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals_corr.zip differ diff --git a/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip new file mode 100644 index 00000000..ed3ecead Binary files /dev/null and b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip differ diff --git a/experiments/hp_tune/trained_models/study_18_run_6462/model.zip b/experiments/hp_tune/trained_models/study_18_run_6462/model.zip new file mode 100644 index 00000000..b16c43cd Binary files /dev/null and b/experiments/hp_tune/trained_models/study_18_run_6462/model.zip differ diff --git a/experiments/hp_tune/trained_models/study_18_run_6462_new/model.zip 
b/experiments/hp_tune/trained_models/study_18_run_6462_new/model.zip new file mode 100644 index 00000000..b16c43cd Binary files /dev/null and b/experiments/hp_tune/trained_models/study_18_run_6462_new/model.zip differ diff --git a/experiments/hp_tune/trained_models/study_22_run_11534/PC2_DDPG_Vctrl_single_inv_22_newTestcase_Trial_number_11534_0.json b/experiments/hp_tune/trained_models/study_22_run_11534/PC2_DDPG_Vctrl_single_inv_22_newTestcase_Trial_number_11534_0.json new file mode 100644 index 00000000..eadd42a2 --- /dev/null +++ b/experiments/hp_tune/trained_models/study_22_run_11534/PC2_DDPG_Vctrl_single_inv_22_newTestcase_Trial_number_11534_0.json @@ -0,0 +1,39 @@ +{ + "Name": "Config", + "Node": "node04-048", + "Agent": "DDPG", + "Number_learning_Steps": 500000, + "Trial number": "11534", + "Database name": "PC2_DDPG_Vctrl_single_inv_22_newTestcase", + "Start time": "2021_08_03__07_32_51", + "Optimierer/ Setting stuff": "Kein Const_liar_feature, hoehere Grenzen, INtergrator Gewicht als HP,Actionspace = 6, da P und I-Anteil seperate ausg\u00e4nge und im wrapper addiert werdenIntegratorzustand+used_P_Action (je um einen verzoegert) wird mit als feature uebergebenPenalties fuer action_P und action_PMehr HPs: trainfreq, batch/buffer_size, a_relu ", + "Weitere Info": "NEUES TEST ENV - 100k steps alle 1000 resettet das vom training - zuf\u00e4lligeLast", + "additionalInfo": "Long Holiday run", + "penalty_I_weight": 1.132480628572647, + "penalty_P_weight": 1.4834257541454123, + "penalty_I_decay_start": 0.5489063567901366, + "penalty_P_decay_start": 0.23007974811664603, + "integrator_weight": 0.31113470671968957, + "antiwindup_weight": 0.660818130720168, + "learning_rate": 0.00037457864914508586, + "lr_decay_start": 0.2750816923408933, + "lr_decay_duration": 0.3240504611772025, + "final_lr": 0.8356876361923928, + "gamma": 0.9462178519540726, + "weight_scale": 0.000852050757214834, + "bias_scale": 0.020070268741104066, + "alpha_relu_actor": 0.20809806015130924, + "alpha_relu_critic": 0.006784965521936233, + "batch_size": 261, + "buffer_size": 386945, + "actor_hidden_size": 25, + "actor_number_layers": 2, + "critic_hidden_size": 295, + "critic_number_layers": 4, + "noise_var": 0.023580253339050283, + "noise_theta": 31.575020911887215, + "training_episode_length": 2811, + "tau": 0.002609222715831891, + "train_freq": 2, + "optimizer": "Adam" +} \ No newline at end of file diff --git a/experiments/hp_tune/trained_models/study_22_run_11534/model.zip b/experiments/hp_tune/trained_models/study_22_run_11534/model.zip new file mode 100644 index 00000000..50daca14 Binary files /dev/null and b/experiments/hp_tune/trained_models/study_22_run_11534/model.zip differ diff --git a/experiments/hp_tune/trained_models/study_22_run_11534/model_retrained.zip b/experiments/hp_tune/trained_models/study_22_run_11534/model_retrained.zip new file mode 100644 index 00000000..2d40930f Binary files /dev/null and b/experiments/hp_tune/trained_models/study_22_run_11534/model_retrained.zip differ diff --git a/experiments/hp_tune/util/__init__.py b/experiments/hp_tune/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/experiments/hp_tune/util/action_noise_wrapper.py b/experiments/hp_tune/util/action_noise_wrapper.py new file mode 100644 index 00000000..3a2bf7ea --- /dev/null +++ b/experiments/hp_tune/util/action_noise_wrapper.py @@ -0,0 +1,48 @@ +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise +import numpy as np + + +class 
myOrnsteinUhlenbeckActionNoise(OrnsteinUhlenbeckActionNoise): + """ + Wraps the OU-noise from sb3 to give the possibility to reduce the action noise over training time + Implementation similar to kerasRL2 (https://github.com/wau/keras-rl2/blob/master/rl/random.py) + """ + + def __init__(self, n_steps_annealing=1000, sigma_min=None, *args, **kwargs): + super(myOrnsteinUhlenbeckActionNoise, self).__init__(*args, **kwargs) + self.n_steps_annealing = n_steps_annealing + self.sigma_min = sigma_min + self.n_steps = 0 + + if sigma_min is not None: + # self.m = -float(self._sigma - sigma_min) / float(n_steps_annealing) + self.m = -(self._sigma - sigma_min) / (n_steps_annealing) + self.c = self._sigma + self.sigma_min = sigma_min + else: + self.m = 0. + self.c = self._sigma + self.sigma_min = self._sigma + + @property + def current_sigma(self): + sigma = np.maximum(self.sigma_min, self.m * float(self.n_steps) + self.c) + return sigma + + def __call__(self) -> np.ndarray: + noise = ( + self.noise_prev + + self._theta * (self._mu - self.noise_prev) * self._dt + + self.current_sigma * np.sqrt(self._dt) * np.random.normal(size=self._mu.shape) + ) + self.noise_prev = noise + self.n_steps += 1 + return noise + + def reset(self) -> None: + super().reset() + + # should not be reset because action_noise is reset after episode, but noise reduction over learning-length + # does not reset the noise reduction! Reduction not per episode but per learing, since action noise + # is redifiend then, no reset of annealing needed + # self.n_steps = 0 diff --git a/experiments/hp_tune/util/callbacklist.py b/experiments/hp_tune/util/callbacklist.py new file mode 100644 index 00000000..2a94f306 --- /dev/null +++ b/experiments/hp_tune/util/callbacklist.py @@ -0,0 +1,6 @@ +class CallbackList(list): + # List of callback functions + def fire(self, *args, **kwargs): + # executes all callbacks in list + for listener in self: + listener(*args, **kwargs) diff --git a/experiments/hp_tune/util/config.py b/experiments/hp_tune/util/config.py new file mode 100644 index 00000000..12d40e0d --- /dev/null +++ b/experiments/hp_tune/util/config.py @@ -0,0 +1,17 @@ +cfg = dict(lea_vpn_nodes=['lea-skynet', 'lea-picard', 'lea-barclay', + 'lea-cyberdyne', 'webbah-ThinkPad-L380', 'LEA_WORK35'], + # STUDY_NAME='PC2_TD3_Vctrl_single_inv_2', + # STUDY_NAME='PC2_DDPG_Vctrl_single_inv_23_added_Past_vals', + # STUDY_NAME='PC2_DDPG_Vctrl_single_inv_HPO_noI_term_study_25', + # STUDY_NAME='OMG_DDPG_Integrator_no_pastVals', + # meas_data_folder='Json_to_MonogDB_OMG_DDPG_Integrator_no_pastVals_New/', + STUDY_NAME='OMG_DDPG_Actor', + meas_data_folder='Json_to_MonogDB_OMG_DDPG_Integrator_no_pastVals/', + MONGODB_PORT=12001, + loglevel='test', + is_dq0=True, + train_episode_length=2881, # defines when in training the env is reset e.g. 
for exploring starts, + # nothing -> Standard FeatureWrapper; past -> FeatureWrapper_pastVals; future -> FeatureWrapper_futureVals + # I-controller -> DDPG as P-term + standard I-controller; no-I-term -> Pure DDPG without integrator + env_wrapper='past' + ) diff --git a/experiments/hp_tune/util/configTD3.py b/experiments/hp_tune/util/configTD3.py new file mode 100644 index 00000000..f9806adf --- /dev/null +++ b/experiments/hp_tune/util/configTD3.py @@ -0,0 +1,10 @@ +cfg = dict(lea_vpn_nodes=['lea-skynet', 'lea-picard', 'lea-barclay', + 'lea-cyberdyne', 'webbah-ThinkPad-L380', 'LEA_WORK35'], + + STUDY_NAME='PC2_TD3_Vctrl_single_inv_5_newTestcase', + # STUDY_NAME='DEGUB_1', + meas_data_folder='Json_to_MonogDB/', + MONGODB_PORT=12001, + loglevel='test', + is_dq0=True + ) diff --git a/experiments/hp_tune/util/pc2.py b/experiments/hp_tune/util/pc2.py new file mode 100644 index 00000000..4da43241 --- /dev/null +++ b/experiments/hp_tune/util/pc2.py @@ -0,0 +1,74 @@ +"""Helper functions for the communication with the high-performance computing +cluster 'Paderborn Center for Parallel Computing' (PC²)""" + +import subprocess as sub +import pathlib +import time +import pandas as pd + + +def build_shell_script_lines(path, cluster, job_name, res_plan, execution_lines): + cfg_id_d = {'oculus': '#CCS', 'noctua': '#SBATCH'} + assert cluster in cfg_id_d, f'cluster "{cluster}" not supported' + assert isinstance(path, pathlib.Path) + log_path = path / 'logs' + log_path.mkdir(parents=True, exist_ok=True) + cfg_id = cfg_id_d[cluster] + lines = ['#! /usr/bin/env zsh', '#! /bin/zsh', '', + f'{cfg_id} -t {res_plan["duration"]}', + f'{cfg_id} -o {log_path / "%reqid.log"}', + f'{cfg_id} -N {job_name}', + f'{cfg_id} --res=rset={res_plan["rset"]}' + f':ncpus={res_plan["ncpus"]}' + f':mem={res_plan["mem"]}' + f':vmem={res_plan["vmem"]}', + f'{cfg_id} -j', ''] + + lines.extend(execution_lines if isinstance(execution_lines, list) + else [execution_lines]) + return [line + '\n' for line in lines] + + +def calculate_resources(duration=1, ncpus=6, memory=4, vmemory=8): + # todo: Think of a more intelligent (adaptive) resource plan + plan = {'duration': str(duration) + 'h', + 'rset': '1', + 'ncpus': str(ncpus), + 'mem': str(memory) + 'g', + 'vmem': str(vmemory) + 'g' + } + return plan + + +def create_n_run_script(name, content, dry=False): + with open(name, 'w+') as f: + f.writelines(content) + sub.run(["chmod", "+x", name]) # Make script executable + + if not dry: + # allocate and run, zB name = pc2_job_412643.sh + sub.run(['ccsalloc', name]) + time.sleep(1) + + +def get_ccsinfo(user): + """Returns the current ccs schedule as DataFrame""" + ccsinfo = sub.run(['ccsinfo', '-s', f'--user={user}', '--raw'], + stdout=sub.PIPE).stdout.decode().strip('\n').split('\n') + + # def run(*popenargs, + # input=None, capture_output=False, timeout=None, check=False, **kwargs): + + info_lines = [l.strip().split() for l in ccsinfo] + base_columns = ['jobid', 'jobname', 'user', + 'state', 'time', 'allocated_time_days', + 'allocated_time_hm', ] + if any(len(l) > 9 for l in info_lines): + columns = base_columns + ['efficiency_1', 'efficiency_2', 'resources'] + else: + columns = base_columns + ['efficiency', 'resources'] + ccsinfo = pd.DataFrame(info_lines + if len(info_lines) > 0 and len(info_lines[0]) > 0 + else None, columns=columns) + + return ccsinfo diff --git a/experiments/hp_tune/util/record_env.py b/experiments/hp_tune/util/record_env.py new file mode 100644 index 00000000..469e82ea --- /dev/null +++ 
b/experiments/hp_tune/util/record_env.py @@ -0,0 +1,71 @@ +from stable_baselines3.common.callbacks import BaseCallback +import matplotlib.pyplot as plt +import time +from experiments.hp_tune.env.vctrl_single_inv import folder_name + + +class RecordEnvCallback(BaseCallback): + + def __init__(self, env, agent, max_episode_steps, recorder=None, n_trail=0): + """ + Class to interact with stable_baseline learner callback, + Runs e.g. every 1000 steps to evaluate the learning process in the env: + + plot_callback = EveryNTimesteps(n_steps=1000, callback=RecordEnvCallback(env, model)) + agent.learn(total_timesteps=2000, callback=[plot_callback]) + + :param env: environment to run on + :param agent: agent to run on env to evaluate + """ + self.env = env + self.agent = agent + self.max_episode_steps = max_episode_steps + self.recorder = recorder + self.n_trail = n_trail + super().__init__() + + def _on_step(self) -> bool: + rewards = [] + obs = self.env.reset() + for _ in range(self.max_episode_steps): + self.env.render() + action, _states = self.agent.predict(obs, deterministic=True) + obs, reward, done, info = self.env.step(action) + rewards.append(reward) + if done or info.get("timelimit_reached", False): + break + # plot rewards? + + ts = time.gmtime() + + plt.plot(rewards) + plt.xlabel(r'$t\,/\,\mathrm{s}$') + plt.ylabel('$Reward$') + plt.grid(which='both') + + plt.savefig(f'{folder_name}/{self.n_trail}/Reward{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + validation_during_training = {"Name": "Validation during training", + "num_timesteps_learned_so_far": self.num_timesteps, + "time": ts, + "Reward": rewards} + + # Add v-measurements + validation_during_training.update( + {self.env.env.viz_col_tmpls[j].vars[i].replace(".", "_"): self.env.env.history[ + self.env.env.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + + validation_during_training.update( + {self.env.env.viz_col_tmpls[2].vars[i].replace(".", "_"): self.env.env.history[ + self.env.env.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + # va = self.env.env.history[self.env.env.viz_col_tmpls[0].vars[0]].copy() + + self.recorder.save_to_mongodb('Trail_number_' + self.n_trail, validation_during_training) + + self.env.close() + self.env.reset() + return True diff --git a/experiments/hp_tune/util/recorder.py b/experiments/hp_tune/util/recorder.py new file mode 100644 index 00000000..48f66f95 --- /dev/null +++ b/experiments/hp_tune/util/recorder.py @@ -0,0 +1,69 @@ +import json +from os import makedirs + +import sshtunnel +from pymongo import MongoClient + +from experiments.hp_tune.util.config import cfg + +MONGODB_PORT = cfg['MONGODB_PORT'] # 12001 + + +class Recorder: + + def __init__(self, node, database_name): + """ + Class to record measured data to mongo database using pymongo + Depending on the node we are operating at it connects via ssh to + - in lea_vpn: to cyberdyne port 12001 + - else: assume pc2 node -> connect to frontend + and stores data to mongoDB at port MONGODB_PORT ( =12001). 
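+        A minimal usage sketch (assumption: a MongoDB instance is reachable behind the tunnel; 'my_study' is a hypothetical database name):
+            rec = Recorder(platform.uname().node, 'my_study')
+            rec.save_to_mongodb(col='Trial_number_0', data={'Name': 'Test'})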
+ HINT: From pc2 frontend permanent tunnel from cyberdyne port 12001 to frontend 12001 + is needed (assuming Mongod-Process running on cyberdyne + :params node: platform.uname().node + :params database_name: string for the database name to store data in + """ + self.node = node + self.save_count = 0 + + if self.node in cfg['lea_vpn_nodes']: + self.server_name = 'lea38' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT)} + self.save_folder = cfg['meas_data_folder'] + else: + # assume we are on a node of pc2 -> connect to frontend and put data on prt 12001 + # from there they can be grep via permanent tunnel from cyberdyne + self.server_name = 'fe.pc2.uni-paderborn.de' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT), + 'ssh_username': 'webbah'} + + self.save_folder = '/scratch/hpc-prf-reinfl/weber/OMG/' + cfg['meas_data_folder'] + + self.database_name = database_name + makedirs(self.save_folder, exist_ok=True) + #pathlib.Path(self.save_folder.mkdir(exist_ok=True)) + + def save_to_mongodb(self, col: str = ' trails', data=None): + """ + Stores data to database in document col + """ + with sshtunnel.open_tunnel(self.server_name, **self.tun_cfg) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[self.database_name] + trial_coll = db[col] # get collection named col + trial_coll.insert_one(data) + + def save_to_json(self, col: str = ' trails', data=None): + """ + Stores data to json file in specified directory. From there the data can be grept by another process + and can be stored to a DB via ssh + To distinguish the files of one trail a save_count is incremented and added to the filename + """ + + with open(self.save_folder + self.database_name + '_' + col + '_' + str(self.save_count) + '.json', + 'w') as outfile: + json.dump(data, outfile) + + self.save_count += 1 diff --git a/experiments/hp_tune/util/reporter.py b/experiments/hp_tune/util/reporter.py new file mode 100644 index 00000000..6a8983f3 --- /dev/null +++ b/experiments/hp_tune/util/reporter.py @@ -0,0 +1,147 @@ +import json +import os +import platform +import time + +import numpy as np + +import sshtunnel +from pymongo import MongoClient +# from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.config import cfg + + +class Reporter: + + def __init__(self): + """ + Greps json data which is stored in the cfg[meas_data_folder] and sends it to mongoDB + on cyberdyne (lea38) via sshtunnel on port MONGODB_PORT + """ + + MONGODB_PORT = cfg['MONGODB_PORT'] + + node = platform.uname().node + + if node in cfg['lea_vpn_nodes']: + self.server_name = 'lea38' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT)} + self.save_folder = './' + cfg['meas_data_folder'] + else: + # assume we are on a node of pc2 -> connect to frontend and put data on prt 12001 + # from there they can be grep via permanent tunnel from cyberdyne + self.server_name = 'fe.pc2.uni-paderborn.de' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT), + 'ssh_username': 'webbah'} + + self.save_folder = '/scratch/hpc-prf-reinfl/weber/OMG/' + cfg['meas_data_folder'] + + def save_to_mongodb(self, database_name: str, col: str = ' trails', data=None): + """ + Stores data to database in document col + """ + with sshtunnel.open_tunnel(self.server_name, **self.tun_cfg) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[database_name] + trial_coll = db[col] # get collection named col + 
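# insert_one writes the dict as one new document into that collection (pymongo also adds an '_id' field to the passed dict) +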
trial_coll.insert_one(data) + + def oldest_file_in_tree(self, extension=".json"): + """ + Returns the oldest file-path string + """ + print(os.getcwd()) + return min( + (os.path.join(dirname, filename) + for dirname, dirnames, filenames in os.walk(self.save_folder) + for filename in filenames + if filename.endswith(extension)), + key=lambda fn: os.stat(fn).st_mtime) + + def oldest_file_with_name_in_tree(self, count_number_to_find, extension=".json"): + """ + Returns the oldest file-path string + + :param count_number_to_find: List of count_numbers to find and store instead of storing all + """ + print(os.getcwd()) + return min( + (os.path.join(dirname, filename) + for dirname, dirnames, filenames in os.walk(self.save_folder) + for filename in filenames + if filename.endswith(str(count_number_to_find) + extension)), + key=lambda fn: os.stat(fn).st_mtime) + + def json_to_mongo_via_sshtunnel(self, file_name_to_store=None): + + if not len(os.listdir(self.save_folder)) == 0: + + if file_name_to_store is None: + try: + oldest_file_path = self.oldest_file_in_tree() + except(ValueError) as e: + print('Folder seems empty or no matching data found!') + print(f'ValueError{e}') + print('Empty directory! Go to sleep for 5 minutes!') + time.sleep(5 * 60) + return + else: + oldest_file_path = file_name_to_store + + with open(oldest_file_path) as json_file: + data = json.load(json_file) + + successfull = False + retry_counter = 0 + + while not successfull: + try: + now = time.time() + if os.stat(oldest_file_path).st_mtime < now - 60: + self.save_to_mongodb(database_name=data['Database name'], + col='Trial_number_' + data['Trial number'], data=data) + print('Reporter: Data stored successfully to MongoDB and will be removed locally!') + os.remove(oldest_file_path) + successfull = True + except (sshtunnel.BaseSSHTunnelForwarderError) as e: + wait_time = np.random.randint(1, 60) + retry_counter += 1 + if retry_counter > 10: + print('Stopped after 10 connection attempts!') + raise e + print(f'Reporter: Could not connect via ssh to frontend, retry in {wait_time} s') + time.sleep(wait_time) + + else: + print('Empty directory! Go to sleep for 5 minutes!') + time.sleep(5 * 60) + + +if __name__ == "__main__": + + file_ending_number = [0, 1, 2] + + reporter = Reporter() + print("Starting Reporter for logging from local savefolder to mongoDB") + + print(f"Searching for files in directory with number ending on {file_ending_number}") + + # print(reporter.oldest_file_in_tree()) + while True: + # reporter.json_to_mongo_via_sshtunnel() + + # to send only files ending with number file_ending_number + for number in file_ending_number: + try: + oldest_named_file_path = reporter.oldest_file_with_name_in_tree(number) + print(oldest_named_file_path) + + reporter.json_to_mongo_via_sshtunnel(oldest_named_file_path) + + except(ValueError) as e: + print(f'No file with number {number} ending') + print(f'ValueError{e}') + print('Go to sleep for 5 seconds and go on with next number!') + time.sleep(5) diff --git a/experiments/hp_tune/util/scheduler.py b/experiments/hp_tune/util/scheduler.py new file mode 100644 index 00000000..e524a633 --- /dev/null +++ b/experiments/hp_tune/util/scheduler.py @@ -0,0 +1,59 @@ +from typing import Union, Callable + +import numpy as np + + +def linear_schedule(initial_value, final_value, t_start, t_end, total_timesteps: int = 1000) -> Callable[ + [float], float]: + """ + Linear learning rate schedule from t_start to t_end in between initial -> final value. 
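+    For example (a worked sketch): with initial_value=1e-3, final_value=1e-4, t_start=0, t_end=1000 and total_timesteps=1000, the returned func yields 1e-3 at progress_remaining=1.0 and decays linearly to 1e-4 at progress_remaining=0.0.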
+ :param initial_value: (float or str) start value + :param final_value: final value + :param t_start: timestep (int!) at which the linear decay starts + :param t_end: timestep (int!) at which the linear decay ends + :param total_timesteps: number of learning steps + :return: (function) + """ + + def func(progress_remaining: float) -> float: + """ + Progress will decrease from 1 (beginning) to 0 + :param progress_remaining: (float) progress_remaining = 1.0 - (num_timesteps / total_timesteps) + :return: (float) + """ + # Original: return initial_value * progress_remaining + + return np.maximum( + np.minimum(initial_value, initial_value + (t_start * (initial_value - final_value)) / (t_end - t_start) \ + - (initial_value - final_value) / (t_end - t_start) * ((1.0 - progress_remaining) \ + * total_timesteps)), final_value) + + # return np.maximum(final_value, np.minimum(initial_value,b+ m *(1.0 - progress_remaining) * total_timesteps)) + + return func + + +def exopnential_schedule(initial_value: Union[float, str], final_value: float = 0) -> Callable[[float], float]: + """ + Exponential learning rate schedule (not implemented yet; the returned func raises NotImplementedError). + :param initial_value: (float or str) start value + :param final_value: final value as percentage of initial value (e.g. 0.1 -> final value is 10 % of initial value) + :return: (function) + """ + if isinstance(initial_value, str): + initial_value = float(initial_value) + + def func(progress_remaining: float) -> float: + """ + Progress will decrease from 1 (beginning) to 0 + :param progress_remaining: (float) 1.0 - (num_timesteps / total_timesteps) + Y - X * M + :return: (float) + https://www.jeremyjordan.me/nn-learning-rate/ + """ + # return (progress_remaining * initial_value)*(1-(1-progress_remaining))# + final_value * initial_value + # return ( initial_value)**(1/progress_remaining)# + final_value * initial_value + raise NotImplementedError + return (initial_value) * (progress_remaining - 1) + + return func diff --git a/experiments/hp_tune/util/training_recorder.py b/experiments/hp_tune/util/training_recorder.py new file mode 100644 index 00000000..6922d447 --- /dev/null +++ b/experiments/hp_tune/util/training_recorder.py @@ -0,0 +1,66 @@ +from stable_baselines3.common.callbacks import BaseCallback +import numpy as np + + +class TrainRecorder(BaseCallback): + + def __init__(self, verbose=1): + super(TrainRecorder, self).__init__(verbose) + self.last_model_params = None # self.model.policy.state_dict() + self.params_change = [] + + def _on_training_end(self) -> None: + """ + This event is triggered before exiting the `learn()` method. 
+ """ + # asd = 1 + # ads = 2 + pass + + def _on_step(self) -> bool: + asd = 1 + # R_training[self.n_calls, number_trails] = self.training_env.envs[0].env.history.df['r_load.resistor1.R'].iloc[-1] + # R_training[self.n_calls-1, 0] = self.training_env.envs[0].env.history.df['r_load.resistor1.R'].iloc[-1] + """ + R_training.append(self.training_env.envs[0].env.history.df['r_load.resistor1.R'].iloc[-1]) + i_phasor_training.append((self.training_env.envs[0].i_phasor+0.5)*net['inverter1'].i_lim) + v_phasor_training.append((self.training_env.envs[0].v_phasor+0.5)*net['inverter1'].v_lim) + + if (self.training_env.envs[0].i_phasor)*net['inverter1'].i_lim > 15: + asd = 1 + + i_a.append(self.training_env.envs[0].env.history.df['lc.inductor1.i'].iloc[-1]) + i_b.append(self.training_env.envs[0].env.history.df['lc.inductor2.i'].iloc[-1]) + i_c.append(self.training_env.envs[0].env.history.df['lc.inductor3.i'].iloc[-1]) + + v_a.append(self.training_env.envs[0].env.history.df['lc.capacitor1.v'].iloc[-1]) + v_b.append(self.training_env.envs[0].env.history.df['lc.capacitor2.v'].iloc[-1]) + v_c.append(self.training_env.envs[0].env.history.df['lc.capacitor3.v'].iloc[-1]) + # nach env.step() + """ + return True + + def _on_rollout_end(self) -> None: + # asd = 1 + + model_params = self.model.policy.parameters_to_vector() + + if self.last_model_params is None: + self.last_model_params = model_params + else: + self.params_change.append(np.float64(np.mean(self.last_model_params - model_params))) + + """model_params = self.model.policy.state_dict() + if self.last_model_params is None: + for key, value in model_params.items(): + self.params_change[key.replace(".", "_")] = [] + else: + for key, value in model_params.items(): + #print(key) + self.params_change[key.replace(".", "_")].append(th.mean((model_params[key]-self.last_model_params[key])).tolist()) + """ + + self.last_model_params = model_params + + # self.model.actor.mu._modules # alle :) + pass diff --git a/experiments/hp_tune/visualize_tests/Collect_from_mongoDB.py b/experiments/hp_tune/visualize_tests/Collect_from_mongoDB.py new file mode 100644 index 00000000..8529d523 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/Collect_from_mongoDB.py @@ -0,0 +1,76 @@ +import matplotlib.pyplot as plt +import numpy as np +import optuna +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from bson import ObjectId +from plotly import tools +from pymongo import MongoClient + +from openmodelica_microgrid_gym.util import dq0_to_abc, abc_to_dq0 + +plt_train = True +plotly = False + +# db_name = 'OMG_DDPG_Actor' # 17 +# db_names = 'OMG_DDPG_Integrator_no_pastVals' +# db_names = ['OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr'] # 15 +# db_name = 'OMG_DDPG_Integrator_no_pastVals_corr' + +db_names = [ + 'OMG_DDPG_Actor'] # , 'OMG_DDPG_Integrator_no_pastVals', 'OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr', +# 'OMG_DDPG_Integrator_no_pastVals_corr'] + +for db_name in db_names: + ret_list_test = [] + ret_mean_list_test = [] + ret_std_list_test = [] + reward_list = [] + + with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + # store best trail + + # [for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]: + trial = db['Trial_number_44'] + # if trial.state == optuna.structs.TrialState.COMPLETE: + trial_test = trial.find_one({"Name": "Test_Reward"}) + + if trial_test is not None: + # If, 
trail is not comleted + # ret_list_test.append(trial_test['Return']) + ret_std_list_test.append(np.std(trial_test['Reward'])) + ret_mean_list_test.append(np.mean(trial_test['Reward'])) + reward_list.append(trial_test['Reward']) + + for coll_name in db.list_collection_names(): + + # [for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]: + trial = db[coll_name] + # if trial.state == optuna.structs.TrialState.COMPLETE: + trial_test = trial.find_one({"Name": "Test_Reward"}) + + if trial_test is not None: + # If, trail is not comleted + # ret_list_test.append(trial_test['Return']) + ret_std_list_test.append(np.std(trial_test['Reward'])) + ret_mean_list_test.append(np.mean(trial_test['Reward'])) + # reward_list.append(trial_test['Reward']) + + # if len(ret_list_test) > 550: + # break + + print(ret_mean_list_test) + print(ret_std_list_test) + asd = 1 + results = { + #'return': ret_list_test, + 'return_Mean': ret_mean_list_test, + 'return_Std': ret_std_list_test, + 'study_name': db_name} + + df = pd.DataFrame(results) + df.to_pickle(db_name + "return_8XX_agents.pkl") diff --git a/experiments/hp_tune/visualize_tests/Collect_trainData_from_Mongo.py b/experiments/hp_tune/visualize_tests/Collect_trainData_from_Mongo.py new file mode 100644 index 00000000..075e5709 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/Collect_trainData_from_Mongo.py @@ -0,0 +1,79 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from bson import ObjectId +from plotly import tools +from pymongo import MongoClient + +from openmodelica_microgrid_gym.util import dq0_to_abc, abc_to_dq0 + +plt_train = True +plotly = False + +db_name = 'OMG_DDPG_Actor' # 15 +db_name = 'OMG_DDPG_Integrator_no_pastVals' # 15 + +trial = '0' +show_episode_number = 19 + +ret_mean_list_test = [] +ret_std_list_test = [] +i_q_delta_mean_list_test = [] +i_q_delta_std_list_test = [] +i_d_delta_mean_list_test = [] +i_d_delta_std_list_test = [] +reward_list = [] + +reward_df = pd.DataFrame() + +with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + idx = 0 + for coll_name in db.list_collection_names(): + trial = db[coll_name] + # trial = db.Trial_number_23 + + train_data = trial.find_one({"Name": "After_Training"}) + # trial_test = trial.find_one({"Name": "Test"}) + + if train_data is not None: # if trial not finished (was in actor_Ddpg > 550) + + if idx == 0: + reward_df = pd.DataFrame({str(idx): train_data['Mean_eps_reward']}) + else: + + df_tmp = pd.DataFrame({str(idx): train_data['Mean_eps_reward']}) + reward_df = reward_df.join(df_tmp) + idx += 1 + + # reward_list.append(train_data['Mean_eps_reward']) + + # reward_df = reward_df.join(df_tmp) + + # if len(reward_list) >= 550: + # break + +reward_df.to_pickle(db_name + "_8XX_agents_train_data.pkl") +# print(ret_mean_list_test) +# print(ret_std_list_test) +# asd = 1 +# results = { +# 'reward': reward_list, +# 'study_name': db_name} + +# df = pd.DataFrame(results) +# df.to_pickle(db_name+"_1250_agents_train_data.pkl") + + +plt.plot(reward_list) +# plt.fill_between( m - s, m + s, facecolor='r') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title(db_name) +plt.show() diff --git a/experiments/hp_tune/visualize_tests/Com_models_pc2.py b/experiments/hp_tune/visualize_tests/Com_models_pc2.py new file mode 100644 
index 00000000..67eecf5d --- /dev/null +++ b/experiments/hp_tune/visualize_tests/Com_models_pc2.py @@ -0,0 +1,809 @@ +print('Start script') + +import logging +import os +import platform +import time +from functools import partial + +import GPy +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from stochastic.processes import VasicekProcess +from tqdm import tqdm +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper, FeatureWrapper_pastVals, FeatureWrapper_futureVals, \ + BaseWrapper, FeatureWrapper_I_controller +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# imports for PIPI +from experiments.hp_tune.env.random_load import RandomLoad +from openmodelica_microgrid_gym.agents import SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + +import pandas as pd + +from openmodelica_microgrid_gym.util import abc_to_dq0 + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +import gym + +# np.random.seed(0) + +show_plots = True +save_results = False + +# folder_name = 'saves/OMG_DDPGActor_wo_integrator_butPastVals_3_Deterministic' # cfg['STUDY_NAME'] +folder_name = 'saves/paper_desscaR_load' # cfg['STUDY_NAME'] +# folder_name = 'saves/OMG_i_load_feature_0_Deterministic' # cfg['STUDY_NAME'] +node = platform.uname().node + +# model_name = 'model_retrain_pastVals12.zip' +# number_past_vals = [5, 5, 0, 0] # [0, 5, 10, 16, 25] # [30, 0] +number_past_vals = [5] # [0, 5, 10, 16, 25] # [30, 0] +# use_past_vals = [True] # [False, True, True, True, True] # [True, False] +# wrapper = ['past', 'no-I-term', 'past', 'i_load'] # ['past', 'future', 'no-I-term', 'I-controller'] +wrapper = ['no-I-term'] # ['past', 'future', 'no-I-term', 'I-controller'] + +# model_name = ['model.zip'] +# model_path = 'OMG_Integrator_Actor_i_load_feature_2/1/' +# model_path = 'OMG_DDPG_Actor/3/' +model_path = 'experiments/hp_tune/trained_models/paper/' +# model_path = 'OMG_Integrator_Actor/32/' + +# model_name = ['model_OMG_DDPG_Integrator_no_pastVals.zip', 'model_OMG_DDPG_Actor.zip', +# 'model_OMG_DDPG_Integrator_no_pastVals_corr.zip', +# 'model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip'] + +model_name = ['model_OMG_DDPG_Integrator_no_pastVals.zip', 'model_OMG_DDPG_Actor.zip'] +model_name = ['model_OMG_DDPG_Actor.zip'] +# model_name = ['model.zip'] +################DDPG Config Stuff######################################################################### +gamma = 0.946218 +integrator_weight = 0.311135 +antiwindup_weight = 0.660818 +error_exponent = 0.5 +use_gamma_in_rew = 1 +n_trail = 50001 +actor_number_layers = 2 +critic_number_layers = 4 +alpha_relu_actor = 0.208098 +alpha_relu_critic = 0.00678497 +""" +################DDPG Config 
Stuff######################################################################### +gamma = 0.984421 # 0.946218 +integrator_weight = 0 # 0.311135 +antiwindup_weight = 0 # 0.660818 +error_exponent = 0.5 +use_gamma_in_rew = 1 +n_trail = 50001 +actor_number_layers = 2 +critic_number_layers = 3 # 4 +alpha_relu_actor = 0.0034719 # 0.208098 +alpha_relu_critic = 0.00613757 # 0.00678497 + +print('HPs für DDPG ohne I-Anteil!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') +""" +mongo_recorder = Recorder(node=node, database_name=folder_name) + +num_average = 1 +max_episode_steps_list = [10000] # [1000, 5000, 10000, 20000, 50000, 100000] + +# data_str = 'experiments/hp_tune/data/R_load_deterministic_test_case2_1_seconds.pkl' +data_str = 'experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl' +data_str = 'experiments/hp_tune/data/R_load_dessca.pkl' + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +#################PI Config stuff############################################################################## + +current_directory = os.getcwd() +# folder_name = 'Pipi_safeopt_best_run4d' +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_vctrl_single_inv.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +# max_episode_steps = 1002 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F + +""" +print("P10 stuff!") +L_filter = 70e-6 # / H +R_filter = 1.1e-3 # / Ohm +C_filter = 250e-6 # / F +""" + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) +upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 +##################################### +# Definitions for the GP +prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set +noise_var = 0.001 # ** 2 # measurement noise sigma_omega +prior_var = 2 # prior variance of the GP + +bounds = [(0.000, 0.045), (4, 450)] # bounds on the input variable current-Ki&Kp and voltage-Ki&Kp +lengthscale = [.003, 50.] 
# length scale for the parameter variation [current-Ki&Kp and voltage-Ki&Kp] for the GP + +safe_threshold = 0 +j_min = -5 # cal min allowed performance + +explore_threshold = 0 + +# Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of +# limit exceeded +abort_reward = 100 * j_min + +# Definition of the kernel +kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + +##################################### +# Definition of the controllers +# kp_v = 0.002 +# ki_v = 143 + +# Old optimized parameters: +kp_v = 0 # 0.0095 # 0.0 +ki_v = 182 # 173.22 # 200 +kp_c = 0.0308 # 0.0404 # 0.04 +ki_c = 13.3584 # 4.065 # 11.8 + +""" +#P10: +print('using p10 setting') +kp_v = 0.2972 +ki_v = 142.7 +kp_c = 0.00068 +ki_c = 0.731 +""" +# Choose Kp and Ki for the current and voltage controller as mutable parameters +mutable_params = dict(voltageP=MutableFloat(kp_v), voltageI=MutableFloat(ki_v)) # 300Hz +# mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz +voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-i_lim * 10, i_lim * 10)) + +current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) # Current controller values +droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) +qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + +ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + +agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + +"""""" +for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + for ave_run in tqdm(range(num_average), desc='steps', unit='step', leave=False): + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, + i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + ####################################PI Stuff################################################ + R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + rand_load_train = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_tenLoadstepPerEpisode2881Len_test_case_10_seconds.pkl')) + # 'experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl')) + # 'experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl')) + # 'experiments/hp_tune/data/R_load_deterministic_test_case_25_ohm_1_seconds.pkl')) + data_str)) + + cb = CallbackList() + # set initial = None to reset load random in range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + 
cb.append(rand_load_train.reset) + + plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + + + # rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + # load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + # fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + history=FullHistory(), + # on_episode_reset_callback=cb.fire, + action_time_delay=1 * undersample + ) + + rew.gamma = 0 + return_sum_PI = 0.0 + rew_list_PI = [] + 
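# containers for logging the PI baseline's dq0 voltages, load profile and limit-violation bookkeeping +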
v_d_PI = [] + v_q_PI = [] + v_0_PI = [] + R_load_PI = [] + limit_exceeded_in_test_PI = False + limit_exceeded_penalty_PI = 0 + + agent.reset() + agent.obs_varnames = env.history.cols + env.history.cols = env.history.structured_cols(None) + agent.measurement_cols + env.measure = agent.measure + agent_fig = None + obs_PI = env.reset() + + for step in tqdm(range(env.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + agent.observe(None, False) + act_PI = agent.act(obs_PI) + obs_PI, r_PI, done_PI, info_PI = env.step(act_PI) + rew_list_PI.append(r_PI) + env.render() + return_sum_PI += r_PI + if r_PI == -1 and not limit_exceeded_in_test_PI: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test_PI = True + limit_exceeded_penalty_PI = -1 + + # _, env_fig = env.close() + agent.observe(r_PI, done_PI) + + v_a_PI = env.history.df['lc.capacitor1.v'] + v_b_PI = env.history.df['lc.capacitor2.v'] + v_c_PI = env.history.df['lc.capacitor3.v'] + i_a_PI = env.history.df['lc.inductor1.i'] + i_b_PI = env.history.df['lc.inductor2.i'] + i_c_PI = env.history.df['lc.inductor3.i'] + R_load_PI = (env.history.df['r_load.resistor1.R'].tolist()) + phase_PI = env.history.df['inverter1.phase.0'] # env.net.components[0].phase + + i_dq0_PI = abc_to_dq0(np.array([i_a_PI, i_b_PI, i_c_PI]), phase_PI) + v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) + + i_d_PI = i_dq0_PI[0].tolist() + i_q_PI = i_dq0_PI[1].tolist() + i_0_PI = i_dq0_PI[2].tolist() + v_d_PI = (v_dq0_PI[0].tolist()) + v_q_PI = (v_dq0_PI[1].tolist()) + v_0_PI = (v_dq0_PI[2].tolist()) + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "time": ts, + "PI_Kp_c": kp_c, + "PI_Ki_c": ki_c, + "PI_Kp_v": kp_v, + "PI_Ki_v": ki_v, + "DDPG_model_path": model_path, + "Return PI": (return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI), + "Reward PI": rew_list_PI, + "env_hist_PI": env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "PI result for comparison with RL agent", + "optimization node": 'Thinkpad', + "optimization folder name": 'Pipi_new_testcase_opt_4d_undsafe_2' + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/PI_{max_episode_steps_list[max_eps_steps]}steps') + + ####################################DDPG Stuff############################################## + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + + net = Network.load('net/net_vctrl_single_inv_dq0.yaml') # is used from vctrl_single_env, not needed here + + for used_model, wrapper_mode, used_number_past_vales in zip(model_name, wrapper, number_past_vals): + + if wrapper_mode == 'i_load': + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? 
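+ # observation channels: abc inductor currents, abc capacitor voltages and the three voltage reference components (plus the abc load currents in the 'i_load' case)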
+ obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2' # ], + , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'], + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + ) + else: + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'], + # , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'], + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + ) + + if wrapper_mode in ['past', 'i_load']: + env_test = FeatureWrapper_pastVals(env_test, number_of_features=9 + used_number_past_vales * 3, + # training_episode_length=training_episode_length, (da aus pickle!) 
+ recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, + number_past_vals=used_number_past_vales) + + elif wrapper_mode == 'future': + env_test = FeatureWrapper_futureVals(env_test, number_of_features=9, + recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, number_future_vals=10, + future_data=data_str) + + elif wrapper_mode == 'I-controller': + env_test = FeatureWrapper_I_controller(env_test, number_of_features=14 + used_number_past_vales * 3, + recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, + Ki=12, + number_past_vals=number_past_vals) + + elif wrapper_mode == 'no-I-term': + env_test = BaseWrapper(env_test, number_of_features=6 + used_number_past_vales * 3, + recorder=mongo_recorder, n_trail=n_trail, gamma=gamma, + number_past_vals=used_number_past_vales) + + else: + env_test = FeatureWrapper(env_test, number_of_features=11, + recorder=mongo_recorder, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, + penalty_P_weight=0) # , use_past_vals=True, number_past_vals=30) + + if wrapper_mode not in ['no-I-term', 'I-controller']: + env_test.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + # model2 = DDPG.load(model_path + f'model.zip') # , env=env_test) + print('Before load') + + model = DDPG.load(model_path + f'{used_model}') # , env=env_test) + + print('After load') + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + if wrapper_mode not in ['no-I-term', 'I-controller']: + env_test.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + return_sum = 0.0 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + rew_list = [] + v_d = [] + v_q = [] + v_0 = [] + action_P0 = [] + action_P1 = [] + action_P2 = [] + action_I0 = [] + action_I1 = [] + action_I2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + R_load = [] + + ####### Run Test ######### + # agent ~ PI Controllerv using env + # model ~ RL Controller using env_test + # Both run in the same loop + + obs = env_test.reset() + + for step in tqdm(range(env_test.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + action, _states = model.predict(obs, deterministic=True) + if step == 988: + asd = 1 + obs, rewards, done, info = env_test.step(action) + action_P0.append(np.float64(action[0])) + action_P1.append(np.float64(action[1])) + action_P2.append(np.float64(action[2])) + if wrapper_mode not in ['no-I-term', 'I-controller']: + action_I0.append(np.float64(action[3])) + action_I1.append(np.float64(action[4])) + 
action_I2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set additional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + + # print(rewards) + if done: + env_test.close() + + # print(limit_exceeded_in_test) + break + + env_test.close() + + v_a = env_test.history.df['lc.capacitor1.v'] + v_b = env_test.history.df['lc.capacitor2.v'] + v_c = env_test.history.df['lc.capacitor3.v'] + i_a = env_test.history.df['lc.inductor1.i'] + i_b = env_test.history.df['lc.inductor2.i'] + i_c = env_test.history.df['lc.inductor3.i'] + R_load = (env_test.history.df['r_load.resistor1.R'].tolist()) + phase = env_test.history.df['inverter1.phase.0'] # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + + i_d = i_dq0[0].tolist() + i_q = i_dq0[1].tolist() + i_0 = i_dq0[2].tolist() + v_d = (v_dq0[0].tolist()) + v_q = (v_dq0[1].tolist()) + v_0 = (v_dq0[2].tolist()) + """ + plt.plot(v_d_PI, 'b') + plt.plot(v_q_PI, 'r') + plt.plot(v_0_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title('PI') + plt.show() + + plt.plot(rew_list_PI) + plt.xlabel("") + plt.grid() + plt.ylabel("Reward") + plt.title('PI') + plt.show() + + plt.plot(R_load_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('Test') + plt.show() + """ + + plt.plot(v_d, 'b') + plt.plot(v_q, 'r') + plt.plot(v_0, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title(f'DDPG - {used_model}') + plt.show() + + plt.plot(rew_list) + plt.xlabel("") + plt.grid() + plt.ylabel("Reward") + plt.title(f'DDPG - {used_model}') + plt.show() + + plt.plot(R_load, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('DDPG') + plt.show() + + plt.plot(integrator_sum0) + plt.plot(integrator_sum1) + plt.plot(integrator_sum2) + plt.ylabel('integrator state') + plt.show() + + plt.plot(action_I0) + plt.plot(action_I1) + plt.plot(action_I2) + plt.ylabel('action I') + plt.show() + + plt.plot(action_P0) + plt.plot(action_P1) + plt.plot(action_P2) + plt.ylabel('action P') + plt.show() + + # return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + print(f'RL: {(return_sum / env_test.max_episode_steps + limit_exceeded_penalty)}') + # print(f'PI: {(return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI)}') + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "model name": model_name, + "Wrapper": wrapper, + "used_number_past_vales": used_number_past_vales, + "time": ts, + "ActionP0": action_P0, + "ActionP1": action_P1, + "ActionP2": action_P2, + "ActionI0": action_I0, + "ActionI1": action_I1, + "ActionI2": action_I2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "DDPG_model_path": model_path, + "Return DDPG": (return_sum / env_test.max_episode_steps + limit_exceeded_penalty), + "Reward DDPG": rew_list, + "env_hist_DDPG": env_test.env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": 
"execution of RL agent on 10 s test case-loading values", + "optimization node": 'Thinkpad', + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/' + used_model + f'_{max_episode_steps_list[max_eps_steps]}steps') + + ret_list.append((return_sum / env_test.max_episode_steps + limit_exceeded_penalty)) + ret_array[ave_run] = (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + # ret_dict[str(ave_run)] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # zipped = zip(max_episode_steps_list[max_eps_steps], ret_list) + # temp_dict = dict(zipped) + temp_dict = {str(max_episode_steps_list[max_eps_steps]): ret_list} + result_list.append(temp_dict) + # ret_dict.append(zipped) + # df = df.append(ret_dict) + + mean_list.append(np.mean(ret_array)) + std_list.append(np.std(ret_array)) + +# df = df.append(temp_list, True) +print(mean_list) +print(std_list) +print(result_list) + +results = { + 'Mean': mean_list, + 'Std': std_list, + 'All results': result_list, + 'max_episode_steps_list': max_episode_steps_list +} + +df = pd.DataFrame(results) +# df.to_pickle("DDPG_study18_best_test_varianz.pkl") +asd = 1 diff --git a/experiments/hp_tune/visualize_tests/CompareModelsPlotting_noNoise.py b/experiments/hp_tune/visualize_tests/CompareModelsPlotting_noNoise.py new file mode 100644 index 00000000..40fa282e --- /dev/null +++ b/experiments/hp_tune/visualize_tests/CompareModelsPlotting_noNoise.py @@ -0,0 +1,267 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +make_pyplot = False +show_load = True +interval_plt = True + +# interval_list_x = [[0, 0.01], [0.01, 1.0], [0.78, 0.9]] +# interval_list_y = [[-25, 210], [-40, 210], [165, 175]] + + +# Fuer den Detzerministc case +interval_list_x = [[0, 0.01], [0.105, 0.2], [0.695, 0.71], [0.85, 0.88]] +interval_list_y = [[-25, 210], [165, 175], [-25, 335], [165, 175]] + +# Fuer den 10s Fall +interval_list_x = [[0, 0.006], [2.0925, 2.1], [3.11, 3.12], [7.1, 7.14], [8.145, 8.16]] +# interval_list_x = [[0, 0.01], [2.09, 2.1], [2.11, 2.12], [7.08, 7.16], [7.145, 7.16]] +interval_list_y = [[-25, 210], [-25, 340], [160, 190], [-25, 340], [125, 340]] +# folder_name = 'saves/Comparison_study_future10Rvals_deterministicTestcase' +# folder_name = 'saves/Comparison_study_22_best_pastVal_HPO_deterministic_noMeasNoise' +folder_names = [ + 'saves/OMG_integratorActor_3_Deterministic'] # , 'saves/OMG_i_load_feature_0_Deterministic'] # _deterministic' +folder_names = ['saves/OMG_i_load_feature_0_Deterministic'] # _deterministic' +folder_names = ['saves/paper_deterministic'] # _deterministic' +folder_names = ['saves/paper'] # _deterministic' +folder_names = ['saves/paper_new', 'saves/paper_new', 'saves/paper_new', 'saves/paper_new'] # _deterministic' + +number_of_steps = '_100000steps' + +# df = pd.read_pickle(folder_names[0] + '/PI' + number_of_steps) +df = pd.read_pickle('saves/paper_desscaR_load' + '/PI_10000steps') +# df = pd.read_pickle(folder_name + '/PI_9989steps') + +env_hist_PI = df['env_hist_PI'] +v_a_PI = env_hist_PI[0]['lc.capacitor1.v'].tolist() +v_b_PI = env_hist_PI[0]['lc.capacitor2.v'].tolist() +v_c_PI = env_hist_PI[0]['lc.capacitor3.v'].tolist() +R_load_PI = (env_hist_PI[0]['r_load.resistor1.R'].tolist()) +phase_PI = env_hist_PI[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) +v_d_PI = 
(v_dq0_PI[0].tolist()) +v_q_PI = (v_dq0_PI[1].tolist()) +v_0_PI = (v_dq0_PI[2].tolist()) + +reward_PI = df['Reward PI'][0] +return_PI = df['Return PI'][0] +kp_c = df['PI_Kp_c'][0] +ki_c = df['PI_Ki_c'][0] +kp_v = df['PI_Kp_v'][0] +ki_v = df['PI_Ki_v'][0] + +model_names = ['model_OMG_DDPG_Actor.zip', 'model_OMG_DDPG_Integrator_no_pastVals_corr.zip', + 'model_OMG_DDPG_Integrator_no_pastVals.zip', + 'model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip'] +ylabels = ['DDPG', 'DDPG-I', 'DDPG-I+pastVals', 'DDPG-I+i_load'] +# ylabels = ['DDPG-I+pastVals'] +# ylabels = ['DDPG-I+pastVals'] +# model_names = ['model_OMG_DDPG_Actor.zip'] # ['model_0_pastVals.zip','model_2_pastVals.zip', 'model_5_pastVals.zip', 'model_10_pastVals.zip', 'model_16_pastVals.zip', 'model_25_pastVals.zip', ] # , 'model_noPastVals.zip'] +# model_names = ['model_OMG_DDPG_Integrator_no_pastVals.zip'] +# model_names = ['model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip'] +# model_names = ['model_OMG_DDPG_Integrator_no_pastVals_corr.zip'] +pastVals = ['5', '0', '5', '0'] # ['0', '2', '5', '10', '16', '25'] +# pastVals = ['5'] # ['0', '2', '5', '10', '16', '25'] +return_list_DDPG = [] +reward_list_DDPG = [] + +ts = 1e-4 # if ts stored: take from db + +# t_test_R = np.arange(ts, (len(testcase_100k['v_d_PI'])) * ts, ts).tolist() + +t_test = np.arange(0, round((len(v_0_PI)) * ts, 4), ts).tolist() +t_reward = np.arange(0, round((len(reward_PI)) * ts, 4), ts).tolist() + +# fig, axs = plt.subplots(len(model_names)+2, len(interval_list_y), figsize=(16, 12)) # , sharex=True) # a new figure window +fig, axs = plt.subplots(len(model_names) + 3, len(interval_list_y), + figsize=(12, 10)) # , sharex=True) # a new figure window + +for i in range(len(interval_list_y)): + plt_count = 3 + ############## Subplots + # fig = plt.figure(figsize=(10,12)) # a new figure window + + for model_name, pV, folder_name, ylabel_use in zip(model_names, pastVals, folder_names, ylabels): + + df_DDPG = pd.read_pickle(folder_name + '/' + model_name + number_of_steps) + #df_DDPG = pd.read_pickle(folder_name + '/' 'model_5_pastVals.zip_100000steps_NoPhaseFeature_1427') + + if i == 0: + return_list_DDPG.append(round(df_DDPG['Return DDPG'][0], 7)) + # reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + + env_hist_DDPG = df_DDPG['env_hist_DDPG'] + + v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() + v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() + v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() + phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + v_d_DDPG = (v_dq0[0].tolist()) + v_q_DDPG = (v_dq0[1].tolist()) + v_0_DDPG = (v_dq0[2].tolist()) + + axs[0, i].plot(t_test, R_load_PI) + axs[0, i].grid() + axs[0, i].set_xlim(interval_list_x[i]) + # axs[0, i].set_ylim([15, 75]) + if i == 0: + axs[0, i].set_ylabel("$R_{\mathrm{load}}\,/\,\mathrm{\Omega}$") + # ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + + DDPG_reward = df_DDPG['Reward DDPG'][0] + if plt_count == 3: + axs[1, i].plot(t_reward, reward_PI, 'b', label=f' PI: ' + f'{round(sum(reward_PI[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + axs[1, i].plot(DDPG_reward, 'r', label=f'DDPG: ' + f'{round(sum(DDPG_reward[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + axs[1, i].grid() + axs[1, i].set_xlim(interval_list_x[i]) + # axs[1, 
i].set_ylim(interval_list_y[i]) + axs[1, i].legend() + if i == 0: + axs[1, i].set_ylabel("Reward") + + axs[2, i].plot(t_test, v_d_PI, 'b', label='v_d') + axs[2, i].plot(t_test, v_q_PI, 'r', label='v_q') + axs[2, i].plot(t_test, v_0_PI, 'g', label='v_0') + axs[2, i].grid() + axs[2, i].set_xlim(interval_list_x[i]) + axs[2, i].set_ylim(interval_list_y[i]) + if i == 0: + axs[2, i].set_ylabel("$v_{\mathrm{dq0, PI}}\,/\,\mathrm{V}$") + # else: + # axs[1, i].set_ylabel("$v_{\mathrm{q0, PI}}\,/\,\mathrm{V}$") + + axs[plt_count, i].plot(v_d_DDPG, 'b') + axs[plt_count, i].plot(v_q_DDPG, 'r') + axs[plt_count, i].plot(v_0_DDPG, 'g') + axs[plt_count, i].grid() + axs[plt_count, i].set_xlim(interval_list_x[i]) + axs[plt_count, i].set_ylim(interval_list_y[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + # axs[plt_count, i].set_ylabel(pV) + axs[plt_count, i].set_ylabel(ylabel_use) + # axs[plt_count, i].set_ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") + # else: + # axs[plt_count, i].set_ylabel("$v_{\mathrm{q0, DDPG}}\,/\,\mathrm{V}$") + plt_count += 1 + plt.show() + +fig.suptitle(f'Model using pastVals:' + str(pastVals) + ' \n ' + f'Model-return(MRE)' + str(return_list_DDPG) + ' \n' + f' PI-return(MRE): {round(return_PI, 7)} \n ' + f'PI: Kp_i = {kp_c}, Ki_i = {ki_c}, Kp_v = {kp_v}, Ki_v = {ki_v}', + fontsize=14) + +fig.subplots_adjust(wspace=0.2, hspace=0.2) +plt.show() + +#fig.savefig(f'{folder_name}/Ausschnitt_2pV_q0.pdf') + +if make_pyplot: + # pyplot Load + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=R_load_PI)) # , title='R_load') + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + # pyplot PI + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_reward, y=DDPG_reward)) + plot.add_trace( + px.Scatter(x=t_reward, y=reward_PI)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + for model_name in model_names: + df_DDPG = pd.read_pickle(folder_name + '/' + model_name + number_of_steps) + + env_hist_DDPG = df_DDPG['env_hist_DDPG'] + + v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() + v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() + v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() + phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + v_d_DDPG = (v_dq0[0].tolist()) + v_q_DDPG = (v_dq0[1].tolist()) + v_0_DDPG = (v_dq0[2].tolist()) + # pyplot ddpg + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=v_d_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_DDPG)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + plot.add_trace( + px.Scatter(x=t_test, y=v_d_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_PI)) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + +plt.plot(t_test, v_d_DDPG, 'b') +# plt.plot(t_test, v_d_PI, 'r') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +# plt.xlim([0, 0.025]) +plt.ylim([160, 190]) +plt.xlabel("time") +plt.ylabel("v_dq0_DDPG") +plt.title(f'DDPG') 
+plt.show() + +plt.plot(t_test, v_d_PI, 'r') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +# plt.xlim([0, 0.025]) +plt.ylim([160, 190]) +plt.xlabel("time") +plt.ylabel("v_dq0_PI") +plt.title(f'PI') +plt.show() + +plt.plot(t_test, v_d_DDPG, 'b') +# plt.plot(t_test, v_d_PI, 'r') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +plt.xlim([0.1, 0.11]) +plt.ylim([290, 360]) +plt.xlabel("time") +plt.ylabel("v_dq0_DDPG") +plt.title(f'DDPG') +plt.show() + +plt.plot(t_test, v_d_PI, 'r') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +plt.xlim([0.1, 0.2]) +plt.ylim([290, 360]) +plt.xlabel("time") +plt.ylabel("v_dq0_PI") +plt.title(f'PI') +plt.show() diff --git a/experiments/hp_tune/visualize_tests/CompareModels_noNoise.py b/experiments/hp_tune/visualize_tests/CompareModels_noNoise.py new file mode 100644 index 00000000..72f473b2 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/CompareModels_noNoise.py @@ -0,0 +1,803 @@ +print('Start script') + +import logging +import os +import platform +import time +from functools import partial + +import GPy +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from stochastic.processes import VasicekProcess +from tqdm import tqdm +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper, FeatureWrapper_pastVals, FeatureWrapper_futureVals, \ + BaseWrapper, FeatureWrapper_I_controller +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# imports for PIPI +from experiments.hp_tune.env.random_load import RandomLoad +from openmodelica_microgrid_gym.agents import SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + +import pandas as pd + +from openmodelica_microgrid_gym.util import abc_to_dq0 + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +import gym + +# np.random.seed(0) + +show_plots = True +save_results = False + +# folder_name = 'saves/OMG_DDPGActor_wo_integrator_butPastVals_3_Deterministic' # cfg['STUDY_NAME'] +folder_name = 'saves/paper' # cfg['STUDY_NAME'] +# folder_name = 'saves/OMG_i_load_feature_0_Deterministic' # cfg['STUDY_NAME'] +node = platform.uname().node + +# model_name = 'model_retrain_pastVals12.zip' +number_past_vals = [5] # , 5, 0, 0] # [0, 5, 10, 16, 25] # [30, 0] +# use_past_vals = [True] # [False, True, True, True, True] # [True, False] +wrapper = ['past'] #, 'no-I-term', 'past', 'i_load'] # ['past', 'future', 'no-I-term', 'I-controller'] + +# model_name = ['model.zip'] +# model_path = 'OMG_Integrator_Actor_i_load_feature_2/1/' +# model_path = 'OMG_DDPG_Actor/3/' +model_path = 'experiments/hp_tune/trained_models/paper/' +model_path = 'experiments/hp_tune/trained_models/NoPhaseFeature_1427/' +# model_path = 'OMG_Integrator_Actor/32/' + +model_name = 
['model_OMG_DDPG_Integrator_no_pastVals.zip', 'model_OMG_DDPG_Actor.zip', + 'model_OMG_DDPG_Integrator_no_pastVals_corr.zip', + 'model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip'] + +model_name = ['model_5_pastVals.zip'] +# model_name = ['model.zip'] +################DDPG Config Stuff######################################################################### +gamma = 0.946218 +integrator_weight = 0.311135 +antiwindup_weight = 0.660818 +error_exponent = 0.5 +use_gamma_in_rew = 1 +n_trail = 50001 +actor_number_layers = 2 +critic_number_layers = 4 +alpha_relu_actor = 0.208098 +alpha_relu_critic = 0.00678497 +""" +################DDPG Config Stuff######################################################################### +gamma = 0.984421 # 0.946218 +integrator_weight = 0 # 0.311135 +antiwindup_weight = 0 # 0.660818 +error_exponent = 0.5 +use_gamma_in_rew = 1 +n_trail = 50001 +actor_number_layers = 2 +critic_number_layers = 3 # 4 +alpha_relu_actor = 0.0034719 # 0.208098 +alpha_relu_critic = 0.00613757 # 0.00678497 + +print('HPs für DDPG ohne I-Anteil!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') +""" +mongo_recorder = Recorder(node=node, database_name=folder_name) + +num_average = 1 +max_episode_steps_list = [100000] # [1000, 5000, 10000, 20000, 50000, 100000] + +data_str = 'experiments/hp_tune/data/R_load_deterministic_test_case2_1_seconds.pkl' +data_str = 'experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl' +# data_str = 'experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl' + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +#################PI Config stuff############################################################################## + +current_directory = os.getcwd() +# folder_name = 'Pipi_safeopt_best_run4d' +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_vctrl_single_inv.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +# max_episode_steps = 1002 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F + +""" +print("P10 stuff!") +L_filter = 70e-6 # / H +R_filter = 1.1e-3 # / Ohm +C_filter = 250e-6 # / F +""" + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) 
+upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 +##################################### +# Definitions for the GP +prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set +noise_var = 0.001 # ** 2 # measurement noise sigma_omega +prior_var = 2 # prior variance of the GP + +bounds = [(0.000, 0.045), (4, 450)] # bounds on the input variable current-Ki&Kp and voltage-Ki&Kp +lengthscale = [.003, 50.] # length scale for the parameter variation [current-Ki&Kp and voltage-Ki&Kp] for the GP + +safe_threshold = 0 +j_min = -5 # cal min allowed performance + +explore_threshold = 0 + +# Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of +# limit exceeded +abort_reward = 100 * j_min + +# Definition of the kernel +kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + +##################################### +# Definition of the controllers +# kp_v = 0.002 +# ki_v = 143 + +# Old optimized parameters: +kp_v = 0 # 0.0095 # 0.0 +ki_v = 182 # 173.22 # 200 +kp_c = 0.0308 # 0.0404 # 0.04 +ki_c = 13.3584 # 4.065 # 11.8 + +""" +#P10: +print('using p10 setting') +kp_v = 0.2972 +ki_v = 142.7 +kp_c = 0.00068 +ki_c = 0.731 +""" +# Choose Kp and Ki for the current and voltage controller as mutable parameters +mutable_params = dict(voltageP=MutableFloat(kp_v), voltageI=MutableFloat(ki_v)) # 300Hz +# mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz +voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-i_lim * 10, i_lim * 10)) + +current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) # Current controller values +droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) +qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + +ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + +agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + + +for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + for ave_run in tqdm(range(num_average), desc='steps', unit='step', leave=False): + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, + i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + ####################################PI Stuff################################################ + R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + rand_load_train = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + load_curve=pd.read_pickle( + # 
'experiments/hp_tune/data/R_load_tenLoadstepPerEpisode2881Len_test_case_10_seconds.pkl')) + # 'experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl')) + # 'experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl')) + # 'experiments/hp_tune/data/R_load_deterministic_test_case_25_ohm_1_seconds.pkl')) + data_str)) + + cb = CallbackList() + # set initial = None to reset load random in range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + cb.append(rand_load_train.reset) + + plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + + + # rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + # load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + # fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': 
partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + history=FullHistory(), + # on_episode_reset_callback=cb.fire, + action_time_delay=1 * undersample + ) + + rew.gamma = 0 + return_sum_PI = 0.0 + rew_list_PI = [] + v_d_PI = [] + v_q_PI = [] + v_0_PI = [] + R_load_PI = [] + limit_exceeded_in_test_PI = False + limit_exceeded_penalty_PI = 0 + """ + agent.reset() + agent.obs_varnames = env.history.cols + env.history.cols = env.history.structured_cols(None) + agent.measurement_cols + env.measure = agent.measure + agent_fig = None + obs_PI = env.reset() + + for step in tqdm(range(env.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + agent.observe(None, False) + act_PI = agent.act(obs_PI) + obs_PI, r_PI, done_PI, info_PI = env.step(act_PI) + rew_list_PI.append(r_PI) + env.render() + return_sum_PI += r_PI + if r_PI == -1 and not limit_exceeded_in_test_PI: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test_PI = True + limit_exceeded_penalty_PI = -1 + + _, env_fig = env.close() + agent.observe(r_PI, done_PI) + + v_a_PI = env.history.df['lc.capacitor1.v'] + v_b_PI = env.history.df['lc.capacitor2.v'] + v_c_PI = env.history.df['lc.capacitor3.v'] + i_a_PI = env.history.df['lc.inductor1.i'] + i_b_PI = env.history.df['lc.inductor2.i'] + i_c_PI = env.history.df['lc.inductor3.i'] + R_load_PI = (env.history.df['r_load.resistor1.R'].tolist()) + phase_PI = env.history.df['inverter1.phase.0'] # env.net.components[0].phase + + i_dq0_PI = abc_to_dq0(np.array([i_a_PI, i_b_PI, i_c_PI]), phase_PI) + v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) + + i_d_PI = i_dq0_PI[0].tolist() + i_q_PI = i_dq0_PI[1].tolist() + i_0_PI = i_dq0_PI[2].tolist() + v_d_PI = (v_dq0_PI[0].tolist()) + v_q_PI = (v_dq0_PI[1].tolist()) + v_0_PI = (v_dq0_PI[2].tolist()) + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "time": ts, + "PI_Kp_c": kp_c, + "PI_Ki_c": ki_c, + "PI_Kp_v": kp_v, + "PI_Ki_v": ki_v, + "DDPG_model_path": model_path, + "Return PI": (return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI), + "Reward PI": rew_list_PI, + "env_hist_PI": env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "PI result for comparison with RL agent", + "optimization node": 'Thinkpad', + "optimization folder name": 'Pipi_new_testcase_opt_4d_undsafe_2' + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/PI_{max_episode_steps_list[max_eps_steps]}steps') + """ + ####################################DDPG Stuff############################################## + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + + net = Network.load('net/net_vctrl_single_inv_dq0.yaml') # is used from vctrl_single_env, not needed here + + for used_model, wrapper_mode, used_number_past_vales in zip(model_name, wrapper, number_past_vals): + + if wrapper_mode == 'i_load': + env_test = 
gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2' # ], + , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'], + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + ) + else: + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'], + # , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'], + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + ) + + if wrapper_mode in ['past', 'i_load']: + env_test = FeatureWrapper_pastVals(env_test, number_of_features=9 + used_number_past_vales * 3, + # training_episode_length=training_episode_length, (da aus pickle!) 
+ recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, + number_past_vals=used_number_past_vales) + + elif wrapper_mode == 'future': + env_test = FeatureWrapper_futureVals(env_test, number_of_features=9, + recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, number_future_vals=10, + future_data=data_str) + + elif wrapper_mode == 'I-controller': + env_test = FeatureWrapper_I_controller(env_test, number_of_features=14 + used_number_past_vales * 3, + recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, + Ki=12, + number_past_vals=number_past_vals) + + elif wrapper_mode == 'no-I-term': + env_test = BaseWrapper(env_test, number_of_features=6 + used_number_past_vales * 3, + recorder=mongo_recorder, n_trail=n_trail, gamma=gamma, + number_past_vals=used_number_past_vales) + + else: + env_test = FeatureWrapper(env_test, number_of_features=11, + recorder=mongo_recorder, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, + penalty_P_weight=0) # , use_past_vals=True, number_past_vals=30) + + if wrapper_mode not in ['no-I-term', 'I-controller']: + env_test.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + # model2 = DDPG.load(model_path + f'model.zip') # , env=env_test) + model = DDPG.load(model_path + f'{used_model}') #, env=env_test) + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + if wrapper_mode not in ['no-I-term', 'I-controller']: + env_test.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + return_sum = 0.0 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + rew_list = [] + v_d = [] + v_q = [] + v_0 = [] + action_P0 = [] + action_P1 = [] + action_P2 = [] + action_I0 = [] + action_I1 = [] + action_I2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + R_load = [] + + ####### Run Test ######### + # agent ~ PI Controllerv using env + # model ~ RL Controller using env_test + # Both run in the same loop + + obs = env_test.reset() + + for step in tqdm(range(env_test.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + action, _states = model.predict(obs, deterministic=True) + if step == 988: + asd = 1 + obs, rewards, done, info = env_test.step(action) + action_P0.append(np.float64(action[0])) + action_P1.append(np.float64(action[1])) + action_P2.append(np.float64(action[2])) + if wrapper_mode not in ['no-I-term', 'I-controller']: + action_I0.append(np.float64(action[3])) + action_I1.append(np.float64(action[4])) + action_I2.append(np.float64(action[5])) + 
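+                    # Bookkeeping for the plots further below: action[0:3] is the proportional
+                    # part and action[3:6] the integral part of the RL action; the wrapper's
+                    # internal integrator state (env_test.integrator_sum) is logged next to it.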
integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + + # print(rewards) + if done: + env_test.close() + + # print(limit_exceeded_in_test) + break + + env_test.close() + + v_a = env_test.history.df['lc.capacitor1.v'] + v_b = env_test.history.df['lc.capacitor2.v'] + v_c = env_test.history.df['lc.capacitor3.v'] + i_a = env_test.history.df['lc.inductor1.i'] + i_b = env_test.history.df['lc.inductor2.i'] + i_c = env_test.history.df['lc.inductor3.i'] + R_load = (env_test.history.df['r_load.resistor1.R'].tolist()) + phase = env_test.history.df['inverter1.phase.0'] # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + + i_d = i_dq0[0].tolist() + i_q = i_dq0[1].tolist() + i_0 = i_dq0[2].tolist() + v_d = (v_dq0[0].tolist()) + v_q = (v_dq0[1].tolist()) + v_0 = (v_dq0[2].tolist()) + + plt.plot(v_d_PI, 'b') + plt.plot(v_q_PI, 'r') + plt.plot(v_0_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title('PI') + plt.show() + + plt.plot(rew_list_PI) + plt.xlabel("") + plt.grid() + plt.ylabel("Reward") + plt.title('PI') + plt.show() + + plt.plot(R_load_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('Test') + plt.show() + + plt.plot(v_d, 'b') + plt.plot(v_q, 'r') + plt.plot(v_0, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title(f'DDPG - {used_model}') + plt.show() + + plt.plot(rew_list) + plt.xlabel("") + plt.grid() + plt.ylabel("Reward") + plt.title(f'DDPG - {used_model}') + plt.show() + + plt.plot(R_load, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('DDPG') + plt.show() + + plt.plot(integrator_sum0) + plt.plot(integrator_sum0) + plt.plot(integrator_sum0) + plt.ylabel('intergratorzustand') + plt.show() + + plt.plot(action_I0) + plt.plot(action_I1) + plt.plot(action_I2) + plt.ylabel('action I') + plt.show() + + plt.plot(action_P0) + plt.plot(action_P1) + plt.plot(action_P2) + plt.ylabel('action P') + plt.show() + + # return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + print(f'RL: {(return_sum / env_test.max_episode_steps + limit_exceeded_penalty)}') + print(f'PI: {(return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI)}') + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "model name": model_name, + "Wrapper": wrapper, + "used_number_past_vales": used_number_past_vales, + "time": ts, + "ActionP0": action_P0, + "ActionP1": action_P1, + "ActionP2": action_P2, + "ActionI0": action_I0, + "ActionI1": action_I1, + "ActionI2": action_I2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "DDPG_model_path": model_path, + "Return DDPG": (return_sum / env_test.max_episode_steps + limit_exceeded_penalty), + "Reward DDPG": rew_list, + "env_hist_DDPG": env_test.env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "execution of RL agent on 10 s test case-loading values", + 
"optimization node": 'Thinkpad', + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle( + f'{folder_name}/' + used_model + f'_{max_episode_steps_list[max_eps_steps]}steps_NoPhaseFeature_1427') + + ret_list.append((return_sum / env_test.max_episode_steps + limit_exceeded_penalty)) + ret_array[ave_run] = (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + # ret_dict[str(ave_run)] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # zipped = zip(max_episode_steps_list[max_eps_steps], ret_list) + # temp_dict = dict(zipped) + temp_dict = {str(max_episode_steps_list[max_eps_steps]): ret_list} + result_list.append(temp_dict) + # ret_dict.append(zipped) + # df = df.append(ret_dict) + + mean_list.append(np.mean(ret_array)) + std_list.append(np.std(ret_array)) + +# df = df.append(temp_list, True) +print(mean_list) +print(std_list) +print(result_list) + +results = { + 'Mean': mean_list, + 'Std': std_list, + 'All results': result_list, + 'max_episode_steps_list': max_episode_steps_list +} + +df = pd.DataFrame(results) +# df.to_pickle("DDPG_study18_best_test_varianz.pkl") +asd = 1 diff --git a/experiments/hp_tune/visualize_tests/Paper_plts.py b/experiments/hp_tune/visualize_tests/Paper_plts.py new file mode 100644 index 00000000..04f86523 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/Paper_plts.py @@ -0,0 +1,223 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px +import matplotlib + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +save_results = True + +# Fuer den 10s Fall +interval_list_x = [[0, 0.005], [7.145, 7.155]] +interval_list_y = [[-5, 202], [-20, 345]] + +folder_name = 'saves/paper_new2' # _deterministic' + +number_of_steps = '_100000steps' + +df = pd.read_pickle(folder_name + '/PI' + number_of_steps) + +env_hist_PI = df['env_hist_PI'] +v_a_PI = env_hist_PI[0]['lc.capacitor1.v'].tolist() +v_b_PI = env_hist_PI[0]['lc.capacitor2.v'].tolist() +v_c_PI = env_hist_PI[0]['lc.capacitor3.v'].tolist() +R_load_PI = (env_hist_PI[0]['r_load.resistor1.R'].tolist()) +phase_PI = env_hist_PI[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) +v_d_PI = (v_dq0_PI[0].tolist()) +v_q_PI = (v_dq0_PI[1].tolist()) +v_0_PI = (v_dq0_PI[2].tolist()) + +i_a_PI = env_hist_PI[0]['lc.inductor1.i'].tolist() +i_b_PI = env_hist_PI[0]['lc.inductor2.i'].tolist() +i_c_PI = env_hist_PI[0]['lc.inductor3.i'].tolist() +i_dq0_PI = abc_to_dq0(np.array([i_a_PI, i_b_PI, i_c_PI]), phase_PI) +i_d_PI = (i_dq0_PI[0].tolist()) +i_q_PI = (i_dq0_PI[1].tolist()) +i_0_PI = (i_dq0_PI[2].tolist()) + +reward_PI = df['Reward PI'][0] +return_PI = df['Return PI'][0] +kp_c = df['PI_Kp_c'][0] +ki_c = df['PI_Ki_c'][0] +kp_v = df['PI_Kp_v'][0] +ki_v = df['PI_Ki_v'][0] + +model_names = ['model_OMG_DDPG_Actor.zip', + 'model_OMG_DDPG_Integrator_no_pastVals.zip'] +ylabels = ['DDPG', 'DDPG-I'] + +return_list_DDPG = [] +reward_list_DDPG = [] + +ts = 1e-4 # if ts stored: take from db + +v_d_ref = [169.7] * len(v_0_PI) + +t_test = np.arange(0, round((len(v_0_PI)) * ts, 4), ts).tolist() +t_reward = np.arange(0, round((len(reward_PI)) * ts, 4), ts).tolist() + +# fig, axs = plt.subplots(len(model_names) + 4, len(interval_list_y), +fig, axs = plt.subplots(3, len(interval_list_y), + figsize=(9, 7)) # , sharex=True) # a new figure window + +for i in range(len(interval_list_y)): + plt_count = 4 + ############## Subplots + # fig = 
plt.figure(figsize=(10,12)) # a new figure window + + df_DDPG = pd.read_pickle(folder_name + '/' + model_names[0] + number_of_steps) + + if i == 0: + return_list_DDPG.append(round(df_DDPG['Return DDPG'][0], 7)) + # reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + + env_hist_DDPG = df_DDPG['env_hist_DDPG'] + + v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() + v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() + v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() + i_a = env_hist_DDPG[0]['lc.inductor1.i'].tolist() + i_b = env_hist_DDPG[0]['lc.inductor2.i'].tolist() + i_c = env_hist_DDPG[0]['lc.inductor3.i'].tolist() + phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + v_d_DDPG = (v_dq0[0].tolist()) + v_q_DDPG = (v_dq0[1].tolist()) + v_0_DDPG = (v_dq0[2].tolist()) + i_d_DDPG = (i_dq0[0].tolist()) + i_q_DDPG = (i_dq0[1].tolist()) + i_0_DDPG = (i_dq0[2].tolist()) + + DDPG_reward = df_DDPG['Reward DDPG'][0] + + df_DDPG_I = pd.read_pickle(folder_name + '/' + model_names[1] + number_of_steps) + + if i == 0: + return_list_DDPG.append(round(df_DDPG_I['Return DDPG'][0], 7)) + # reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + + env_hist_DDPG_I = df_DDPG_I['env_hist_DDPG'] + + v_a_I = env_hist_DDPG_I[0]['lc.capacitor1.v'].tolist() + v_b_I = env_hist_DDPG_I[0]['lc.capacitor2.v'].tolist() + v_c_I = env_hist_DDPG_I[0]['lc.capacitor3.v'].tolist() + i_a_I = env_hist_DDPG_I[0]['lc.inductor1.i'].tolist() + i_b_I = env_hist_DDPG_I[0]['lc.inductor2.i'].tolist() + i_c_I = env_hist_DDPG_I[0]['lc.inductor3.i'].tolist() + phase_I = env_hist_DDPG_I[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0_I = abc_to_dq0(np.array([v_a_I, v_b_I, v_c_I]), phase_I) + i_dq0_I = abc_to_dq0(np.array([i_a_I, i_b_I, i_c_I]), phase_I) + v_d_DDPG_I = (v_dq0_I[0].tolist()) + v_q_DDPG_I = (v_dq0_I[1].tolist()) + v_0_DDPG_I = (v_dq0_I[2].tolist()) + i_d_DDPG_I = (i_dq0_I[0].tolist()) + i_q_DDPG_I = (i_dq0_I[1].tolist()) + i_0_DDPG_I = (i_dq0_I[2].tolist()) + + DDPG_reward_I = df_DDPG_I['Reward DDPG'][0] + + axs[2, i].plot(t_reward, reward_PI, 'b', label=f' PI: ' + f'{round(sum(reward_PI[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + axs[2, i].plot(t_reward, DDPG_reward, 'r', label=f' DDPG: ' + f'{round(sum(DDPG_reward[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + axs[2, i].plot(t_reward, DDPG_reward_I, 'g', label=f'DDPG+I: ' + f'{round(sum(DDPG_reward_I[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + + axs[2, i].grid() + axs[2, i].set_xlim(interval_list_x[i]) + # axs[1, i].set_ylim(interval_list_y[i]) + axs[2, i].legend() + axs[2, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + axs[2, i].set_ylabel("Reward") + + axs[0, i].plot(t_test, v_d_ref, '--', color='gray') + axs[0, i].plot(t_test, v_d_PI, 'b', label='PI') + axs[0, i].plot(t_test, v_d_DDPG, 'r', label='DDPG') + axs[0, i].plot(t_test, v_d_DDPG_I, 'g', label='DDPG+I') + # axs[2, i].plot(t_test, v_q_PI, 'r', label='v_q') + # axs[2, i].plot(t_test, v_0_PI, 'g', label='v_0') + axs[0, i].grid() + axs[0, i].legend() + axs[0, i].set_xlim(interval_list_x[i]) + axs[0, i].set_ylim(interval_list_y[i]) + if i == 0: + axs[0, 
i].set_ylabel("$v_{\mathrm{d}}\,/\,\mathrm{V}$") + # else: + # axs[1, i].set_ylabel("$v_{\mathrm{q0, PI}}\,/\,\mathrm{V}$") + + axs[1, i].plot(t_test, i_d_PI, 'b', label='PI') + axs[1, i].plot(t_test, i_d_DDPG, 'r', label='DDPG') + axs[1, i].plot(t_test, i_d_DDPG_I, 'g', label='DDPG+I') + # axs[1, i].plot(t_test, i_q_PI, 'r', label='v_q') + # axs[1, i].plot(t_test, i_0_PI, 'g', label='v_0') + axs[1, i].grid() + # axs[1, i].legend() + axs[1, i].set_xlim(interval_list_x[i]) + # axs[3, i].set_ylim(interval_list_y[i]) + if i == 0: + axs[1, i].set_ylim([0, 15]) + axs[1, i].set_ylabel("$i_{\mathrm{d}}\,/\,\mathrm{A}$") + + """ + axs[plt_count, i].plot(t_test, v_d_DDPG, 'b') + axs[plt_count, i].plot(t_test, v_q_DDPG, 'r') + axs[plt_count, i].plot(t_test, v_0_DDPG, 'g') + axs[plt_count, i].grid() + axs[plt_count, i].set_xlim(interval_list_x[i]) + axs[plt_count, i].set_ylim(interval_list_y[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + + axs[plt_count, i].set_ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") + # else: + # axs[plt_count, i].set_ylabel("$v_{\mathrm{q0, DDPG}}\,/\,\mathrm{V}$") + plt_count += 1 + + axs[plt_count, i].plot(t_test, i_d_DDPG, 'b') + axs[plt_count, i].plot(t_test, i_q_DDPG, 'r') + axs[plt_count, i].plot(t_test, i_0_DDPG, 'g') + axs[plt_count, i].grid() + axs[plt_count, i].set_xlim(interval_list_x[i]) + #axs[plt_count, i].set_ylim(interval_list_y[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + axs[plt_count, i].set_ylabel("$i_{\mathrm{dq0, DDPG}}\,/\,\mathrm{A}$") + """ + +# fig.suptitle(f'Model using pastVals:' + str(14) + ' \n ' +# f'Model-return(MRE)' + str(return_list_DDPG) + ' \n' +# f' PI-return(MRE): {round(return_PI, 7)} \n ' +# f'PI: Kp_i = {kp_c}, Ki_i = {ki_c}, Kp_v = {kp_v}, Ki_v = {ki_v}', +# fontsize=14) + +fig.subplots_adjust(wspace=0.2, hspace=0.2) +plt.show() + +fig.savefig(f'{folder_name}/Ausschnitt_2pV_q0.pdf') +if save_results: + # Plot setting + params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 8, # fontsize for x and y labels (was 10) + 'axes.titlesize': 8, + 'font.size': 8, # was 10 + 'legend.fontsize': 8, # was 10 + 'xtick.labelsize': 8, + 'ytick.labelsize': 8, + 'text.usetex': True, + 'figure.figsize': [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + matplotlib.rcParams.update(params) + + fig.savefig(f'{folder_name}/_OMG_U_I.png') + fig.savefig(f'{folder_name}/_OMG_U_I.pdf') + fig.savefig(f'{folder_name}/_OMG_U_I.pgf') diff --git a/experiments/hp_tune/visualize_tests/mongoDB_plt.py b/experiments/hp_tune/visualize_tests/mongoDB_plt.py new file mode 100644 index 00000000..351d9523 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/mongoDB_plt.py @@ -0,0 +1,206 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from bson import ObjectId +from plotly import tools +from pymongo import MongoClient + +from openmodelica_microgrid_gym.util import dq0_to_abc, abc_to_dq0 + +plt_train = True + +# db_name = 'PC2_DDGP_Vctrl_single_inv_18_penalties' +# db_name = 'DDPG_SplitActor_Best_study18_6462' +db_name = 'P10_setting_best_study22_clipped_abort_newReward_design' +trial = '0' +show_episode_number = 19 + +with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with 
MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + trial = db.Trial_number_43 + + train_data = trial.find_one({"Name": "After_Training"}) + train_episode_data = trial.find_one({"Episode_number": show_episode_number}) + trial_config = trial.find_one({"Name": "Config"}) + + ts = 1e-4 # if ts stored: take from db + # t_test = np.arange(0, len(trial_test['lc_capacitor1_v']) * ts, ts).tolist() + + if train_episode_data is not None: + # only available if loglevel == 'train' + ############################################################## + # Plot example Training Episode + R_load = train_episode_data['R_load_training'] + i_a = train_episode_data['i_a_training'] + i_b = train_episode_data['i_b_training'] + i_c = train_episode_data['i_c_training'] + v_a = train_episode_data['v_a_training'] + v_b = train_episode_data['v_b_training'] + v_c = train_episode_data['v_c_training'] + reward = train_episode_data['Rewards'] + phase = train_episode_data['Phase'] + + plt.plot(R_load) + plt.grid() + plt.xlabel("steps") + plt.ylabel("R_load") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(i_a) + plt.plot(i_b) + plt.plot(i_c) + plt.grid() + plt.xlabel("steps") + plt.ylabel("i_abc") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(v_a) + plt.plot(v_b) + plt.plot(v_c) + plt.grid() + plt.xlabel("steps") + plt.ylabel("v_abc") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(reward) + plt.grid() + plt.xlabel("steps") + plt.ylabel("Reward") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(train_episode_data['Integrator0']) + plt.plot(train_episode_data['Integrator1']) + plt.plot(train_episode_data['Integrator2']) + plt.grid() + plt.xlabel("steps") + plt.ylabel("Int Zustand") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(train_episode_data['actionP0']) + plt.plot(train_episode_data['actionP1']) + plt.plot(train_episode_data['actionP2']) + plt.grid() + plt.xlabel("steps") + plt.ylabel("actionP") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(train_episode_data['actionI0']) + plt.plot(train_episode_data['actionI1']) + plt.plot(train_episode_data['actionI2']) + plt.grid() + plt.xlabel("steps") + plt.ylabel("actionI") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + df = pd.DataFrame() + df['R_load'] = R_load + + hist = df['R_load'].hist(bins=50) + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + """ + plot = px.Figure() + plot.add_trace( + px.Scatter(y=R_load) + """ + # df2['v_0_SP'] = pd.DataFrame(test_data['inverter1_v_ref_0']) + # df2['v_1_SP'] = pd.DataFrame(test_data['inverter1_v_ref_1']) + # df2['v_2_SP'] = pd.DataFrame(test_data['inverter1_v_ref_2']) + + # df2['phase'] = pd.DataFrame(test_data['Phase']) + + # v_sp_abc = dq0_to_abc(np.array([df2['v_0_SP'], df2['v_1_SP'], df2['v_2_SP']]), np.array(df2['phase'])) + + v_mess_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), np.array(phase)) + + # x = df2['t'] + v_d = v_mess_dq0[0][:] # df2['v_a'] + v_q = v_mess_dq0[1][:] # df2['v_b'] + v_0 = v_mess_dq0[2][:] # df2['v_c'] + + plt.plot(v_d) + plt.plot(v_q) + plt.plot(v_0) + plt.grid() + plt.xlabel("steps") + plt.ylabel("v_dq0") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + # v_a_SP = df2['v_0_SP']#v_sp_abc[0,:] + # v_b_SP = df2['v_1_SP']#v_sp_abc[1,:] + # v_c_SP = df2['v_2_SP']#v_sp_abc[2,:] + + plot = px.Figure() + 
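+            # Note: px is plotly.graph_objects here (see import above), not plotly.express.
+            # Interactive figure of the recorded phase voltages v_a, v_b, v_c of the selected
+            # training episode; the range slider added below allows zooming into load steps.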
plot.add_trace( + px.Scatter(y=v_a)) + + plot.add_trace( + px.Scatter(y=v_b)) + + plot.add_trace( + px.Scatter(y=v_c)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() + + ############################################################## + # After Training + + train_reward_per_episode = train_data['Mean_eps_reward'] + + ax = plt.plot(train_reward_per_episode) + plt.grid() + plt.xlabel("Episodes") + # plt.yscale('log') + plt.ylabel("Mean episode Reward") + # plt.ylim([-0.06, -0.025]) + # plt.title("1.000.000") + plt.show() + + if True: + plot = px.Figure() + plot.add_trace( + px.Scatter(y=train_reward_per_episode)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() diff --git a/experiments/hp_tune/visualize_tests/old/CompareModels.py b/experiments/hp_tune/visualize_tests/old/CompareModels.py new file mode 100644 index 00000000..a26038c2 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/old/CompareModels.py @@ -0,0 +1,633 @@ +import logging +import os +import platform +import time +from functools import partial + +import GPy +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from stochastic.processes import VasicekProcess +from tqdm import tqdm +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper, FeatureWrapper_pastVals +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# imports for PIPI +from experiments.hp_tune.env.random_load import RandomLoad +from openmodelica_microgrid_gym.agents import SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + +import pandas as pd + +from openmodelica_microgrid_gym.util import abc_to_dq0 + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +import gym + +# np.random.seed(0) + +show_plots = True +save_results = False + +folder_name = 'saves/Comparison_study_22_best_pastVal_HPO_oldtestEnv_PI_SP_corr' # cfg['STUDY_NAME'] +node = platform.uname().node + +# model_name = 'model_retrain_pastVals12.zip' +number_past_vals = [2] # [0, 5, 10, 16, 25] # [30, 0] +use_past_vals = [True] # [False, True, True, True, True] # [True, False] +# model_name = ['model.zip'] +model_path = 'experiments/hp_tune/trained_models/study_22_best_pastVal_HPO_oldtestEnv/' +# model_path = 'experiments/hp_tune/trained_models/study_22_best_iLoad_Feature/' + +model_name = [ + 'model_2_pastVals.zip'] # ['model_0_pastVals.zip', 'model_5_pastVals.zip', 'model_10_pastVals.zip', 'model_16_pastVals.zip', 
'model_25_pastVals.zip', ] # , 'model_noPastVals.zip'] + +error_exponent = 0.5 + +mongo_recorder = Recorder(node=node, database_name=folder_name) + +num_average = 1 +max_episode_steps_list = [20000] # [1000, 5000, 10000, 20000, 50000, 100000] + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +#################PI Config stuff############################################################################## + +current_directory = os.getcwd() +# folder_name = 'Pipi_safeopt_best_run4d' +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_vctrl_single_inv.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +# max_episode_steps = 1002 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) +upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 +##################################### +# Definitions for the GP +prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set +noise_var = 0.001 # ** 2 # measurement noise sigma_omega +prior_var = 2 # prior variance of the GP + +bounds = [(0.000, 0.045), (4, 450)] # bounds on the input variable current-Ki&Kp and voltage-Ki&Kp +lengthscale = [.003, 50.] 
# length scale for the parameter variation [current-Ki&Kp and voltage-Ki&Kp] for the GP + +safe_threshold = 0 +j_min = -5 # cal min allowed performance + +explore_threshold = 0 + +# Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of +# limit exceeded +abort_reward = 100 * j_min + +# Definition of the kernel +kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + +##################################### +# Definition of the controllers +# kp_v = 0.002 +# ki_v = 143 +kp_v = 0 # 0.0095 # 0.0 +ki_v = 182 # 173.22 # 200 +# Choose Kp and Ki for the current and voltage controller as mutable parameters +mutable_params = dict(voltageP=MutableFloat(kp_v), voltageI=MutableFloat(ki_v)) # 300Hz +# mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz +voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-i_lim, i_lim)) + +kp_c = 0.0308 # 0.0404 # 0.04 +ki_c = 13.3584 # 4.065 # 11.8 +current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) # Current controller values +droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) +qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + +ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + +agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + +################DDPG Config Stuff######################################################################### +gamma = 0.946218 +integrator_weight = 0.311135 +antiwindup_weight = 0.660818 +error_exponent = error_exponent +use_gamma_in_rew = 1 +n_trail = 50001 +actor_number_layers = 2 +critic_number_layers = 4 +alpha_relu_actor = 0.208098 +alpha_relu_critic = 0.00678497 + +for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + for ave_run in tqdm(range(num_average), desc='steps', unit='step', leave=False): + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, + i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + ####################################PI Stuff################################################ + R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + rand_load_train = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_tenLoadstepPerEpisode2881Len_test_case_10_seconds.pkl')) + # 'experiments/hp_tune/data/R_load_oneLoadstepPerEpisode2881Len_test_case_10_seconds.pkl')) + 'experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl')) + + cb = CallbackList() + # set initial = None to reset load random in 
range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + cb.append(rand_load_train.reset) + + plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + + + # rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + # load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + # fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + history=FullHistory(), + # 
on_episode_reset_callback=cb.fire, + action_time_delay=1 * undersample + ) + + rew.gamma = 0 + return_sum_PI = 0.0 + rew_list_PI = [] + v_d_PI = [] + v_q_PI = [] + v_0_PI = [] + R_load_PI = [] + limit_exceeded_in_test_PI = False + limit_exceeded_penalty_PI = 0 + + agent.reset() + agent.obs_varnames = env.history.cols + env.history.cols = env.history.structured_cols(None) + agent.measurement_cols + env.measure = agent.measure + agent_fig = None + obs_PI = env.reset() + + for step in tqdm(range(env.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + agent.observe(None, False) + act_PI = agent.act(obs_PI) + obs_PI, r_PI, done_PI, info_PI = env.step(act_PI) + rew_list_PI.append(r_PI) + env.render() + return_sum_PI += r_PI + if r_PI == -1 and not limit_exceeded_in_test_PI: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test_PI = True + limit_exceeded_penalty_PI = -1 + + _, env_fig = env.close() + agent.observe(r_PI, done_PI) + + v_a_PI = env.history.df['lc.capacitor1.v'] + v_b_PI = env.history.df['lc.capacitor2.v'] + v_c_PI = env.history.df['lc.capacitor3.v'] + i_a_PI = env.history.df['lc.inductor1.i'] + i_b_PI = env.history.df['lc.inductor2.i'] + i_c_PI = env.history.df['lc.inductor3.i'] + R_load_PI = (env.history.df['r_load.resistor1.R'].tolist()) + phase_PI = env.history.df['inverter1.phase.0'] # env.net.components[0].phase + + i_dq0_PI = abc_to_dq0(np.array([i_a_PI, i_b_PI, i_c_PI]), phase_PI) + v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) + + i_d_PI = i_dq0_PI[0].tolist() + i_q_PI = i_dq0_PI[1].tolist() + i_0_PI = i_dq0_PI[2].tolist() + v_d_PI = (v_dq0_PI[0].tolist()) + v_q_PI = (v_dq0_PI[1].tolist()) + v_0_PI = (v_dq0_PI[2].tolist()) + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "time": ts, + "PI_Kp_c": kp_c, + "PI_Ki_c": ki_c, + "PI_Kp_v": kp_v, + "PI_Ki_v": ki_v, + "DDPG_model_path": model_path, + "Return PI": (return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI), + "env_hist_PI": env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "PI result for comparison with RL agent", + "optimization node": 'Thinkpad', + "optimization folder name": 'Pipi_new_testcase_opt_4d_undsafe_2' + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/PI_{max_episode_steps_list[max_eps_steps]}steps') + + ####################################DDPG Stuff############################################## + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + + net = Network.load('net/net_vctrl_single_inv_dq0.yaml') # is used from vctrl_single_env, not needed here + + for used_model, used_past_vals, used_number_past_vales in zip(model_name, use_past_vals, number_past_vals): + + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? 
+ obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'], + # ,'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'], + max_episode_steps=max_episode_steps_list[max_eps_steps] + ) + + if used_past_vals: + env_test = FeatureWrapper_pastVals(env_test, number_of_features=11 + used_number_past_vales, + # training_episode_length=training_episode_length, (da aus pickle!) + recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, + number_past_vals=used_number_past_vales) + else: + env_test = FeatureWrapper(env_test, number_of_features=11, + recorder=mongo_recorder, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, + penalty_P_weight=0) # , use_past_vals=True, number_past_vals=30) + + # env_test = FeatureWrapper(env_test, number_of_features=11+used_number_past_vales, integrator_weight=integrator_weight, + # recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + # gamma=1, penalty_I_weight=0, penalty_P_weight=0)#, use_past_vals=used_past_vals) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + + env_test.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + # model2 = DDPG.load(model_path + f'model.zip') # , env=env_test) + model = DDPG.load(model_path + f'{used_model}') # , env=env_test) + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + env_test.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + return_sum = 0.0 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + rew_list = [] + v_d = [] + v_q = [] + v_0 = [] + action_P0 = [] + action_P1 = [] + action_P2 = [] + action_I0 = [] + action_I1 = [] + action_I2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + R_load = [] + + ####### Run Test ######### + # agent ~ PI Controllerv using env + # model ~ RL Controller using env_test + # Both run in the same loop + + obs = env_test.reset() + + for step in tqdm(range(env_test.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + action_P0.append(np.float64(action[0])) + action_P1.append(np.float64(action[1])) + action_P2.append(np.float64(action[2])) + action_I0.append(np.float64(action[3])) + action_I1.append(np.float64(action[4])) + action_I2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional 
penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + + # print(rewards) + if done: + env_test.close() + + # print(limit_exceeded_in_test) + break + + env_test.close() + + v_a = env_test.history.df['lc.capacitor1.v'] + v_b = env_test.history.df['lc.capacitor2.v'] + v_c = env_test.history.df['lc.capacitor3.v'] + i_a = env_test.history.df['lc.inductor1.i'] + i_b = env_test.history.df['lc.inductor2.i'] + i_c = env_test.history.df['lc.inductor3.i'] + R_load = (env_test.history.df['r_load.resistor1.R'].tolist()) + phase = env_test.history.df['inverter1.phase.0'] # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + + i_d = i_dq0[0].tolist() + i_q = i_dq0[1].tolist() + i_0 = i_dq0[2].tolist() + v_d = (v_dq0[0].tolist()) + v_q = (v_dq0[1].tolist()) + v_0 = (v_dq0[2].tolist()) + + plt.plot(v_d_PI, 'b') + plt.plot(v_q_PI, 'r') + plt.plot(v_0_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title('PI') + plt.show() + + plt.plot(R_load_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('Test') + plt.show() + + plt.plot(v_d, 'b') + plt.plot(v_q, 'r') + plt.plot(v_0, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title(f'DDPG - {used_model}') + plt.show() + + plt.plot(R_load, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('PI') + plt.show() + + # return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + print(f'RL: {(return_sum / env_test.max_episode_steps + limit_exceeded_penalty)}') + print(f'PI: {(return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI)}') + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "model name": model_name, + "used past values as Features": used_past_vals, + "used_number_past_vales": used_number_past_vales, + "time": ts, + "ActionP0": action_P0, + "ActionP1": action_P1, + "ActionP2": action_P2, + "ActionI0": action_I0, + "ActionI1": action_I1, + "ActionI2": action_I2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "DDPG_model_path": model_path, + "Return DDPG": (return_sum / env_test.max_episode_steps + limit_exceeded_penalty), + "env_hist_DDPG": env_test.env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "execution of RL agent on 10 s test case-loading values", + "optimization node": 'Thinkpad', + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/' + used_model + f'_{max_episode_steps_list[max_eps_steps]}steps') + + ret_list.append((return_sum / env_test.max_episode_steps + limit_exceeded_penalty)) + ret_array[ave_run] = (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + # ret_dict[str(ave_run)] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # zipped = zip(max_episode_steps_list[max_eps_steps], ret_list) + # temp_dict = dict(zipped) + temp_dict = {str(max_episode_steps_list[max_eps_steps]): ret_list} + result_list.append(temp_dict) + # ret_dict.append(zipped) + # df = df.append(ret_dict) + + mean_list.append(np.mean(ret_array)) + std_list.append(np.std(ret_array)) + +# df = df.append(temp_list, True) +print(mean_list) 
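Both the PI benchmark above and the DDPG evaluation score an episode the same way: the collected rewards are summed, divided by the nominal episode length, and a one-time penalty of -1 is added if the hard limit was hit at least once (signalled by a step reward of exactly -1). A minimal sketch of that scoring rule, with hypothetical names and shown only for illustration:

```python
def episode_score(rewards, max_episode_steps):
    """Average reward over the nominal episode length plus a one-time -1
    penalty if the limit was exceeded (step reward of exactly -1) at least once.
    Note: the sum is divided by max_episode_steps, not len(rewards), so an
    episode that terminates early also scores lower."""
    limit_penalty = -1 if any(r == -1 for r in rewards) else 0
    return sum(rewards) / max_episode_steps + limit_penalty


# episode_score([0.9, 0.8, -1, 0.7], max_episode_steps=4) -> 1.4 / 4 - 1 = -0.65
```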
+print(std_list) +print(result_list) + +results = { + 'Mean': mean_list, + 'Std': std_list, + 'All results': result_list, + 'max_episode_steps_list': max_episode_steps_list +} + +df = pd.DataFrame(results) +# df.to_pickle("DDPG_study18_best_test_varianz.pkl") +asd = 1 diff --git a/experiments/hp_tune/visualize_tests/old/CompareModelsPlotting.py b/experiments/hp_tune/visualize_tests/old/CompareModelsPlotting.py new file mode 100644 index 00000000..8b529486 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/old/CompareModelsPlotting.py @@ -0,0 +1,477 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +make_pyplot = False +show_load = True +interval_plt = True +# interval_list_x = [[0, 0.015], [6.32, 6.42], [6.4, 6.42]]#, [6.4, 6.42]]#[0.993, 0.997], [0.993, 0.997]] +# interval_list_y = [[-20, 310], [-20, 330], [50, 330]]#, [50, 325]] +interval_list_x = [[6.405, 6.41], [0, 0.015], [0.173, 0.177]] # [0.993, 0.997], [0.993, 0.997]] +interval_list_y = [[110, 330], [-20, 310], [-5, 210]] # , [50, 325]] +folder_name = 'saves/Comparison_PI_DDPG_iLoad_Feature' +# folder_name = 'saves/Comparison_PI_DDPGs_oneLoadstepPerEpisode' +# name = 'DDPG_PI_local_PastVals_10000steps' +df = pd.read_pickle(folder_name + '/PI_100000steps') +# df_DDPG_past_vals = pd.read_pickle(folder_name + '/model_pastVals.zip_10000steps') +df_DDPG_past_vals = pd.read_pickle( + 'saves/Comparison_PI_DDPGs_oneLoadstepPerEpisode' + '/model_pastVals.zip_100000steps') +# df_DDPG = pd.read_pickle(folder_name + '/model_noPastVals_10000steps') +df_DDPG = pd.read_pickle(folder_name + '/model.zip_100000steps') +# df_PI = pd.read_pickle(folder_name+'/PI_10000steps') +# df = pd.read_pickle('DDPG_PI_local_10000steps') + +env_hist_DDPG = df_DDPG['env_hist_DDPG'] + +v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() +v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() +v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() +i_a = env_hist_DDPG[0]['lc.inductor1.i'].tolist() +i_b = env_hist_DDPG[0]['lc.inductor2.i'].tolist() +i_c = env_hist_DDPG[0]['lc.inductor3.i'].tolist() +R_load = (env_hist_DDPG[0]['r_load.resistor1.R'].tolist()) +phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) +i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + +i_d_DDPG = i_dq0[0].tolist() +i_q_DDPG = i_dq0[1].tolist() +i_0_DDPG = i_dq0[2].tolist() +v_d_DDPG = (v_dq0[0].tolist()) +v_q_DDPG = (v_dq0[1].tolist()) +v_0_DDPG = (v_dq0[2].tolist()) + +env_hist_DDPG_pastVals = df_DDPG_past_vals['env_hist_DDPG'] + +v_a_pastVals = env_hist_DDPG_pastVals[0]['lc.capacitor1.v'].tolist() +v_b_pastVals = env_hist_DDPG_pastVals[0]['lc.capacitor2.v'].tolist() +v_c_pastVals = env_hist_DDPG_pastVals[0]['lc.capacitor3.v'].tolist() +i_a_pastVals = env_hist_DDPG_pastVals[0]['lc.inductor1.i'].tolist() +i_b_pastVals = env_hist_DDPG_pastVals[0]['lc.inductor2.i'].tolist() +i_c_pastVals = env_hist_DDPG_pastVals[0]['lc.inductor3.i'].tolist() +R_load_DDPG_pastVals = (env_hist_DDPG_pastVals[0]['r_load.resistor1.R'].tolist()) +phase_pastVals = env_hist_DDPG_pastVals[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_pastVals = abc_to_dq0(np.array([v_a_pastVals, v_b_pastVals, v_c_pastVals]), phase_pastVals) +i_dq0_pastVals = abc_to_dq0(np.array([i_a_pastVals, i_b_pastVals, i_c_pastVals]), phase_pastVals) + +i_d_DDPG_pastVals = i_dq0_pastVals[0].tolist() 
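The `abc_to_dq0` calls in these scripts rotate the measured three-phase quantities into the dq0 frame using the inverter phase angle recorded in the history. For orientation, a textbook amplitude-invariant Park transform is sketched below; the exact sign and scaling convention of the `openmodelica_microgrid_gym.util` implementation may differ, so treat this only as an illustration of what the call computes conceptually.

```python
import numpy as np


def abc_to_dq0_sketch(x_abc, theta):
    """x_abc: array of shape (3, N) with phase quantities a, b, c;
    theta: array of shape (N,) with the rotation angle in rad."""
    x_abc = np.asarray(x_abc)
    theta = np.asarray(theta)
    d = 2 / 3 * (np.cos(theta) * x_abc[0]
                 + np.cos(theta - 2 * np.pi / 3) * x_abc[1]
                 + np.cos(theta + 2 * np.pi / 3) * x_abc[2])
    q = -2 / 3 * (np.sin(theta) * x_abc[0]
                  + np.sin(theta - 2 * np.pi / 3) * x_abc[1]
                  + np.sin(theta + 2 * np.pi / 3) * x_abc[2])
    zero = (x_abc[0] + x_abc[1] + x_abc[2]) / 3
    return np.stack([d, q, zero])
```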
+i_q_DDPG_pastVals = i_dq0_pastVals[1].tolist() +i_0_DDPG_pastVals = i_dq0_pastVals[2].tolist() +v_d_DDPG_pastVals = (v_dq0_pastVals[0].tolist()) +v_q_DDPG_pastVals = (v_dq0_pastVals[1].tolist()) +v_0_DDPG_pastVals = (v_dq0_pastVals[2].tolist()) + +env_hist_PI = df['env_hist_PI'] +v_a_PI = env_hist_PI[0]['lc.capacitor1.v'].tolist() +v_b_PI = env_hist_PI[0]['lc.capacitor2.v'].tolist() +v_c_PI = env_hist_PI[0]['lc.capacitor3.v'].tolist() +i_a_PI = env_hist_PI[0]['lc.inductor1.i'].tolist() +i_b_PI = env_hist_PI[0]['lc.inductor2.i'].tolist() +i_c_PI = env_hist_PI[0]['lc.inductor3.i'].tolist() +R_load_PI = (env_hist_PI[0]['r_load.resistor1.R'].tolist()) +phase_PI = env_hist_PI[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) +i_dq0_PI = abc_to_dq0(np.array([i_a_PI, i_b_PI, i_c_PI]), phase_PI) +i_d_PI = i_dq0_PI[0].tolist() +i_q_PI = i_dq0_PI[1].tolist() +i_0_PI = i_dq0_PI[2].tolist() +v_d_PI = (v_dq0_PI[0].tolist()) +v_q_PI = (v_dq0_PI[1].tolist()) +v_0_PI = (v_dq0_PI[2].tolist()) + +m_d_PI = env_hist_PI[0]['master.md'].tolist() +m_q_PI = env_hist_PI[0]['master.mq'].tolist() +m_0_PI = env_hist_PI[0]['master.m0'].tolist() + +m_a_PI = env_hist_PI[0]['master.ma'].tolist() +m_b_PI = env_hist_PI[0]['master.mb'].tolist() +m_c_PI = env_hist_PI[0]['master.mc'].tolist() + +return_PR = df['Return PI'][0] +return_DDPG = df_DDPG['Return DDPG'][0] +return_DDPG_pastVals = df_DDPG_past_vals['Return DDPG'][0] + +kp_c = df['PI_Kp_c'][0] +ki_c = df['PI_Ki_c'][0] +kp_v = df['PI_Kp_v'][0] +ki_v = df['PI_Ki_v'][0] + +ts = 1e-4 # if ts stored: take from db + +# t_test_R = np.arange(ts, (len(testcase_100k['v_d_PI'])) * ts, ts).tolist() + +t_test = np.arange(0, round((len(v_0_PI)) * ts, 4), ts).tolist() + +# plot first interval once +plt.plot(t_test, R_load) +plt.grid() +# plt.xlim([0, 0.025]) +plt.xlabel("time") +plt.ylabel("R_load") +plt.title('') +plt.show() + +plt.plot(t_test, R_load_DDPG_pastVals) +plt.grid() +# plt.xlim([0, 0.025]) +plt.xlabel("time") +plt.ylabel("R_load_DDPG_pastVals") +plt.title('') +plt.show() + +plt.plot(t_test, R_load_PI) +plt.grid() +# plt.xlim([0, 0.025]) +plt.xlabel("time") +plt.ylabel("R_load_PI") +plt.title('') +plt.show() + +plt.plot(t_test, v_d_PI, 'b', label='v_d') +plt.plot(t_test, v_q_PI, 'r', label='v_q') +plt.plot(t_test, v_0_PI, 'g', label='v_0') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +plt.xlim([0, 0.025]) +plt.ylim([-50, 250]) +plt.xlabel("time") +plt.ylabel("v_dq0_PI") +plt.title(f' PI-return(MRE): {return_PR}') +plt.show() + +plt.plot(t_test, v_d_DDPG, 'b') +plt.plot(t_test, v_q_DDPG, 'r') +plt.plot(t_test, v_0_DDPG, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +plt.xlim([0, 0.025]) +plt.ylim([-50, 250]) +plt.xlabel("time") +plt.ylabel("v_dq0_DDPG") +plt.title(f'DDPG-return(MRE): {return_DDPG}') +plt.show() + +plt.plot(t_test, v_d_DDPG_pastVals, 'b') +plt.plot(t_test, v_q_DDPG_pastVals, 'r') +plt.plot(t_test, v_0_DDPG_pastVals, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +plt.xlim([0, 0.025]) +plt.ylim([-50, 250]) +plt.xlabel("time") +plt.ylabel("v_dq0_DDPG_pV") +plt.title(f'DDPG-return(MRE) using past observations: {return_DDPG_pastVals}') +plt.show() + +plt.plot(t_test, v_d_PI, 'b', label='v_d') +plt.plot(t_test, v_q_PI, 'r', label='v_q') +plt.plot(t_test, v_0_PI, 'g', label='v_0') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +# plt.xlim([0, 0.025]) +plt.ylim([-50, 300]) +plt.xlabel("time") +plt.ylabel("v_dq0_PI") +plt.title(f' PI-return(MSE): 
{return_PR}') +plt.show() + +plt.plot(t_test, v_d_DDPG, 'b') +plt.plot(t_test, v_q_DDPG, 'r') +plt.plot(t_test, v_0_DDPG, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +# plt.xlim([0, 0.025]) +plt.ylim([-50, 300]) +plt.xlabel("time") +plt.ylabel("v_dq0_DDPG") +plt.title(f'DDPG-return(MSE): {return_DDPG}') +plt.show() + +plt.plot(t_test, v_d_DDPG_pastVals, 'b') +plt.plot(t_test, v_q_DDPG_pastVals, 'r') +plt.plot(t_test, v_0_DDPG_pastVals, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +# plt.xlim([0, 0.025]) +plt.ylim([-50, 300]) +plt.xlabel("time") +plt.ylabel("v_dq0_DDPG_pV") +plt.title(f'DDPG-return(MRE) using past observations: {return_DDPG_pastVals}') +plt.show() +############################# +############## Subplots +# fig = plt.figure(figsize=(10,12)) # a new figure window +fig, axs = plt.subplots(4, 3, figsize=(16, 12)) # , sharex=True) # a new figure window +fig.suptitle(f'DDPG-return using i_load-feature(MRE): {return_DDPG} \n ' + f'DDPG-return(MRE) using past observations: {return_DDPG_pastVals} \n' + f'PI-return(MRE): {return_PR} \n ' + f'PI: Kp_i = {kp_c}, Ki_i = {ki_c}, Kp_v = {kp_v}, Ki_v = {ki_v}', fontsize=14) + +plt_count = 1 + +i = 0 + +# ax = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes +axs[0, 0].plot(t_test, R_load) +axs[0, 0].grid() +axs[0, 0].set_xlim(interval_list_x[i]) +# axs[0, i].set_xlabel("time") +axs[0, 0].set_ylabel("R_load") +# axs[0, i].set_title(f'#{plt_count}') +# plt.show() +plt_count += 1 + +# ax2 = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes +axs[1, 0].plot(t_test, v_d_PI, 'b', label='v_d') +axs[1, 0].plot(t_test, v_q_PI, 'r', label='v_q') +axs[1, 0].plot(t_test, v_0_PI, 'g', label='v_0') +# plt.plot(t_test, v_sp_abc[0, :]) +axs[1, 0].grid() +axs[1, 0].set_xlim(interval_list_x[i]) +axs[1, 0].set_ylim(interval_list_y[i]) + +axs[1, 0].set_ylabel("v_dq0_PI") + +# ax3 = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes +axs[2, 0].plot(t_test, v_d_DDPG, 'b') +axs[2, 0].plot(t_test, v_q_DDPG, 'r') +axs[2, 0].plot(t_test, v_0_DDPG, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +axs[2, 0].grid() +axs[2, 0].set_xlim(interval_list_x[i]) +axs[2, 0].set_ylim(interval_list_y[i]) +axs[2, 0].set_xlabel("time") +axs[2, 0].set_ylabel("v_dq0_DDPG") + +####### +# ax = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes +axs[0, 1].plot(t_test, m_d_PI, 'b') +axs[0, 1].plot(t_test, m_q_PI, 'r') +axs[0, 1].plot(t_test, m_0_PI, 'g') +axs[0, 1].grid() +axs[0, 1].set_xlim(interval_list_x[i]) +axs[0, 1].set_ylim([-0.2, 0.8]) +axs[0, 1].set_ylabel("m_dq0_PI") + +# ax2 = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes +axs[1, 1].plot(t_test, i_d_PI, 'b', label='i_d') +axs[1, 1].plot(t_test, i_q_PI, 'r', label='i_q') +axs[1, 1].plot(t_test, i_0_PI, 'g', label='i_0') +# plt.plot(t_test, v_sp_abc[0, :]) +axs[1, 1].grid() +axs[1, 1].set_xlim(interval_list_x[i]) +axs[1, 1].set_ylim([-10, 14]) +# axs[1, i].set_xlabel("time") + +axs[1, 1].set_ylabel("i_dq0_PI") + +axs[2, 1].plot(t_test, i_d_DDPG, 'b') +axs[2, 1].plot(t_test, i_q_DDPG, 'r') +axs[2, 1].plot(t_test, i_0_DDPG, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +axs[2, 1].grid() +axs[2, 1].set_xlim(interval_list_x[i]) +axs[2, 1].set_ylim([-10, 14]) +axs[2, 1].set_xlabel("time") +axs[2, 1].set_ylabel("i_dq0_DDPG") +# axs[2, i].set_title(f'#{plt_count}') +# plt.show() + + +####### +actionP0 = df_DDPG['ActionP0'][0] +t_action = np.arange(0, round((len(actionP0)) * ts, 4), ts).tolist() + +axs[2, 2].plot(t_action, actionP0, 'b') +axs[2, 
2].plot(t_action, df_DDPG['ActionP1'][0], 'r') +axs[2, 2].plot(t_action, df_DDPG['ActionP2'][0], 'g') +axs[2, 2].grid() +axs[2, 2].set_xlim(interval_list_x[i]) +axs[2, 2].set_ylabel("action_P_012_DDPG") + +""" +axs[1, 2].plot(t_action, df['ActionI0'][0], 'b') +axs[1, 2].plot(t_action, df['ActionI1'][0], 'r') +axs[1, 2].plot(t_action, df['ActionI2'][0], 'g') +axs[1, 2].grid() +axs[1, 2].set_xlim(interval_list_x[i]) +#axs[1, 2].set_ylim(interval_list_x[i]) +axs[1, 2].set_ylabel("action_I_012_DDPG") +""" +axs[0, 2].plot(t_action, [sum(x) for x in zip(df_DDPG['integrator_sum0'][0], actionP0)], 'b') +axs[0, 2].plot(t_action, [sum(x) for x in zip(df_DDPG['integrator_sum1'][0], df_DDPG['ActionP1'][0])], 'r') +axs[0, 2].plot(t_action, [sum(x) for x in zip(df_DDPG['integrator_sum2'][0], df_DDPG['ActionP2'][0])], 'g') +axs[0, 2].grid() +axs[0, 2].set_xlim(interval_list_x[i]) +axs[0, 2].set_ylim([-0.2, 0.8]) +axs[0, 2].set_ylabel("m_dq0_DDPG") + +axs[1, 2].plot(t_action, df_DDPG['integrator_sum0'][0], 'b') +axs[1, 2].plot(t_action, df_DDPG['integrator_sum1'][0], 'r') +axs[1, 2].plot(t_action, df_DDPG['integrator_sum2'][0], 'g') +axs[1, 2].grid() +axs[1, 2].set_xlim(interval_list_x[i]) +axs[1, 2].set_ylim([-0.2, 0.8]) +axs[1, 2].set_ylabel("Intergrator_sum_dq0_DDPG") +""" +# ax = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes +axs[0, 2].plot(t_test, m_a_PI, 'b') +axs[0, 2].plot(t_test, m_b_PI, 'r') +axs[0, 2].plot(t_test, m_c_PI, 'g') +axs[0, 2].grid() +axs[0, 2].set_xlim(interval_list_x[i]) +axs[0, 2].set_ylabel("m_abc_PI") + + + +# ax2 = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes +axs[1, 1].plot(t_test, i_d_PI, 'b', label='i_d') +axs[1, 1].plot(t_test, i_q_PI, 'r', label='i_q') +axs[1, 1].plot(t_test, i_0_PI, 'g', label='i_0') +# plt.plot(t_test, v_sp_abc[0, :]) +axs[1, 1].grid() +axs[1, 1].set_xlim(interval_list_x[i]) +axs[1, 1].set_ylim(interval_list_y[i]) +# axs[1, i].set_xlabel("time") + +axs[1, 1].set_ylabel("i_dq0_PI") + + +axs[2, 1].plot(t_test, i_d_DDPG, 'b') +axs[2, 1].plot(t_test, i_q_DDPG, 'r') +axs[2, 1].plot(t_test, i_0_DDPG, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +axs[2, 1].grid() +axs[2, 1].set_xlim(interval_list_x[i]) +axs[2, 1].set_ylim(interval_list_y[i]) +axs[2, 1].set_xlabel("time") +axs[2, 1].set_ylabel("i_dq0_DDPG") +# axs[2, i].set_title(f'#{plt_count}') +# plt.show() + + + +""" + +#fig.savefig(f'{folder_name}/overview.pdf') + +fig.subplots_adjust(wspace=0.4, hspace=0.2) +plt.show() + +if interval_plt: + ############## Subplots + # fig = plt.figure(figsize=(10,12)) # a new figure window + fig, axs = plt.subplots(4, len(interval_list_y), figsize=(16, 12)) # , sharex=True) # a new figure window + fig.suptitle(f'DDPG-return(MRE): {return_DDPG} \n ' + f'DDPG-return(MRE) using past observations: {return_DDPG_pastVals} \n' + f' PI-return(MRE): {return_PR} \n ' + f'PI: Kp_i = {kp_c}, Ki_i = {ki_c}, Kp_v = {kp_v}, Ki_v = {ki_v}', fontsize=14) + + plt_count = 1 + + for i in range(len(interval_list_y)): + + # ax = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes + axs[0, i].plot(t_test, R_load) + axs[0, i].grid() + axs[0, i].set_xlim(interval_list_x[i]) + # axs[0, i].set_xlabel("time") + if i == 0: + axs[0, i].set_ylabel("R_load") + # axs[0, i].set_title(f'#{plt_count}') + # plt.show() + plt_count += 1 + + # ax2 = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes + axs[1, i].plot(t_test, v_d_PI, 'b', label='v_d') + axs[1, i].plot(t_test, v_q_PI, 'r', label='v_q') + axs[1, i].plot(t_test, v_0_PI, 'g', label='v_0') + 
# plt.plot(t_test, v_sp_abc[0, :]) + axs[1, i].grid() + axs[1, i].set_xlim(interval_list_x[i]) + axs[1, i].set_ylim(interval_list_y[i]) + # axs[1, i].set_xlabel("time") + if i == 0: + axs[1, i].set_ylabel("v_dq0_PI") + # axs[1, i].set_title(f'#{plt_count}') + # plt.show() + plt_count += 1 + + # ax3 = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes + axs[2, i].plot(t_test, v_d_DDPG, 'b') + axs[2, i].plot(t_test, v_q_DDPG, 'r') + axs[2, i].plot(t_test, v_0_DDPG, 'g') + # plt.plot(t_test, v_sp_abc[0, :]) + axs[2, i].grid() + axs[2, i].set_xlim(interval_list_x[i]) + axs[2, i].set_ylim(interval_list_y[i]) + axs[2, i].set_xlabel("time") + if i == 0: + axs[2, i].set_ylabel("v_dq0_DDPG i_load_feature") + # axs[2, i].set_title(f'#{plt_count}') + # plt.show() + plt_count += 1 + + axs[3, i].plot(t_test, v_d_DDPG_pastVals, 'b') + axs[3, i].plot(t_test, v_q_DDPG_pastVals, 'r') + axs[3, i].plot(t_test, v_0_DDPG_pastVals, 'g') + axs[3, i].grid() + axs[3, i].set_xlim(interval_list_x[i]) + axs[3, i].set_ylim(interval_list_y[i]) + axs[3, i].set_xlabel("time") + if i == 0: + axs[3, i].set_ylabel("v_dq0_DDPG_pastVals") + # axs[2, i].set_title(f'#{plt_count}') + # plt.show() + plt_count += 1 + + fig.subplots_adjust(wspace=0.2, hspace=0.2) + plt.show() + + fig.savefig(f'{folder_name}/Ausschnitt2.pdf') + +if make_pyplot: + # pyplot PI + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=v_d_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_PI)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + # pyplot ddpg + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=v_d_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_DDPG)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + # pyplot Load + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=R_load)) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() diff --git a/experiments/hp_tune/visualize_tests/old/CompareModelsPlottingLoop.py b/experiments/hp_tune/visualize_tests/old/CompareModelsPlottingLoop.py new file mode 100644 index 00000000..db0a9336 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/old/CompareModelsPlottingLoop.py @@ -0,0 +1,179 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +make_pyplot = True +show_load = True +interval_plt = True + +# interval_list_x = [[0.992, 1], [0.992, 1]] +# interval_list_y = [[150, 230], [-10, 10]] +interval_list_x = [[0, 0.02], [0, 0.02]] +interval_list_y = [[-10, 310], [-10, 40]] +folder_name = 'saves/Comparison_study_22_best_pastVal_HPO_oldtestEnv' + +df = pd.read_pickle(folder_name + '/PI_20000steps') + +env_hist_PI = df['env_hist_PI'] +v_a_PI = env_hist_PI[0]['lc.capacitor1.v'].tolist() +v_b_PI = env_hist_PI[0]['lc.capacitor2.v'].tolist() +v_c_PI = env_hist_PI[0]['lc.capacitor3.v'].tolist() +R_load_PI = 
(env_hist_PI[0]['r_load.resistor1.R'].tolist()) +phase_PI = env_hist_PI[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) +v_d_PI = (v_dq0_PI[0].tolist()) +v_q_PI = (v_dq0_PI[1].tolist()) +v_0_PI = (v_dq0_PI[2].tolist()) + +return_PI = df['Return PI'][0] +kp_c = df['PI_Kp_c'][0] +ki_c = df['PI_Ki_c'][0] +kp_v = df['PI_Kp_v'][0] +ki_v = df['PI_Ki_v'][0] + +model_names = [ + 'model_2_pastVals.zip'] # ['model_0_pastVals.zip','model_2_pastVals.zip', 'model_5_pastVals.zip', 'model_10_pastVals.zip', 'model_16_pastVals.zip', 'model_25_pastVals.zip', ] # , 'model_noPastVals.zip'] +pastVals = ['2'] # ['0', '2', '5', '10', '16', '25'] +reward_list_DDPG = [] + +ts = 1e-4 # if ts stored: take from db + +# t_test_R = np.arange(ts, (len(testcase_100k['v_d_PI'])) * ts, ts).tolist() + +t_test = np.arange(0, round((len(v_0_PI)) * ts, 4), ts).tolist() + +# fig, axs = plt.subplots(len(model_names)+2, len(interval_list_y), figsize=(16, 12)) # , sharex=True) # a new figure window +fig, axs = plt.subplots(len(model_names) + 2, len(interval_list_y), + figsize=(12, 10)) # , sharex=True) # a new figure window + +for i in range(len(interval_list_y)): + plt_count = 2 + ############## Subplots + # fig = plt.figure(figsize=(10,12)) # a new figure window + + for model_name, pV in zip(model_names, pastVals): + + df_DDPG = pd.read_pickle(folder_name + '/' + model_name + '_20000steps') + + if i == 0: + reward_list_DDPG.append(round(df_DDPG['Return DDPG'][0], 4)) + + env_hist_DDPG = df_DDPG['env_hist_DDPG'] + + v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() + v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() + v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() + phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + v_d_DDPG = (v_dq0[0].tolist()) + v_q_DDPG = (v_dq0[1].tolist()) + v_0_DDPG = (v_dq0[2].tolist()) + + axs[0, i].plot(t_test, R_load_PI) + axs[0, i].grid() + axs[0, i].set_xlim(interval_list_x[i]) + # axs[0, i].set_ylim([15, 75]) + # if i == 0: + axs[0, i].set_ylabel("$R_{\mathrm{load}}\,/\,\mathrm{\Omega}$") + # ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + + axs[1, i].plot(t_test, v_d_PI, 'b', label='v_d') + axs[1, i].plot(t_test, v_q_PI, 'r', label='v_q') + axs[1, i].plot(t_test, v_0_PI, 'g', label='v_0') + axs[1, i].grid() + axs[1, i].set_xlim(interval_list_x[i]) + axs[1, i].set_ylim(interval_list_y[i]) + if i == 0: + axs[1, i].set_ylabel("$v_{\mathrm{dq0, PI}}\,/\,\mathrm{V}$") + else: + axs[1, i].set_ylabel("$v_{\mathrm{q0, PI}}\,/\,\mathrm{V}$") + + axs[plt_count, i].plot(t_test, v_d_DDPG, 'b') + axs[plt_count, i].plot(t_test, v_q_DDPG, 'r') + axs[plt_count, i].plot(t_test, v_0_DDPG, 'g') + axs[plt_count, i].grid() + axs[plt_count, i].set_xlim(interval_list_x[i]) + axs[plt_count, i].set_ylim(interval_list_y[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + # axs[plt_count, i].set_ylabel(pV) + axs[plt_count, i].set_ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") + else: + axs[plt_count, i].set_ylabel("$v_{\mathrm{q0, DDPG}}\,/\,\mathrm{V}$") + plt_count += 1 + + +fig.suptitle(f'Model using pastVals:' +str(pastVals)+' \n ' + f'Model-return(MRE)'+ str(reward_list_DDPG) +' \n' + f' PI-return(MRE): {return_PI} \n ' + f'PI: Kp_i = {kp_c}, Ki_i = {ki_c}, Kp_v = {kp_v}, Ki_v = {ki_v}', fontsize=14) + +fig.subplots_adjust(wspace=0.2, hspace=0.2) +plt.show() + 
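Loading a result pickle, pulling the three capacitor voltages and the phase out of the stored history and converting them to dq0 is repeated almost verbatim for every model in these plotting scripts. A small helper along the following lines (hypothetical, not part of the repository) would keep that boilerplate in one place:

```python
import numpy as np
import pandas as pd
from openmodelica_microgrid_gym.util import abc_to_dq0


def load_v_dq0(pickle_path):
    """Return (v_d, v_q, v_0) lists from a stored PI or DDPG comparison result."""
    df = pd.read_pickle(pickle_path)
    hist = df['env_hist_DDPG'][0] if 'env_hist_DDPG' in df else df['env_hist_PI'][0]
    v_abc = np.array([hist[f'lc.capacitor{k}.v'].tolist() for k in '123'])
    phase = hist['inverter1.phase.0'].tolist()
    v_dq0 = abc_to_dq0(v_abc, phase)
    return v_dq0[0].tolist(), v_dq0[1].tolist(), v_dq0[2].tolist()


# e.g. v_d_DDPG, v_q_DDPG, v_0_DDPG = load_v_dq0(folder_name + '/' + model_name + '_20000steps')
```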
+#fig.savefig(f'{folder_name}/Ausschnitt_2pV_blackstart.pdf') + +if make_pyplot: + # pyplot Load + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=R_load_PI)) # , title='R_load') + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + # pyplot PI + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=v_d_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_PI)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + for model_name in model_names: + df_DDPG = pd.read_pickle(folder_name + '/' + model_name + '_20000steps') + + env_hist_DDPG = df_DDPG['env_hist_DDPG'] + + v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() + v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() + v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() + phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + v_d_DDPG = (v_dq0[0].tolist()) + v_q_DDPG = (v_dq0[1].tolist()) + v_0_DDPG = (v_dq0[2].tolist()) + # pyplot ddpg + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=v_d_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_DDPG)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + plot.add_trace( + px.Scatter(x=t_test, y=v_d_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_PI)) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() diff --git a/experiments/hp_tune/visualize_tests/paper_lpt_single.py b/experiments/hp_tune/visualize_tests/paper_lpt_single.py new file mode 100644 index 00000000..8fd1cb9e --- /dev/null +++ b/experiments/hp_tune/visualize_tests/paper_lpt_single.py @@ -0,0 +1,487 @@ +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +save_results = True + +# Fuer den 10s Fall +interval_list_x = [7.1465, 7.1505] +interval_list_y = [80, 345] + +folder_name = 'saves/paper_new2' # _deterministic' + +number_of_steps = '_100000steps' + +df = pd.read_pickle(folder_name + '/PI' + number_of_steps) + +env_hist_PI = df['env_hist_PI'] +v_a_PI = env_hist_PI[0]['lc.capacitor1.v'].tolist() +v_b_PI = env_hist_PI[0]['lc.capacitor2.v'].tolist() +v_c_PI = env_hist_PI[0]['lc.capacitor3.v'].tolist() +R_load_PI = (env_hist_PI[0]['r_load.resistor1.R'].tolist()) +phase_PI = env_hist_PI[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) +v_d_PI = (v_dq0_PI[0].tolist()) +v_q_PI = (v_dq0_PI[1].tolist()) +v_0_PI = (v_dq0_PI[2].tolist()) + +i_a_PI = env_hist_PI[0]['lc.inductor1.i'].tolist() +i_b_PI = env_hist_PI[0]['lc.inductor2.i'].tolist() +i_c_PI = env_hist_PI[0]['lc.inductor3.i'].tolist() +i_dq0_PI = abc_to_dq0(np.array([i_a_PI, i_b_PI, i_c_PI]), phase_PI) +i_d_PI = (i_dq0_PI[0].tolist()) +i_q_PI = (i_dq0_PI[1].tolist()) +i_0_PI = (i_dq0_PI[2].tolist()) + +reward_PI = df['Reward PI'][0] +return_PI = df['Return PI'][0] +kp_c 
= df['PI_Kp_c'][0] +ki_c = df['PI_Ki_c'][0] +kp_v = df['PI_Kp_v'][0] +ki_v = df['PI_Ki_v'][0] + +model_names = ['model_OMG_DDPG_Actor.zip', + 'model_OMG_DDPG_Integrator_no_pastVals.zip', + 'model_OMG_DDPG_Integrator_no_pastVals_corr.zip', + 'model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip'] +ylabels = ['DDPG', 'DDPG-I'] + +return_list_DDPG = [] +reward_list_DDPG = [] + +ts = 1e-4 # if ts stored: take from db + +v_d_ref = [169.7] * len(v_0_PI) + +t_test = np.arange(0, round((len(v_0_PI)) * ts, 4), ts).tolist() +t_reward = np.arange(0, round((len(reward_PI)) * ts, 4), ts).tolist() + +# fig, axs = plt.subplots(len(model_names) + 4, len(interval_list_y), +fig = plt.figure() + +############## Subplots +# fig = plt.figure(figsize=(10,12)) # a new figure window + +df_DDPG = pd.read_pickle(folder_name + '/' + model_names[0] + number_of_steps) + +return_list_DDPG.append(round(df_DDPG['Return DDPG'][0], 7)) +# reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + +env_hist_DDPG = df_DDPG['env_hist_DDPG'] + +v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() +v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() +v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() +i_a = env_hist_DDPG[0]['lc.inductor1.i'].tolist() +i_b = env_hist_DDPG[0]['lc.inductor2.i'].tolist() +i_c = env_hist_DDPG[0]['lc.inductor3.i'].tolist() +phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) +i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) +v_d_DDPG = (v_dq0[0].tolist()) +v_q_DDPG = (v_dq0[1].tolist()) +v_0_DDPG = (v_dq0[2].tolist()) +i_d_DDPG = (i_dq0[0].tolist()) +i_q_DDPG = (i_dq0[1].tolist()) +i_0_DDPG = (i_dq0[2].tolist()) + +DDPG_reward = df_DDPG['Reward DDPG'][0] + +df_DDPG_I = pd.read_pickle(folder_name + '/' + model_names[1] + number_of_steps) + +return_list_DDPG.append(round(df_DDPG_I['Return DDPG'][0], 7)) +# reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + +env_hist_DDPG_I = df_DDPG_I['env_hist_DDPG'] + +v_a_I = env_hist_DDPG_I[0]['lc.capacitor1.v'].tolist() +v_b_I = env_hist_DDPG_I[0]['lc.capacitor2.v'].tolist() +v_c_I = env_hist_DDPG_I[0]['lc.capacitor3.v'].tolist() +i_a_I = env_hist_DDPG_I[0]['lc.inductor1.i'].tolist() +i_b_I = env_hist_DDPG_I[0]['lc.inductor2.i'].tolist() +i_c_I = env_hist_DDPG_I[0]['lc.inductor3.i'].tolist() +phase_I = env_hist_DDPG_I[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_I = abc_to_dq0(np.array([v_a_I, v_b_I, v_c_I]), phase_I) +i_dq0_I = abc_to_dq0(np.array([i_a_I, i_b_I, i_c_I]), phase_I) +v_d_DDPG_I = (v_dq0_I[0].tolist()) +v_q_DDPG_I = (v_dq0_I[1].tolist()) +v_0_DDPG_I = (v_dq0_I[2].tolist()) +i_d_DDPG_I = (i_dq0_I[0].tolist()) +i_q_DDPG_I = (i_dq0_I[1].tolist()) +i_0_DDPG_I = (i_dq0_I[2].tolist()) + +DDPG_reward_I = df_DDPG_I['Reward DDPG'][0] + +df_DDPG_I = pd.read_pickle(folder_name + '/' + model_names[2] + number_of_steps) + +return_list_DDPG.append(round(df_DDPG_I['Return DDPG'][0], 7)) +# reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + +env_hist_DDPG_I = df_DDPG_I['env_hist_DDPG'] + +v_a_I_noPV = env_hist_DDPG_I[0]['lc.capacitor1.v'].tolist() +v_b_I_noPV = env_hist_DDPG_I[0]['lc.capacitor2.v'].tolist() +v_c_I_noPV = env_hist_DDPG_I[0]['lc.capacitor3.v'].tolist() +i_a_I_noPV = env_hist_DDPG_I[0]['lc.inductor1.i'].tolist() +i_b_I_noPV = env_hist_DDPG_I[0]['lc.inductor2.i'].tolist() +i_c_I_noPV = env_hist_DDPG_I[0]['lc.inductor3.i'].tolist() +phase_I_noPV = env_hist_DDPG_I[0]['inverter1.phase.0'].tolist() # 
env_test.env.net.components[0].phase +v_dq0_I_noPV = abc_to_dq0(np.array([v_a_I_noPV, v_b_I_noPV, v_c_I_noPV]), phase_I_noPV) +i_dq0_I_noPV = abc_to_dq0(np.array([i_a_I_noPV, i_b_I_noPV, i_c_I_noPV]), phase_I_noPV) +v_d_DDPG_I_noPV = (v_dq0_I_noPV[0].tolist()) +v_q_DDPG_I_noPV = (v_dq0_I_noPV[1].tolist()) +v_0_DDPG_I_noPV = (v_dq0_I_noPV[2].tolist()) +i_d_DDPG_I_noPV = (i_dq0_I_noPV[0].tolist()) +i_q_DDPG_I_noPV = (i_dq0_I_noPV[1].tolist()) +i_0_DDPG_I_noPV = (i_dq0_I_noPV[2].tolist()) + +DDPG_reward_I_noPV = df_DDPG_I['Reward DDPG'][0] + +df_DDPG_I = pd.read_pickle(folder_name + '/' + model_names[3] + number_of_steps) + +return_list_DDPG.append(round(df_DDPG_I['Return DDPG'][0], 7)) +# reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + +env_hist_DDPG_I = df_DDPG_I['env_hist_DDPG'] + +v_a_I_load = env_hist_DDPG_I[0]['lc.capacitor1.v'].tolist() +v_b_I_load = env_hist_DDPG_I[0]['lc.capacitor2.v'].tolist() +v_c_I_load = env_hist_DDPG_I[0]['lc.capacitor3.v'].tolist() +i_a_I_load = env_hist_DDPG_I[0]['lc.inductor1.i'].tolist() +i_b_I_load = env_hist_DDPG_I[0]['lc.inductor2.i'].tolist() +i_c_I_load = env_hist_DDPG_I[0]['lc.inductor3.i'].tolist() +phase_I_load = env_hist_DDPG_I[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_I_load = abc_to_dq0(np.array([v_a_I_load, v_b_I_load, v_c_I_load]), phase_I_load) +i_dq0_I_load = abc_to_dq0(np.array([i_a_I_load, i_b_I_load, i_c_I_load]), phase_I_load) +v_d_DDPG_I_load = (v_dq0_I_load[0].tolist()) +v_q_DDPG_I_load = (v_dq0_I_load[1].tolist()) +v_0_DDPG_I_load = (v_dq0_I_load[2].tolist()) +i_d_DDPG_I_load = (i_dq0_I_load[0].tolist()) +i_q_DDPG_I_load = (i_dq0_I_load[1].tolist()) +i_0_DDPG_I_load = (i_dq0_I_load[2].tolist()) + +DDPG_reward_I_load = df_DDPG_I['Reward DDPG'][0] + +""" +if save_results: + # Plot setting + + +fig, axs = plt.subplots(3, 1) +axs[0].plot(t_test, R_load_PI, 'g') +axs[0].grid() +axs[0].tick_params(axis='x', colors='w') +axs[0].set_xlim([0, 10]) +axs[0].set_ylabel('$R_\mathrm{load}\,/\,\mathrm{\Omega}$') +# axs[0].setxlabel(r'$t\,/\,\mathrm{s}$') + +axs[1].plot(t_test, v_d_PI, 'b', label='PI') +axs[1].plot(t_test, v_q_PI, 'r') +axs[1].plot(t_test, v_0_PI, 'g') +axs[1].grid() +axs[1].legend() +axs[1].tick_params(axis='x', colors='w') +axs[1].set_xlim([0, 10]) +axs[1].set_ylabel('$v_{\mathrm{dq0}}\,/\,\mathrm{V}$') +# axs[1].setxlabel(r'$t\,/\,\mathrm{s}$') + +axs[2].plot(t_test, v_d_DDPG_I, 'b', label='$\mathrm{DDPG}_\mathrm{I,pv}$') +axs[2].plot(t_test, v_q_DDPG_I, 'r') +axs[2].plot(t_test, v_0_DDPG_I, 'g') +axs[2].grid() +axs[2].legend() +axs[2].set_xlim([0, 10]) +axs[2].set_ylabel('$v_{\mathrm{dq0}}\,/\,\mathrm{V}$') +axs[2].set_xlabel(r'$t\,/\,\mathrm{s}$') + +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/OMG_testcase.png') + fig.savefig(f'{folder_name}/OMG_testcase.pdf') + fig.savefig(f'{folder_name}/OMG_testcase.pgf') +""" +if save_results: + # Plot setting + params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 13, # fontsize for x and y labels (was 10) + 'axes.titlesize': 13, + 'font.size': 13, # was 10 + 'legend.fontsize': 13, # was 10 + 'xtick.labelsize': 13, + 'ytick.labelsize': 13, + 'text.usetex': True, + 'figure.figsize': [8.5, 2.4], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + matplotlib.rcParams.update(params) + + params = {'backend': 'ps', 
+ 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 12, # fontsize for x and y labels (was 10) + 'axes.titlesize': 12, + 'font.size': 12, # was 10 + 'legend.fontsize': 12, # was 10 + 'xtick.labelsize': 12, + 'ytick.labelsize': 12, + 'text.usetex': True, + 'figure.figsize': [5.3, 3.5], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + matplotlib.rcParams.update(params) + +fig = plt.figure() # figsize =(6, 5)) +plt.plot(t_test, R_load_PI, 'g') +plt.grid() +plt.xlim([0, 10]) +plt.ylabel('$R_\mathrm{load}\,/\,\mathrm{\Omega}$') +plt.xlabel(r'$t\,/\,\mathrm{s}$') +plt.tick_params(direction='in') +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/OMG_R_loadsmall.png') + fig.savefig(f'{folder_name}/OMG_R_loadsmall.pdf') + fig.savefig(f'{folder_name}/OMG_R_loadsmall.pgf') + + # Plot setting + params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 10, # fontsize for x and y labels (was 10) + 'axes.titlesize': 10, + 'font.size': 10, # was 10 + 'legend.fontsize': 10, # was 10 + 'xtick.labelsize': 10, + 'ytick.labelsize': 10, + 'text.usetex': True, + 'figure.figsize': [4.5, 6], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + matplotlib.rcParams.update(params) + +fig, axs = plt.subplots(5, 1) +axs[0].plot(t_test, v_d_PI, 'b', label='PI') +axs[0].plot(t_test, v_d_ref, '--', color='gray') +axs[0].grid() +axs[0].legend() +axs[0].set_xlim(interval_list_x) +axs[0].tick_params(axis='x', colors='w') +axs[0].set_ylim(interval_list_y) +# axs[0].set_ylabel("$v_{\mathrm{d}}\,/\,\mathrm{V}$") + +axs[1].plot(t_test, v_d_DDPG, 'b', label='$\mathrm{DDPG}$') +axs[1].plot(t_test, v_d_ref, '--', color='gray') +axs[1].grid() +axs[1].legend() +axs[1].set_xlim(interval_list_x) +axs[1].tick_params(axis='x', colors='w') +axs[1].set_ylim(interval_list_y) +# axs[1].set_ylabel("$v_{\mathrm{d}}\,/\,\mathrm{V}$") + +axs[4].plot(t_test, v_d_DDPG_I, 'b', label='$\mathrm{DDPG}_\mathrm{I,pv}$') +axs[4].plot(t_test, v_d_ref, '--', color='gray') +axs[4].grid() +axs[4].legend() +axs[4].set_xlim(interval_list_x) +axs[4].set_ylim(interval_list_y) +# axs[4].set_ylabel("$v_{\mathrm{d}}\,/\,\mathrm{V}$") +axs[4].set_xlabel(r'$t\,/\,\mathrm{s}$') + +axs[3].plot(t_test, v_d_DDPG_I_load, 'b', label='$\mathrm{DDPG}_\mathrm{I,i_{load}}$') +axs[3].plot(t_test, v_d_ref, '--', color='gray') +axs[3].grid() +axs[3].legend() +axs[3].set_xlim(interval_list_x) +axs[3].tick_params(axis='x', colors='w') +# axs[3].set_xticks(color='w') +axs[3].set_ylim(interval_list_y) +# axs[3].set_ylabel("$v_{\mathrm{d}}\,/\,\mathrm{V}$") + +axs[2].plot(t_test, v_d_DDPG_I_noPV, 'b', label='$\mathrm{DDPG}_\mathrm{I}$') +axs[2].plot(t_test, v_d_ref, '--', color='gray') +axs[2].grid() +axs[2].legend() +axs[2].set_xlim(interval_list_x) +axs[2].tick_params(axis='x', colors='w') +axs[2].set_ylim(interval_list_y) +axs[2].set_ylabel("$v_{\mathrm{d}}\,/\,\mathrm{V}$") + +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/OMG_v_d_compare.png') + fig.savefig(f'{folder_name}/OMG_v_d_compare.pdf') + fig.savefig(f'{folder_name}/OMG_v_d_compare.pgf') + + # Plot setting + params = {'backend': 'ps', + 'text.latex.preamble': 
[r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 10, # fontsize for x and y labels (was 10) + 'axes.titlesize': 10, + 'font.size': 10, # was 10 + 'legend.fontsize': 10, # was 10 + 'xtick.labelsize': 10, + 'ytick.labelsize': 10, + 'text.usetex': True, + 'figure.figsize': [4.5, 6], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + matplotlib.rcParams.update(params) + +fig, axs = plt.subplots(2, 1) +axs[0].plot(t_test, v_d_DDPG_I, 'b', label='$\mathrm{SEC-DDPG}$') +axs[0].plot(t_test, v_q_DDPG_I, 'r') +axs[0].plot(t_test, v_0_DDPG_I, 'g') +axs[0].plot(t_test, v_d_PI, '--b', label='PI') +axs[0].plot(t_test, v_q_PI, '--r') +axs[0].plot(t_test, v_0_PI, '--g') +axs[0].plot(t_test, v_d_ref, '--', color='gray') +axs[0].grid() +axs[0].legend() +axs[0].set_xlim(interval_list_x) +# axs[0].set_ylim(interval_list_y) +# axs[0].set_xlabel(r'$t\,/\,\mathrm{s}$') +axs[0].tick_params(axis='x', colors='w') +axs[0].set_ylabel("$v_{\mathrm{dq0}}\,/\,\mathrm{V}$") + +axs[1].plot(t_test, i_d_DDPG_I, 'b', label='$i_\mathrm{d}$') +axs[1].plot(t_test, i_q_DDPG_I, 'r', label='$i_\mathrm{q}$') +axs[1].plot(t_test, i_0_DDPG_I, 'g', label='$i_\mathrm{0}$') +axs[1].plot(t_test, i_d_PI, '--b') +axs[1].plot(t_test, i_q_PI, '--r') +axs[1].plot(t_test, i_0_PI, '--g') +axs[1].grid() +axs[1].set_xlim(interval_list_x) +# axs[1].set_ylim(interval_list_y) +axs[1].set_xlabel(r'$t\,/\,\mathrm{s}$') +axs[1].set_ylabel("$i_{\mathrm{dq0}}\,/\,\mathrm{A}$") +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/OMG_DDPGpv_PI_compare.png') + fig.savefig(f'{folder_name}/OMG_DDPGpv_PI_compare.pdf') + fig.savefig(f'{folder_name}/OMG_DDPGpv_PI_compare.pgf') + +plt.plot(t_reward, reward_PI, 'b', label=f' PI: ' + f'{round(sum(reward_PI[int(interval_list_x[0] / ts):int(interval_list_x[1] / ts)]) / ((interval_list_x[1] - interval_list_x[0]) / ts), 4)}') +plt.plot(t_reward, DDPG_reward, 'r', label=f' DDPG: ' + f'{round(sum(DDPG_reward[int(interval_list_x[0] / ts):int(interval_list_x[1] / ts)]) / ((interval_list_x[1] - interval_list_x[0]) / ts), 4)}') +plt.plot(t_reward, DDPG_reward_I, 'g', label=f'DDPG+I,pv: ' + f'{round(sum(DDPG_reward_I[int(interval_list_x[0] / ts):int(interval_list_x[1] / ts)]) / ((interval_list_x[1] - interval_list_x[0]) / ts), 4)}') +plt.plot(t_reward, DDPG_reward_I_noPV, 'g', label=f'DDPG+I: ' + f'{round(sum(DDPG_reward_I_noPV[int(interval_list_x[0] / ts):int(interval_list_x[1] / ts)]) / ((interval_list_x[1] - interval_list_x[0]) / ts), 4)}') +plt.plot(t_reward, DDPG_reward_I_load, 'g', label=f'DDPG+I,iLoad: ' + f'{round(sum(DDPG_reward_I_load[int(interval_list_x[0] / ts):int(interval_list_x[1] / ts)]) / ((interval_list_x[1] - interval_list_x[0]) / ts), 4)}') + +plt.grid() +plt.xlim(interval_list_x) +# axs[1, i].set_ylim(interval_list_y[i]) +plt.legend() +plt.xlabel(r'$t\,/\,\mathrm{s}$') + +plt.ylabel("Reward") +plt.show() + +""" +plt.plot(t_test, v_d_DDPG_I_noPV, 'b', label='$v_\mathrm{d}') +plt.plot(t_test, v_q_DDPG_I_noPV, 'b', label='$v_\mathrm{q}') +plt.plot(t_test, v_0_DDPG_I_noPV, 'b', label='$v_\mathrm{0}') +plt.grid() +plt.xlim(interval_list_x) +plt.ylim(interval_list_y) +plt.xlabel(r'$t\,/\,\mathrm{s}$') +plt.ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") +plt.show() + + +plt.plot(t_test, i_d_DDPG_I_noPV, 'b', label='$v_\mathrm{d}') +plt.plot(t_test, i_q_DDPG_I_noPV, 'b', label='$v_\mathrm{q}') +plt.plot(t_test, 
i_0_DDPG_I_noPV, 'b', label='$v_\mathrm{0}') +plt.grid() +plt.xlim(interval_list_x) +plt.ylim(interval_list_y) +plt.xlabel(r'$t\,/\,\mathrm{s}$') +plt.ylabel("$i_{\mathrm{dq0, DDPG}}\,/\,\mathrm{A}$") +plt.show() + + + +fig = plt.figure() +plt.plot(t_test, i_d_PI, 'b', label='PI') +plt.plot(t_test, i_d_DDPG, 'r', label='DDPG') +plt.plot(t_test, i_d_DDPG_I, 'g', label='DDPG+I,pv') +plt.plot(t_test, i_d_DDPG_I_load, 'm', label='DDPG+I,iLoad') +plt.plot(t_test, i_d_DDPG_I_noPV, 'c', label='DDPG+I') +# axs[1, i].plot(t_test, i_q_PI, 'r', label='v_q') +# axs[1, i].plot(t_test, i_0_PI, 'g', label='v_0') +plt.grid() +# axs[1, i].legend() +plt.xlim(interval_list_x) +# axs[3, i].set_ylim(interval_list_y[i]) +plt.ylabel("$i_{\mathrm{d}}\,/\,\mathrm{A}$") + +fig = plt.figure() +plt.plot(t_test, v_d_ref, '--', color='gray') +plt.plot(t_test, v_d_PI, 'b', label='PI') +plt.plot(t_test, v_q_PI, 'b', label='PI') +plt.plot(t_test, v_0_PI, 'b', label='PI') +plt.plot(t_test, v_d_DDPG, 'r', label='DDPG') +plt.plot(t_test, v_q_DDPG, 'r', label='DDPG') +plt.plot(t_test, v_0_DDPG, 'r', label='DDPG') +plt.plot(t_test, v_d_DDPG_I, 'g', label='DDPG+I') +plt.plot(t_test, v_q_DDPG_I, 'g', label='DDPG+I') +plt.plot(t_test, v_0_DDPG_I, 'g', label='DDPG+I') +# axs[2, i].plot(t_test, v_q_PI, 'r', label='v_q') +# axs[2, i].plot(t_test, v_0_PI, 'g', label='v_0') +plt.grid() +plt.legend() +plt.xlim(interval_list_x) +plt.ylim(interval_list_y) +plt.ylabel("$v_{\mathrm{d}}\,/\,\mathrm{V}$") +# else: +# axs[1, i].set_ylabel("$v_{\mathrm{q0, PI}}\,/\,\mathrm{V}$") + +fig = plt.figure() +plt.plot(t_test, i_d_PI, 'b', label='PI') +plt.plot(t_test, i_q_PI, 'b') +plt.plot(t_test, i_0_PI, 'b') +plt.plot(t_test, i_d_DDPG, 'r', label='DDPG') +plt.plot(t_test, i_q_DDPG, 'r') +plt.plot(t_test, i_0_DDPG, 'r') +plt.plot(t_test, i_d_DDPG_I, 'g', label='DDPG+I') +plt.plot(t_test, i_q_DDPG_I, 'g') +plt.plot(t_test, i_0_DDPG_I, 'g') +# axs[1, i].plot(t_test, i_q_PI, 'r', label='v_q') +# axs[1, i].plot(t_test, i_0_PI, 'g', label='v_0') +plt.grid() +# axs[1, i].legend() +plt.xlim(interval_list_x) +# axs[3, i].set_ylim(interval_list_y[i]) +plt.ylabel("$i_{\mathrm{d}}\,/\,\mathrm{A}$") + +fig.subplots_adjust(wspace=0.2, hspace=0.2) +plt.show() + +#fig.savefig(f'{folder_name}/Ausschnitt_2pV_q0.pdf') + +""" diff --git a/experiments/hp_tune/visualize_tests/plt_errorbar.py b/experiments/hp_tune/visualize_tests/plt_errorbar.py new file mode 100644 index 00000000..b317cbe4 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/plt_errorbar.py @@ -0,0 +1,158 @@ +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +# Plot setting +params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 8, # fontsize for x and y labels (was 10) + 'axes.titlesize': 8, + 'font.size': 10, # was 10 + 'legend.fontsize': 10, # was 10 + 'xtick.labelsize': 10, + 'ytick.labelsize': 10, + 'text.usetex': True, + 'figure.figsize': [5.8, 3.8], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + +# I_term = pd.read_pickle('GEM_I_term_3mean_over_50_agents.pkl') +# no_I_term = pd.read_pickle('GEM_no_I_term_3mean_over_50_agents.pkl') + +asd = 1 + +save_results = False +folder_name = 'errorbar_plots/' + +# da json zu groß, kopie aus dashboard.... 
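Translating the comment above: the JSON export of the experiment dashboard was too large, so the per-agent average returns in the lists below were copied in by hand, while the `*_500` variants are read from pickles and appended further down. A purely illustrative helper for summarising such a list of per-agent returns (names are placeholders):

```python
import numpy as np


def summarize_returns(returns):
    """Mean, standard deviation and median of per-agent average returns."""
    r = np.asarray(returns, dtype=float)
    return {'n': int(r.size), 'mean': float(r.mean()),
            'std': float(r.std()), 'median': float(np.median(r))}


# summarize_returns(OMG_DDPG_Actor)  # once the list below has been defined
```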
+ +OMG_DDPG_Actor = [-0.226037, -0.128363, -0.139432, -0.121386, -0.137367, + -0.216827, -0.116579, -0.0831927, -0.112777, -0.127669, + -0.185162, -0.128747, -0.113952, -0.122981, -0.114832, + -0.120671, -0.226531, -0.118882, -0.134699, -0.118027, + -0.149192, -0.121207, -0.253065, -0.219944, -0.1244, + -0.0993589, -0.12237, -0.143523, -0.244333, -0.124357, + -0.152193, -0.118973, -0.0955573, -0.114242, -0.111534, + -0.127907, -0.102504, -0.225466, -0.219972, -0.120333, + -0.134156, -0.116749, -0.122513, -0.167896, -0.062778, + -0.239305, -0.110423, -0.103946, -0.160686, -0.127362] + +OMG_DDPG_Actor_500 = pd.read_pickle('OMG_DDPG_Actorreturn_500_agents.pkl')['return'].tolist()[ + :-1] # einen zu viel geladen! + +# typo! das sind die mit 5 pastVals! +OMG_DDPG_Integrator_no_pastVals = [-0.0566483, -0.177257, -0.22384, -0.0566379, -0.0613575, + -0.866927, -0.0591551, -0.0409672, -0.0410715, -0.0405743, + -0.0481607, -1.00176, -0.0398449, -0.0584291, -0.0428567, + -0.754902, -0.0499666, -0.346553, -0.0448563, -0.0424514, + -0.19927, -0.0424081, -0.0613121, -0.0501086, -0.287048, + -0.214733, -0.0421697, -0.0474572, -0.0464294, -0.0467267, + -0.0483718, -0.0584424, -0.354886, -0.0451979, -0.04627, + -0.047793, -0.0471481, -0.0846913, -0.0446951, -0.0500306, + -0.043155, -0.0718899, -0.039992, -0.0453119, -0.0673279, + -0.0408377, -0.047179, -0.0438636, -0.0430013, -0.0595805] + +OMG_DDPG_Integrator_no_pastVals_500 = \ + pd.read_pickle('OMG_DDPG_Integrator_no_pastValsreturn_500_agents.pkl')['return'].tolist() + +OMG_DDPG_Integrator_no_pastVals_corr = [-0.048334, -0.251245, -0.0688722, -0.0565136, -0.202199, + -0.042535, -0.0408258, -0.0480982, -0.0423354, -0.0461098, + -0.543109, -0.0444726, -0.134507, -0.101061, -0.0410615, + -0.0423758, -0.0732737, -0.0531188, -0.0451057, -0.0557529, + -0.0516102, -0.272256, -0.0494411, -0.0453498, -0.049296, + -0.0524428, -0.0417263, -0.0453462, -0.0466777, -0.0772813, + -0.217484, -0.0407658, -0.0403833, -0.0795559, -0.0393357, + -0.0526313, -0.0443727, -0.0455981, -0.049839, -0.046536, + -0.0453199, -0.0421393, -0.0469275, -0.0441136, -0.0426031, + -0.162181, -0.0523912, -0.0403753, -0.0412137, -0.770299] + +OMG_DDPG_Integrator_no_pastVals_corr_500 = \ + pd.read_pickle('OMG_DDPG_Integrator_no_pastVals_corrreturn_500_agents.pkl')['return'].tolist()[ + :-1] # einen zu viel geladen! 
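The trailing `[:-1]` above (and on the `OMG_DDPG_Actor_500` load further up) drops an accidentally over-fetched entry so that every variant contributes the same number of agents; that is what the German comment ("one loaded too many") refers to. A hypothetical guard that makes the intent explicit:

```python
def trim_to(n_agents, *series):
    """Cut several lists of per-agent returns to a common length before plotting."""
    return [list(s)[:n_agents] for s in series]


# e.g.:
# actor_500, sec_500 = trim_to(500, OMG_DDPG_Actor_500, OMG_DDPG_Integrator_no_pastVals_500)
```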
+ +OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr = [-0.0387997, -0.0409335, -0.0685522, -0.164238, -0.0409236, + -0.0410673, -0.039469, -0.0399732, -0.13207, -0.0415697, + -0.122869, -0.0611268, -0.306491, -0.0992046, -0.044661, + -0.0458972, -0.043849, -0.0500543, -0.0531591, -0.0679286, + -0.20993, -0.0497402, -0.0405819, -0.0746702, -0.203728, + -0.0408563, -0.0708935, -0.0409779, -0.0438561, -0.0432274, + -0.0395637, -0.0404426, -0.0377221, -0.0404959, -0.0465647, + -0.0612425, -0.0409127, -0.0416884, -0.198034, -0.0523231, + -0.2017, -0.0414555, -0.0422072, -0.0398287, -0.0400683, + -0.0461625, -0.264055, -0.0453719, -0.0396692, -0.0411879] + +OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr_500 = \ + pd.read_pickle('OMG_DDPG_Integrator_no_pastVals_i_load_feature_corrreturn_500_agents.pkl')['return'].tolist() +# m = np.array(I_term['return_Mean']) +# s = np.array(I_term['return_Std']) +agents = np.arange(0, 550) +agents = np.arange(0, 500) + +OMG_DDPG_Actor = OMG_DDPG_Actor + OMG_DDPG_Actor_500 +OMG_DDPG_Integrator_no_pastVals = OMG_DDPG_Integrator_no_pastVals + OMG_DDPG_Integrator_no_pastVals_500 +OMG_DDPG_Integrator_no_pastVals_corr = OMG_DDPG_Integrator_no_pastVals_corr + OMG_DDPG_Integrator_no_pastVals_corr_500 +OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr = OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr + \ + OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr_500 + +OMG_DDPG_Actor = OMG_DDPG_Actor_500 +OMG_DDPG_Integrator_no_pastVals = OMG_DDPG_Integrator_no_pastVals_500 +OMG_DDPG_Integrator_no_pastVals_corr = OMG_DDPG_Integrator_no_pastVals_corr_500 +OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr = OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr_500 + +plt.plot(agents, OMG_DDPG_Actor) +plt.plot(agents, OMG_DDPG_Integrator_no_pastVals, 'r') +plt.plot(agents, OMG_DDPG_Integrator_no_pastVals_corr, 'g') +plt.plot(agents, OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr, 'm') +# plt.fill_between(agents, m - s, m + s, facecolor='r') +plt.ylabel('Average return ') +plt.xlabel('Agents') +plt.ylim([-0.6, 0.2]) +plt.grid() +plt.title('I_term') +plt.show() + +if save_results: + matplotlib.rcParams.update(params) + +fig, ax = plt.subplots() # figsize =(6, 5)) +# plt.boxplot((OMG_DDPG_Actor, OMG_DDPG_Integrator_no_pastVals_corr, +# OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr, OMG_DDPG_Integrator_no_pastVals)) +ax.boxplot((OMG_DDPG_Integrator_no_pastVals, OMG_DDPG_Actor)) +# ax.plot( 3, 0.0332, marker='o' ) +plt.grid() +plt.ylim([-0.4, 0]) +plt.xticks([1, 2], ['$\mathrm{SEC}$', '$\mathrm{DDPG}$']) +plt.ylabel('$\overline{\sum{r_k}}$') +plt.tick_params(direction='in') + +if save_results: + fig.savefig(f'{folder_name}/OMG_Errorbar_lim.pgf') + fig.savefig(f'{folder_name}/OMG_Errorbar_lim.png') + fig.savefig(f'{folder_name}/OMG_Errorbar_lim.pdf') + +fig = plt.figure() # figsize =(6, 5)) +plt.boxplot((OMG_DDPG_Actor, OMG_DDPG_Integrator_no_pastVals_corr, + OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr, OMG_DDPG_Integrator_no_pastVals)) +plt.grid() +# plt.ylim([-0.75, 0]) +plt.xticks([1, 2, 3, 4], ['$\mathrm{DDPG}$', '$\mathrm{DDPG}_\mathrm{I}$', + '$\mathrm{DDPG}_\mathrm{I,i_{load}}$', '$\mathrm{DDPG}_\mathrm{I,pv}$']) +plt.ylabel('$\overline{\sum{r_k}}$') +plt.show() +if save_results: + fig.savefig(f'{folder_name}/OMG_Errorbar.png') + fig.savefig(f'{folder_name}/OMG_Errorbar.pdf') + fig.savefig(f'{folder_name}/OMG_Errorbar.pgf') + +plt.boxplot((OMG_DDPG_Actor, OMG_DDPG_Integrator_no_pastVals_corr, + 
OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr, OMG_DDPG_Integrator_no_pastVals)) +plt.grid() +plt.ylim([-0.06, 0]) +plt.xticks([1, 2, 3, 4], ['$\mathrm{DDPG}$', '$\mathrm{DDPG}_\mathrm{I}$', + '$\mathrm{DDPG}_\mathrm{I,i_{load}}$', '$\mathrm{DDPG}_\mathrm{I,pv}$']) +plt.show() diff --git a/experiments/hp_tune/visualize_tests/plt_learningCurve.py b/experiments/hp_tune/visualize_tests/plt_learningCurve.py new file mode 100644 index 00000000..85d400f7 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/plt_learningCurve.py @@ -0,0 +1,180 @@ +import matplotlib +import matplotlib.pyplot as plt +import pandas as pd +import numpy as np + +save_results = True +folder_name = 'errorbar_plots/' + +# Plot setting +params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 12, # fontsize for x and y labels (was 10) + 'axes.titlesize': 12, + 'font.size': 12, # was 10 + 'legend.fontsize': 12, # was 10 + 'xtick.labelsize': 12, + 'ytick.labelsize': 12, + 'text.usetex': True, + 'figure.figsize': [5.5, 3.7], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + +""" +I_term = pd.read_pickle('GEM_I_term_4_1250_agents_data_with_rewards.pkl') +no_I_term = pd.read_pickle('GEM_no_I_term_4_1250_agents_data_with_rewards.pkl') + +asd = 1 + +m = np.array(I_term['return_Mean']) +s = np.array(I_term['return_Std']) +agents = np.arange(0,1250) + +# take the best 50 and the worst 50 and and 450 random + +idxs = np.random.randint(low=50, high=1200, size=450) +m_sort = np.sort(m) +m550 = np.concatenate([m_sort[0:50],m_sort[1200:1250], np.take(m_sort, idxs)]) +""" + +# typo! das sind die mit 5 pastVals! 
+OMG_DDPG_Integrator_no_pastVals = [-0.0566483, -0.177257, -0.22384, -0.0566379, -0.0613575, + -0.866927, -0.0591551, -0.0409672, -0.0410715, -0.0405743, + -0.0481607, -1.00176, -0.0398449, -0.0584291, -0.0428567, + -0.754902, -0.0499666, -0.346553, -0.0448563, -0.0424514, + -0.19927, -0.0424081, -0.0613121, -0.0501086, -0.287048, + -0.214733, -0.0421697, -0.0474572, -0.0464294, -0.0467267, + -0.0483718, -0.0584424, -0.354886, -0.0451979, -0.04627, + -0.047793, -0.0471481, -0.0846913, -0.0446951, -0.0500306, + -0.043155, -0.0718899, -0.039992, -0.0453119, -0.0673279, + -0.0408377, -0.047179, -0.0438636, -0.0430013, -0.0595805] + +OMG_DDPG_Integrator_no_pastVals_500 = \ + pd.read_pickle('OMG_DDPG_Integrator_no_pastValsreturn_500_agents.pkl')['return'].tolist() +OMG_SEC_return = OMG_DDPG_Integrator_no_pastVals + OMG_DDPG_Integrator_no_pastVals_500 + +# OMG_DDPG_return_798 = pd.read_pickle('OMG_DDPG_Actorreturn_8XX_agents.pkl')['return'].tolist() +OMG_DDPG_return_798 = pd.read_pickle('OMG_DDPG_Actorreturn_8XX_agents.pkl')['return_Mean'].tolist() + +idxs = np.random.randint(low=50, high=748, size=450) +m_sort = np.sort(OMG_DDPG_return_798) +OMG_DDPG_return = np.concatenate([m_sort[0:50], m_sort[747:798], np.take(m_sort, idxs)]) + +idx_DDPG_sort = np.argsort(OMG_DDPG_return_798) + +# OMG_DDPG_return = OMG_DDPG_return_798 + +if save_results: + matplotlib.rcParams.update(params) + +fig, ax = plt.subplots() # figsize =(6, 5)) +# plt.boxplot((OMG_DDPG_Actor, OMG_DDPG_Integrator_no_pastVals_corr, +# OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr, OMG_DDPG_Integrator_no_pastVals)) +ax.boxplot((OMG_SEC_return, OMG_DDPG_return)) +# ax.plot( 3, 0.0332, marker='o' ) +plt.grid() +plt.ylim([-0.4, 0]) +plt.xticks([1, 2], ['$\mathrm{SEC}$', '$\mathrm{DDPG}$']) +plt.ylabel('$\overline{r}_{k,v}$') +# plt.ylabel('$1/K\,\sum_{k=0}^K{r_{k,v}}$') +plt.tick_params(direction='in') +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/OMG_Errorbar_lim.pgf') + fig.savefig(f'{folder_name}/OMG_Errorbar_lim.png') + fig.savefig(f'{folder_name}/OMG_Errorbar_lim.pdf') + +##########################LearningCurve############### + +params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 12, # fontsize for x and y labels (was 10) + 'axes.titlesize': 12, + 'font.size': 12, # was 10 + 'legend.fontsize': 12, # was 10 + 'xtick.labelsize': 12, + 'ytick.labelsize': 12, + 'text.usetex': True, + 'figure.figsize': [5.5, 3.7], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + +matplotlib.rcParams.update(params) + +SEC_train_data = pd.read_pickle('OMG_DDPG_Integrator_no_pastVals_8XX_agents_train_data.pkl') +DDPG_train_data = pd.read_pickle('OMG_DDPG_Actor_8XX_agents_train_data.pkl') + +# DDPG data to long -> sort by mean -> take best/worst 50 and 450 random + +SEC_mean_learningCurve_550 = SEC_train_data.mean(axis=1) +SEC_std_learningCurve_550 = SEC_train_data.std(axis=1) + +# sort df by idx (return of test case from above) - not needed, just for doublecheck +df3 = DDPG_train_data.iloc[:, idx_DDPG_sort] + +# get the best/worst idx2 out ouf sort_idx and snip the df to 550 based on that idx2 +idx2 = np.concatenate([idx_DDPG_sort[0:50], idx_DDPG_sort[idxs], idx_DDPG_sort[747:798]]) +df550 = DDPG_train_data.iloc[:, idx2] + +DDPG_mean_learningCurve_550 = df3.mean(axis=1) +DDPG_std_learningCurve_550 = 
df3.std(axis=1) + +low = (SEC_mean_learningCurve_550 - SEC_std_learningCurve_550).to_numpy() +up = (SEC_mean_learningCurve_550 + SEC_std_learningCurve_550).to_numpy() +SEC = SEC_mean_learningCurve_550.to_numpy() +DDPG = DDPG_mean_learningCurve_550.to_numpy() +episode = np.array([list(range(0, 177))]).squeeze() + +fig, ax = plt.subplots() +plt.fill_between(episode, up, low, facecolor='b', alpha=0.25) +plt.fill_between(episode, (DDPG_mean_learningCurve_550 + DDPG_std_learningCurve_550).to_numpy(), + (DDPG_mean_learningCurve_550 - DDPG_std_learningCurve_550).to_numpy(), facecolor='r', alpha=0.25) +plt.plot(episode, SEC, 'b', label='$\mathrm{SEC}$', linewidth=2) +plt.plot(episode, low, '--b', linewidth=0.5) +plt.plot(episode, up, '--b', linewidth=0.5) +plt.plot(episode, DDPG, 'r', label='$\mathrm{DDPG}$', linewidth=2) +plt.plot(episode, (DDPG_mean_learningCurve_550 + DDPG_std_learningCurve_550).to_numpy(), '--r', linewidth=0.5) +plt.plot(episode, (DDPG_mean_learningCurve_550 - DDPG_std_learningCurve_550).to_numpy(), '--r', linewidth=0.5) +plt.grid() +plt.xlim([0, 176]) +plt.tick_params(direction='in') +plt.legend() +# plt.set_xlim([0, 10]) +plt.ylabel('$\overline{r}_{k,v}$') +plt.xlabel(r'$\mathrm{Episode}$') +plt.show() + +if save_results: + matplotlib.rcParams.update(params) + + fig.savefig(f'{folder_name}/OMG_learning_curve.pgf') + fig.savefig(f'{folder_name}/OMG_learning_curve.png') + fig.savefig(f'{folder_name}/OMG_learning_curve.pdf') + +plt.plot(SEC_mean_learningCurve_550, 'b', label='$\mathrm{SEC}$') +plt.plot(DDPG_mean_learningCurve_550, '-.b', label='$\mathrm{DDPG}$') +plt.fill_between(SEC_mean_learningCurve_550 - SEC_std_learningCurve_550, + SEC_mean_learningCurve_550 + SEC_std_learningCurve_550, facecolor='r') +plt.fill_between(DDPG_mean_learningCurve_550 - DDPG_std_learningCurve_550, + DDPG_mean_learningCurve_550 + DDPG_std_learningCurve_550, facecolor='r') +plt.grid() +plt.legend() +plt.xlim([0, 177]) +# plt.set_xlim([0, 10]) +plt.ylabel('$\overline{\sum{r}}$') +plt.xlabel(r'$\mathrm{Episode}$') +plt.show() +asd = 1 + +# not needed, but maybe interesting for futuer to reorder df: +# df2 = DDPG_train_data.iloc[:,idx_DDPG_sort] +# idx2 = np.concatenate([np.array([list(range(0,50))]).squeeze(), np.array([list(range(748,798))]).squeeze(), idxs]) diff --git a/experiments/hp_tune/visualize_tests/trial_analysis.py b/experiments/hp_tune/visualize_tests/trial_analysis.py new file mode 100644 index 00000000..5e223611 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/trial_analysis.py @@ -0,0 +1,432 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from bson import ObjectId +from plotly import tools +from pymongo import MongoClient + +from openmodelica_microgrid_gym.util import dq0_to_abc, abc_to_dq0 + +# db_name = 'PC2_DDGP_Vctrl_single_inv_18_penalties' +db_name = 'PC2_TD3_Vctrl_single_inv_3' +# db_name = 'PC2_TD3_Vctrl_single_inv_2' +# db_name = 'DDPG_Retrain_Best_study18_6462' +# trial = '834' +show_episode_number = 10 +make_pyplot = False + +with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + trial = db.Trial_number_5 + # trial = db.Trial_number_6462 + + trial_config = trial.find_one({"Name": "Config"}) + trial_test = trial.find_one({"Name": "Test"}) + train_data = trial.find_one({"Name": "After_Training"}) + train_episode_data = trial.find_one({"Episode_number": 
show_episode_number}) + + print(f'Starttime = {trial_config["Start time"]}') + print(f'Starttime = {trial_test["End time"]}') + print(' ') + print(f'Node = {trial_config["Node"]}') + print(' ') + + print('Config-Params:') + print(*trial_config.items(), sep='\n') + + ts = 1e-4 # if ts stored: take from db + t_test = np.arange(0, len(trial_test['lc_capacitor1_v']) * ts, ts).tolist() + v_a_test = trial_test['lc_capacitor1_v'] + v_b_test = trial_test['lc_capacitor2_v'] + v_c_test = trial_test['lc_capacitor3_v'] + i_a_test = trial_test['lc_inductor1_i'] + i_b_test = trial_test['lc_inductor2_i'] + i_c_test = trial_test['lc_inductor3_i'] + R_load = trial_test['r_load_resistor1_R'] + + v_sp_d_test = trial_test['inverter1_v_ref_0'] + v_sp_q_test = trial_test['inverter1_v_ref_1'] + v_sp_0_test = trial_test['inverter1_v_ref_2'] + + phase_test = trial_test['Phase'] + + v_sp_abc = dq0_to_abc(np.array([v_sp_d_test, v_sp_q_test, v_sp_0_test]), np.array(phase_test[:-1])) + + v_mess_dq0 = abc_to_dq0(np.array([v_a_test, v_b_test, v_c_test]), np.array(phase_test[:-1])) + + plt.plot(t_test, R_load) + plt.grid() + # plt.xlim([0, 0.1]) + plt.xlabel("time") + plt.ylabel("R_load") + plt.title('Test') + plt.show() + + plt.plot(t_test, v_a_test) + plt.plot(t_test, v_b_test) + plt.plot(t_test, v_c_test) + # plt.plot(t_test, v_sp_abc[0, :]) + plt.grid() + # plt.xlim([0, 0.1]) + plt.xlabel("time") + plt.ylabel("v_abc") + plt.title('Test') + plt.show() + + plt.plot(t_test, v_mess_dq0[0, :]) + plt.plot(t_test, v_mess_dq0[1, :]) + plt.plot(t_test, v_mess_dq0[2, :]) + plt.plot(t_test, v_sp_d_test) + #plt.ylim([-30, 300]) + plt.xlim([0, 0.1]) + plt.grid() + plt.xlabel("time") + plt.ylabel("v_dq0") + plt.title('Test') + plt.show() + + + plt.plot(t_test, i_a_test) + plt.plot(t_test, i_b_test) + plt.plot(t_test, i_c_test) + plt.grid() + plt.xlabel("time") + plt.ylabel("v_abc") + plt.title('Test') + plt.show() + + if 1: + actionP0_test = trial_test['ActionP0'] + actionP1_test = trial_test['ActionP1'] + actionP2_test = trial_test['ActionP2'] + actionI0_test = trial_test['ActionI0'] + actionI1_test = trial_test['ActionI1'] + actionI2_test = trial_test['ActionI2'] + + plt.plot(t_test[1:], actionP0_test) + plt.plot(t_test[1:], actionP1_test) + plt.plot(t_test[1:], actionP2_test) + # plt.xlim([0, 0.1]) + plt.grid() + plt.xlabel("time") + plt.ylabel("action_P") + plt.title('Test') + plt.show() + + plt.plot(t_test[1:], actionI0_test) + plt.plot(t_test[1:], actionI1_test) + plt.plot(t_test[1:], actionI2_test) + # plt.xlim([0, 0.1]) + plt.grid() + plt.xlabel("time") + plt.ylabel("action_I") + plt.title('Test') + plt.show() + + integrator_sum0 = trial_test['integrator_sum0'] # np.cumsum( + # np.array(actionI0_test) * trial_config['integrator_weight']) # trial_test['integrator_sum0']# + integrator_sum1 = trial_test[ + 'integrator_sum1'] # np.cumsum(np.array(actionI1_test) * trial_config['integrator_weight']) + integrator_sum2 = trial_test[ + 'integrator_sum2'] # np.cumsum(np.array(actionI2_test) * trial_config['integrator_weight']) + + plt.plot(t_test[1:], integrator_sum0) + plt.plot(t_test[1:], integrator_sum1) + plt.plot(t_test[1:], integrator_sum2) + # plt.xlim([0, 0.1]) + plt.grid() + plt.xlabel("time") + plt.ylabel("Integratorzustand") + plt.title('Test') + plt.show() + + if make_pyplot: + plot = px.Figure() + plot.add_trace( + px.Scatter(y=actionI0_test)) + plot.add_trace( + px.Scatter(y=actionI1_test)) + plot.add_trace( + px.Scatter(y=actionI2_test)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + 
dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() + + plot = px.Figure() + plot.add_trace( + px.Scatter(y=actionP0_test)) + plot.add_trace( + px.Scatter(y=actionP1_test)) + plot.add_trace( + px.Scatter(y=actionP2_test)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() + + if make_pyplot: + # pyplot v_abc + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=v_a_test)) + # px.Scatter(x=x, y=v_mess_dq0[0][:])) + + plot.add_trace( + px.Scatter(x=t_test, y=v_b_test)) + # px.Scatter(x=x, y=v_mess_dq0[1][:])) + plot.add_trace( + px.Scatter(x=t_test, y=v_c_test)) + # px.Scatter(x=x, y=v_mess_dq0[2][:])) + + plot.add_trace( + px.Scatter(x=t_test, y=v_sp_abc[1, :])) + # px.Scatter(x=x, y=df2['v_1_SP'])) + + plot.add_trace( + px.Scatter(x=t_test, y=v_sp_abc[2, :])) + # px.Scatter(x=x, y=df2['v_2_SP'])) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() + + ############################################################## + # After Training + + train_reward_per_episode = train_data['Mean_eps_reward'] + number_learning_steps = trial_config['Number_learning_Steps'] + episode_len = 2000 # trial_config['training_episode_length'] + learning_rate = trial_config['learning_rate'] + lr_decay_start = trial_config['lr_decay_start'] + lr_decay_duration = trial_config['lr_decay_duration'] + final_lr = trial_config['final_lr'] * learning_rate + + ax = plt.plot(train_reward_per_episode) + plt.grid() + plt.xlabel("Episodes") + # plt.yscale('log') + plt.ylabel("Mean episode Reward") + # plt.ylim([-0.06, -0.025]) + # plt.title("1.000.000") + plt.show() + + if make_pyplot: + plot = px.Figure() + plot.add_trace( + px.Scatter(y=train_reward_per_episode)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() + + t = np.arange(number_learning_steps) + + progress_remaining = 1.0 - (t / number_learning_steps) + + t_start = int(lr_decay_start * number_learning_steps) + t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps, + number_learning_steps)) + + lr_curve = np.maximum( + np.minimum(learning_rate, learning_rate + (t_start * (learning_rate - final_lr)) / (t_end - t_start) \ + - (learning_rate - final_lr) / (t_end - t_start) * ((1.0 - progress_remaining) \ + * number_learning_steps)), final_lr) + + # no step-vise MA needed but episode-vise! 
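+        # Illustrative equivalent of the loop below (assumes number_learning_steps is an
+        # integer multiple of episode_len):
+        # lr_ma = lr_curve[:(len(lr_curve) // episode_len) * episode_len].reshape(-1, episode_len).mean(axis=1)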
+ # lr_ma = np.convolve(lr_curve, np.ones(episode_len), 'valid') / episode_len + num_episodes = int(number_learning_steps / episode_len) + lr_ma = np.zeros(num_episodes) + count = 0 + + for i in range(num_episodes): + lr_ma[i] = np.mean(lr_curve[count:count + episode_len]) + count += episode_len + + # plt.plot(lr_curve) + # plt.show() + + fig = plt.figure() # a new figure window + ax = fig.add_subplot(2, 1, 1) # a new axes + ax = plt.plot(train_reward_per_episode) + plt.grid() + # plt.xlabel("Episodes") + # plt.yscale('log') + plt.ylabel("Mean episode Reward") + + ax2 = fig.add_subplot(2, 1, 2) # a new axes + ax2 = plt.plot(lr_ma) + plt.grid() + plt.xlabel("Episodes") + plt.ylabel("Mean episode LR") + plt.show() + + plt.show() + + if train_episode_data is not None: + # only available if loglevel == 'train' + ############################################################## + # Plot example Training Episode + R_load = train_episode_data['R_load_training'] + i_a = train_episode_data['i_a_training'] + i_b = train_episode_data['i_b_training'] + i_c = train_episode_data['i_c_training'] + v_a = train_episode_data['v_a_training'] + v_b = train_episode_data['v_b_training'] + v_c = train_episode_data['v_c_training'] + reward = train_episode_data['Rewards'] + phase = train_episode_data['Phase'] + + plt.plot(R_load) + plt.grid() + plt.xlabel("steps") + plt.ylabel("R_load") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(i_a) + plt.plot(i_b) + plt.plot(i_c) + plt.grid() + plt.xlabel("steps") + plt.ylabel("i_abc") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(v_a) + plt.plot(v_b) + plt.plot(v_c) + plt.grid() + plt.xlabel("steps") + plt.ylabel("v_abc") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(reward) + plt.grid() + plt.xlabel("steps") + plt.ylabel("Reward") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + df = pd.DataFrame() + df['R_load'] = R_load + + hist = df['R_load'].hist(bins=50) + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + """ + plot = px.Figure() + plot.add_trace( + px.Scatter(y=R_load) + """ + # df2['v_0_SP'] = pd.DataFrame(test_data['inverter1_v_ref_0']) + # df2['v_1_SP'] = pd.DataFrame(test_data['inverter1_v_ref_1']) + # df2['v_2_SP'] = pd.DataFrame(test_data['inverter1_v_ref_2']) + + # df2['phase'] = pd.DataFrame(test_data['Phase']) + + # v_sp_abc = dq0_to_abc(np.array([df2['v_0_SP'], df2['v_1_SP'], df2['v_2_SP']]), np.array(df2['phase'])) + + v_mess_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), np.array(phase)) + + # x = df2['t'] + v_d = v_mess_dq0[0][:] # df2['v_a'] + v_q = v_mess_dq0[1][:] # df2['v_b'] + v_0 = v_mess_dq0[2][:] # df2['v_c'] + + plt.plot(v_d) + plt.plot(v_q) + plt.plot(v_0) + plt.grid() + plt.xlabel("steps") + plt.ylabel("v_dq0") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + # v_a_SP = df2['v_0_SP']#v_sp_abc[0,:] + # v_b_SP = df2['v_1_SP']#v_sp_abc[1,:] + # v_c_SP = df2['v_2_SP']#v_sp_abc[2,:] + if make_pyplot: + plot = px.Figure() + plot.add_trace( + px.Scatter(y=v_a)) + + plot.add_trace( + px.Scatter(y=v_b)) + + plot.add_trace( + px.Scatter(y=v_c)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() diff --git a/experiments/issue51_new/env/rewards.py b/experiments/issue51_new/env/rewards.py new file mode 100644 index 00000000..f557fce8 --- /dev/null +++ 
b/experiments/issue51_new/env/rewards.py @@ -0,0 +1,113 @@ +import numpy as np +from openmodelica_microgrid_gym.util import nested_map +from typing import List + + +class Reward: + def __init__(self, nom, lim, v_DC, gamma, det_run=False, nom_region=1.1, use_gamma_normalization=1): + self._idx = None + self.nom = nom + self.lim = lim + self.v_DC = v_DC + self.use_gamma_normalization = use_gamma_normalization + if self.use_gamma_normalization == 1: + self.gamma = gamma + else: + self.gamma = 0 + self.nom_region = nom_region + self.det_run = det_run + + def set_idx(self, obs): + if self._idx is None: + self._idx = nested_map( + lambda n: obs.index(n), + [[f'lc.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012'], + [f'lc.capacitor{k}.v' for k in '123'], [f'inverter1.v_ref.{k}' for k in '012']]) + + def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + Defines the reward function for the environment. Uses the observations and set-points to evaluate the quality of + the used parameters. + Takes current and voltage measurements and set-points to calculate the mean-root control error and uses a + logarithmic barrier function in case of violating the current limit. Barrier function is adjustable using + parameter mu. + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + iabc_master = data[idx[0]] # 3 phase currents at LC inductors + vabc_master = data[idx[2]] # 3 phase currents at LC inductors + + # set points (sp) + isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + vsp_abc_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + SP = vsp_abc_master * self.lim + mess = vabc_master * self.lim + + if all(np.abs(mess) <= self.nom*1.1): + #if all(np.abs(mess) <= self.lim*self.nom_region): + """ + 1st area - inside wanted (nom) operation range + -v_nom -> + v_nom + rew = 1; if mess = SP + rew = 1/3; if error = SP-mess = 2*v_nom (worst case without braking out from nom area) + """ + rew = np.sum((1 - np.abs(SP - mess) / (2 * self.nom)) * 2 * (1 - self.gamma) / 3 + (1 - self.gamma) / 3) / 3 + + elif any(np.abs(mess) > self.lim): + """ + 3rd area - outside valid area - above lim - possible if enough v_DC - DANGEROUS + +-v_lim -> +-v_DC + + V1: + @ SP = +v_nom AND mess = -v_DC: + rew = -1; if error = v_DC + v_nom -> Worst case, +v_nom wanted BUT -v_DC measured + @ SP = -v_nom AND mess = -v_lim + rew ~ -1/3 - f[(lim-nom)/(nom+v_DC)] + rew -> -1 - 2/3*(1 - |lim - nom| / (nom+v_DC)) + The latter fraction is quite small but leads to depending on the system less then 2/3 is + substracted and we have a gap to the 2nd area! :) + + V2: None is returned to stop the episode (hint: in the env env.abort_reward is given back as reward(?) + + V3: rew = -1 + """ + + # V1: + # rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.v_DC)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma)) / 3 + + # V2: + # if return -> rew = None and in env abort_reward is given to agent + if self.det_run: + return -(1 - self.gamma) + else: + return + + # V3: + # rew = (1 - gamma) + + # elif any(np.abs(vabc_master) > v_DC): + # rew = (1-gamma) + + else: + """ + 2nd area + +-v_nom -> +- v_lim + + @ SP = v_nom AND mess = v_nom (-µV), da if mess > v_nom (hier noch Sicherheitsabstand?) 
+ rew = 1/3 + @ SP = v_nom AND mess = -v_lim + rew = -1/3 + + """ + rew = np.sum( + (1 - np.abs(SP - mess) / (self.nom + self.lim)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma) / 3) / 3 + + return rew # * (1-0.9) + # return -np.clip(error.squeeze(), 0, 1e5) diff --git a/experiments/issue51_new/omniboard_docker_compose.yaml b/experiments/issue51_new/omniboard_docker_compose.yaml new file mode 100644 index 00000000..1e84922d --- /dev/null +++ b/experiments/issue51_new/omniboard_docker_compose.yaml @@ -0,0 +1,28 @@ +version: '3' +services: + + mongo: + image: mongo + ports: + - 127.0.0.1:27017:27017 + environment: + MONGO_INITDB_ROOT_USERNAME: sample + MONGO_INITDB_ROOT_PASSWORD: password + MONGO_INITDB_DATABASE: db + expose: + - 27017 + networks: + - omniboard + + omniboard: + image: vivekratnavel/omniboard:latest + command: [ "--mu", "mongodb://sample:password@mongo:27017/db?authSource=admin" ] + ports: + - 127.0.0.1:9000:9000 + networks: + - omniboard + depends_on: + - mongo + +networks: + omniboard: \ No newline at end of file diff --git a/experiments/issue51_new/policy.py b/experiments/issue51_new/policy.py new file mode 100644 index 00000000..489e3c9a --- /dev/null +++ b/experiments/issue51_new/policy.py @@ -0,0 +1,122 @@ +from collections import Callable +from typing import Optional, Type, Union, List, Dict, Any + +import gym +import torch as th +from stable_baselines.common.schedules import get_schedule_fn +from stable_baselines.td3.policies import TD3Policy +from stable_baselines3 import DDPG +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor, FlattenExtractor, get_actor_critic_arch +from stable_baselines3.common.utils import update_learning_rate +from torch import nn + + +class MultiLRPolicy(TD3Policy): + def __init__( + self, + observation_space: gym.spaces.Space, + action_space: gym.spaces.Space, + lr_schedule: Callable, + net_arch: Optional[Union[List[int], Dict[str, List[int]]]] = None, + activation_fn: Type[nn.Module] = nn.ReLU, + features_extractor_class: Type[BaseFeaturesExtractor] = FlattenExtractor, + features_extractor_kwargs: Optional[Dict[str, Any]] = None, + normalize_images: bool = True, + optimizer_class: Type[th.optim.Optimizer] = th.optim.Adam, + optimizer_kwargs: Optional[Dict[str, Any]] = None, + n_critics: int = 2, + share_features_extractor: bool = True, + ): + super().__init__( + observation_space, + action_space, + features_extractor_class, + features_extractor_kwargs, + optimizer_class=optimizer_class, + optimizer_kwargs=optimizer_kwargs, + squash_output=True, + ) + + # Default network architecture, from the original paper + if net_arch is None: + if features_extractor_class == FlattenExtractor: + net_arch = [400, 300] + else: + net_arch = [] + + actor_arch, critic_arch = get_actor_critic_arch(net_arch) + + self.net_arch = net_arch + self.activation_fn = activation_fn + self.net_args = { + "observation_space": self.observation_space, + "action_space": self.action_space, + "net_arch": actor_arch, + "activation_fn": self.activation_fn, + "normalize_images": normalize_images, + } + self.actor_kwargs = self.net_args.copy() + self.critic_kwargs = self.net_args.copy() + self.critic_kwargs.update( + { + "n_critics": n_critics, + "net_arch": critic_arch, + "share_features_extractor": share_features_extractor, + } + ) + + self.actor, self.actor_target = None, None + self.critic, self.critic_target = None, None + self.share_features_extractor = share_features_extractor + + self._build(lr_schedule) + + def _build(self, lr_schedule: Callable) -> 
None: + # Create actor and target + # the features extractor should not be shared + self.actor = self.make_actor(features_extractor=None) + self.actor_target = self.make_actor(features_extractor=None) + # Initialize the target to have the same weights as the actor + self.actor_target.load_state_dict(self.actor.state_dict()) + + self.actor.optimizer = self.optimizer_class(self.actor.parameters(), lr=lr_schedule[0](1), + **self.optimizer_kwargs) + + if self.share_features_extractor: + self.critic = self.make_critic(features_extractor=self.actor.features_extractor) + # Critic target should not share the features extactor with critic + # but it can share it with the actor target as actor and critic are sharing + # the same features_extractor too + # NOTE: as a result the effective poliak (soft-copy) coefficient for the features extractor + # will be 2 * tau instead of tau (updated one time with the actor, a second time with the critic) + self.critic_target = self.make_critic(features_extractor=self.actor_target.features_extractor) + else: + # Create new features extractor for each network + self.critic = self.make_critic(features_extractor=None) + self.critic_target = self.make_critic(features_extractor=None) + + self.critic_target.load_state_dict(self.critic.state_dict()) + self.critic.optimizer = self.optimizer_class(self.critic.parameters(), lr=lr_schedule[1](1), + **self.optimizer_kwargs) + + +class MultiLRAlgorithm(DDPG): + def _setup_lr_schedule(self) -> None: + """Transform to callable if needed.""" + self.lr_schedule = [get_schedule_fn(lr) for lr in self.learning_rate] + + def _update_learning_rate(self, optimizers: Union[List[th.optim.Optimizer], th.optim.Optimizer]) -> None: + """ + Update the optimizers learning rate using the current learning rate schedule + and the current progress remaining (from 1 to 0). + + :param optimizers: + An optimizer or a list of optimizers. + """ + # Log the current learning rate + logger.record("train/learning_rate", self.lr_schedule[0](self._current_progress_remaining)) + + if not isinstance(optimizers, list): + optimizers = [optimizers] + for i, optimizer in enumerate(optimizers): + update_learning_rate(optimizer, self.lr_schedule[i](self._current_progress_remaining)) diff --git a/experiments/issue51_new/stable_baselines.py b/experiments/issue51_new/stable_baselines.py new file mode 100644 index 00000000..84395674 --- /dev/null +++ b/experiments/issue51_new/stable_baselines.py @@ -0,0 +1,112 @@ +from datetime import datetime +from os import makedirs +from typing import List + +import gym +import numpy as np +from stable_baselines3 import PPO +from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback, EveryNTimesteps +from stable_baselines3.common.monitor import Monitor + +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import nested_map + +np.random.seed(0) + +timestamp = datetime.now().strftime(f'%Y.%b.%d %X ') +makedirs(timestamp) + +# Simulation definitions +net = Network.load('../../net/net_single-inv-curr.yaml') +max_episode_steps = 300 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. 
SafeOpt iterations) +iLimit = 30 # inverter current limit / A +iNominal = 20 # nominal inverter current / A +mu = 2 # factor for barrier function (see below) + + +class Reward: + def __init__(self): + self._idx = None + + def set_idx(self, obs): + if self._idx is None: + self._idx = nested_map( + lambda n: obs.index(n), + [[f'lc1.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012']]) + + def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + Defines the reward function for the environment. Uses the observations and setpoints to evaluate the quality of the + used parameters. + Takes current measurement and setpoints so calculate the mean-root-error control error and uses a logarithmic + barrier function in case of violating the current limit. Barrier function is adjustable using parameter mu. + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + Iabc_master = data[idx[0]] # 3 phase currents at LC inductors + ISPabc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + + # control error = mean-root-error (MRE) of reference minus measurement + # (due to normalization the control error is often around zero -> compared to MSE metric, the MRE provides + # better, i.e. more significant, gradients) + # plus barrier penalty for violating the current constraint + error = np.sum((np.abs((ISPabc_master - Iabc_master)) / iLimit) ** 0.5, axis=0) \ + # + -np.sum(mu * np.log(1 - np.maximum(np.abs(Iabc_master) - iNominal, 0) / (iLimit - iNominal)), axis=0) + error /= max_episode_steps + + return -np.clip(error.squeeze(), 0, 1e5) + + +def xylables(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + fig.savefig(f'{timestamp}/Inductor_currents.pdf') + + +env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=Reward().rew_fun, + viz_cols=[ + PlotTmpl([[f'lc1.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + net=net, + model_path='../../omg_grid/grid.network_singleInverter.fmu') + +with open(f'{timestamp}/env.txt', 'w') as f: + print(str(env), file=f) +env = Monitor(env) + + +class RecordEnvCallback(BaseCallback): + def _on_step(self) -> bool: + obs = env.reset() + for _ in range(max_episode_steps): + env.render() + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = env.step(action) + if done: + break + env.close() + env.reset() + return True + + +model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=f'{timestamp}/') +checkpoint_on_event = CheckpointCallback(save_freq=100000, save_path=f'{timestamp}/checkpoints/') +record_env = RecordEnvCallback() +plot_callback = EveryNTimesteps(n_steps=2000, callback=record_env) +model.learn(total_timesteps=5000000, callback=[checkpoint_on_event, plot_callback]) diff --git a/experiments/issue51_new/stable_baselinesDDPG.py b/experiments/issue51_new/stable_baselinesDDPG.py new file mode 100644 index 00000000..b25199c2 --- /dev/null +++ b/experiments/issue51_new/stable_baselinesDDPG.py @@ -0,0 +1,266 @@ +import logging +from datetime import datetime +from os import makedirs +from tempfile import 
NamedTemporaryFile +from typing import List + +import torch as th +import torch.nn as nn +from labwatch.hyperparameters import UniformFloat +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor + +import gym +import numpy as np +from stable_baselines3 import DDPG +from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise +from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback, EveryNTimesteps +from stable_baselines3.common.monitor import Monitor + +import matplotlib.pyplot as plt +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import nested_map + +from sacred import Experiment +from sacred.observers import FileStorageObserver, MongoObserver + +from labwatch.assistant import LabAssistant +from labwatch.optimizers.random_search import RandomSearch + +np.random.seed(0) + +folder_name = 'DDPG/' +experiment_name = 'DDPG_CC_Reward_MRE_randsearch' +timestamp = datetime.now().strftime(f'%Y.%b.%d_%X') + +makedirs(folder_name + experiment_name + timestamp) + +train_steps = 20000 + +# Simulation definitions +net = Network.load('../../net/net_single-inv-curr.yaml') +max_episode_steps = 300 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +iLimit = 30 # inverter current limit / A +iNominal = 20 # nominal inverter current / A +mu = 2 # factor for barrier function (see below) + + +class Reward: + def __init__(self): + self._idx = None + + def set_idx(self, obs): + if self._idx is None: + self._idx = nested_map( + lambda n: obs.index(n), + [[f'lc1.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012']]) + + def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + Defines the reward function for the environment. Uses the observations and setpoints to evaluate the quality of the + used parameters. + Takes current measurement and setpoints so calculate the mean-root-error control error and uses a logarithmic + barrier function in case of violating the current limit. Barrier function is adjustable using parameter mu. + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + Iabc_master = data[idx[0]] # 3 phase currents at LC inductors + ISPabc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + + # control error = mean-root-error (MRE) of reference minus measurement + # (due to normalization the control error is often around zero -> compared to MSE metric, the MRE provides + # better, i.e. 
more significant, gradients) + # plus barrier penalty for violating the current constraint + error = np.sum((np.abs((ISPabc_master - Iabc_master)) / iLimit) ** 0.5, axis=0) \ + + -np.sum(mu * np.log(1 - np.maximum(np.abs(Iabc_master) - iNominal, 0) / (iLimit - iNominal)), axis=0) + error /= max_episode_steps + + return -np.clip(error.squeeze(), 0, 1e5) + + +def xylables(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + fig.savefig(f'{folder_name + experiment_name + timestamp}/Inductor_currents{datetime.now()}.pdf') + plt.show() + + +env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=Reward().rew_fun, + viz_cols=[ + PlotTmpl([[f'lc1.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + net=net, + model_path='../../omg_grid/grid.network_singleInverter.fmu', + is_normalized=True, + log_level=logging.WARNING) + +with open(f'{folder_name + experiment_name + timestamp}/env.txt', 'w') as f: + print(str(env), file=f) +env = Monitor(env) + +n_actions = env.action_space.shape[-1] +action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions)) + + +class CustomMPL(BaseFeaturesExtractor): + + def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 256): + super(CustomMPL, self).__init__(observation_space, features_dim) + # We assume CxHxW images (channels first) + # Re-ordering will be done by pre-preprocessing or wrapper + n_input_channels = observation_space.shape[0] + self.cnn = nn.Sequential( + nn.Linear(n_input_channels, 32), + nn.ReLU(), + nn.Linear(32, 64), + nn.ReLU(), + ) + + # Compute shape by doing one forward pass + with th.no_grad(): + n_flatten = self.cnn( + th.as_tensor(observation_space.sample()[None]).float() + ).shape[1] + + self.linear = nn.Sequential(nn.Linear(n_flatten, features_dim), nn.ReLU()) + + def forward(self, observations: th.Tensor) -> th.Tensor: + return self.linear(self.cnn(observations)) + + +policy_kwargs = dict( + features_extractor_class=CustomMPL, + features_extractor_kwargs=dict(features_dim=128, net_arch=[32, 32]), +) +ex = Experiment(experiment_name + timestamp) +# ex.observers.append(FileStorageObserver('runs')) +ex.observers.append(MongoObserver(url=f'mongodb://sample:password@localhost:27017/?authMechanism=SCRAM-SHA-1', + db_name='db', failure_dir='fail')) + +a = LabAssistant(ex, "labwatch_demo_keras", optimizer=RandomSearch) + + +@a.search_space +def search_space(): + learning_rate = UniformFloat(lower=10e-6, + upper=10e-1, + default=10e-2, + log_scale=True) + + +# MongoObserver.create(url=f'mongodb://sample:password@localhost:27017/?authMechanism=SCRAM-SHA-1', +# db_name='db')) +@ex.config +def cfg(): + # DDPG learning parameters + gamma = 0.9 # discount factor + batch_size = 128 + memory_interval = 1 + # alpha_actor = 5e-4#5e-6 + learning_rate = 5e-3 # 5e-4 + noise_var = 0.2 + noise_theta = 5 # stiffness of OU + alpha_lRelu = 0.1 + weigth_regularizer = 0.01 + + memory_lim = 5000 # = buffersize? 
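+    # memory_lim is indeed the replay-buffer capacity: main() below passes it to the agent
+    # as buffer_size=memory_lim (number of transitions kept for experience replay).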
+ warm_up_steps_actor = 2048 + warm_up_steps_critic = 1024 + target_model_update = 1000 + + # NN architecture + actor_hidden_size = 100 # Using LeakyReLU + # output linear + critic_hidden_size_1 = 75 # Using LeakyReLU + critic_hidden_size_2 = 75 # Using LeakyReLU + critic_hidden_size_3 = 75 # Using LeakyReLU + # output linear + + n_actions = env.action_space.shape[-1] + + # description = experiment_name + # start_time = timestamp + # corresponding_data_in = folder_name + experiment_name + timestamp + + max_learning_steps = train_steps + + +@ex.automain +def main(gamma, batch_size, memory_interval, learning_rate, noise_var, noise_theta, alpha_lRelu, + weigth_regularizer, + memory_lim, warm_up_steps_actor, warm_up_steps_critic, target_model_update, actor_hidden_size, + critic_hidden_size_1, critic_hidden_size_2, critic_hidden_size_3, n_actions, + max_learning_steps): # description, start_time, + # corresponding_data_in): + class RecordEnvCallback(BaseCallback): + def _on_step(self) -> bool: + obs = env.reset() + for _ in range(max_episode_steps): + env.render() + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = env.step(action) + if done: + break + env.close() + env.reset() + return True + + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size], qf=[critic_hidden_size_1, + critic_hidden_size_2, + critic_hidden_size_3])) + # policy_kwargs = dict( activation_fn=th.nn.LeakyReLU(negative_slope=alpha_lRelu), net_arch=dict(pi=[actor_hidden_size], qf=[critic_hidden_size_1, + # critic_hidden_size_2, + # critic_hidden_size_3])) + model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name + experiment_name + timestamp}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=memory_lim, learning_starts=warm_up_steps_critic, + batch_size=batch_size, tau=0.005, gamma=gamma, action_noise=action_noise, + train_freq=- 1, gradient_steps=- 1, n_episodes_rollout=1, optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + checkpoint_on_event = CheckpointCallback(save_freq=1000, + save_path=f'{folder_name + experiment_name + timestamp}/checkpoints/') + record_env = RecordEnvCallback() + plot_callback = EveryNTimesteps(n_steps=1000, callback=record_env) + model.learn(total_timesteps=max_learning_steps, callback=[checkpoint_on_event, plot_callback]) + + model.save(f'{folder_name + experiment_name + timestamp}/model.zip') + ex.add_artifact(f'{folder_name + experiment_name + timestamp}/model.zip') + + # model.save(experiment_name) + # ex.add_artifact(f'{experiment_name}.zip') + + # with NamedTemporaryFile() as t: + + # model.save(t.name) + # ex.add_artifact(t.name, f'{experiment_name}.zip') + + # del model # remove to demonstrate saving and loading + + # model = DDPG.load("ddpg_CC") + + # obs = env.reset() + # while True: + # action, _states = model.predict(obs) + # obs, rewards, dones, info = env.step(action) + # env.render() + + return 0 diff --git a/experiments/issue51_new/stable_baselinesDDPG_double_lr.py b/experiments/issue51_new/stable_baselinesDDPG_double_lr.py new file mode 100644 index 00000000..63efcfaa --- /dev/null +++ b/experiments/issue51_new/stable_baselinesDDPG_double_lr.py @@ -0,0 +1,286 @@ +from datetime import datetime +from functools import partial +from itertools import 
accumulate +from os import makedirs + +import gym +import matplotlib.pyplot as plt +import numpy as np +import optuna +import torch as th +from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback, EveryNTimesteps +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise +from stochastic.processes import VasicekProcess + +from experiments.issue51_new.env.rewards import Reward +from experiments.issue51_new.policy import MultiLRPolicy, MultiLRAlgorithm +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import RandProcess + +np.random.seed(0) + +folder_name = 'DDPG_VC_hyperoptTEST_Neu/' +# experiment_name = 'DDPG_VC_Reward_MRE_reward_NOT_NORMED' +experiment_name = 'DDPG_VC_' +timestamp = datetime.now().strftime(f'_%Y.%b.%d_%X') + +makedirs(folder_name) + +# Simulation definitions +net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +max_episode_steps = 1000 # number of simulation steps per episode +# num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +# iLimit = 30 # inverter current limit / A +# iNominal = 20 # nominal inverter current / A +mu_c = 2 # factor for barrier function (see below) +mu_v = 2 # factor for barrier function (see below) +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +# plant +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F +R = 28 # nomVoltPeak / 7.5 # / Ohm +lower_bound_load = 11 # to allow maximal load that draws i_limit (toDo: let exceed?) 
+upper_bound_load = 45 # to apply symmetrical load bounds + +loadstep_timestep = max_episode_steps / 2 + +gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=1000, vol=10, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + +def load_step(t, gain): + """ + Changes the load parameters + :param t: + :param gain: device parameter + :return: Sample from SP + """ + # Defines a load step after 0.01 s + # if loadstep_timestep*net.ts < t <= loadstep_timestep*net.ts + net.ts: + # gen.proc.mean = gain * 0.55 + # gen.reserve = gain * 0.55 + # elif t <= net.ts: + # gen.proc.mean = gain + + return gen.sample(t) + + +def experiment_fit_DDPG(learning_rate, gamma, n_trail): + makedirs(folder_name + experiment_name + n_trail) + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + fig.show() + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + fig.savefig(f'{folder_name + experiment_name + n_trail}/Capacitor_voltages{datetime.now()}.pdf') + fig.show() + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + fig.savefig(f'{folder_name + experiment_name + n_trail}/Load.pdf') + fig.show() + + rew = Reward(v_nom, v_lim, v_DC, gamma) + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + # PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + # callback=xylables_R, + # color=[['b', 'r', 'g']], + # style=[[None]] + # ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(load_step, gain=R), + 'r_load.resistor2.R': partial(load_step, gain=R), + 'r_load.resistor3.R': partial(load_step, gain=R), + # 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_lim, + # high=v_lim) if t == 0 else None, + # 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_lim, + # high=v_lim) if t == 0 else None, + # 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_lim, + # high=v_lim) if t == 0 else None, + # 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_lim, + # high=i_lim) if t == 0 else None, + # 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_lim, + # high=i_lim) if t == 0 else None, + # 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_lim, + # high=i_lim) if t == 0 else None, + }, + net=net, + model_path='../../omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=partial(gen.reset, 
initial=R), + is_normalized=True + ) + + with open(f'{folder_name + experiment_name + n_trail}/env.txt', 'w') as f: + print(str(env), file=f) + env = Monitor(env) + + # DDPG learning parameters + # gamma = 0.9 # discount factor + batch_size = 128 + memory_interval = 1 + # alpha_actor = 5e-6 + # learning_rate = 5e-3 + + # learning_rate = trail.suggest_loguniform("lr", 1e-5, 1) + + noise_var = 0.2 + noise_theta = 5 # stiffness of OU + alpha_lRelu = 0.1 + weigth_regularizer = 0.5 + + memory_lim = 5000 # = buffersize? + warm_up_steps_actor = 2048 + warm_up_steps_critic = 1024 + target_model_update = 1000 + + # NN architecture + actor_hidden_size = 100 # Using LeakyReLU + # output linear + critic_hidden_size_1 = 75 # Using LeakyReLU + critic_hidden_size_2 = 75 # Using LeakyReLU + critic_hidden_size_3 = 75 # Using LeakyReLU + + # output linear + + class RecordEnvCallback(BaseCallback): + def _on_step(self) -> bool: + rewards = [] + obs = env.reset() + for _ in range(max_episode_steps): + env.render() + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = env.step(action) + rewards.append(reward) + if done: + break + + acc_Reward = list(accumulate(rewards)) + + plt.plot(rewards) + plt.xlabel(r'$t\,/\,\mathrm{s}$') + plt.ylabel('$Reward$') + plt.grid(which='both') + plt.savefig(f'{folder_name + experiment_name + n_trail}/reward{datetime.now()}.pdf') + plt.show() + + # plt.plot(acc_Reward) + # plt.xlabel(r'$t\,/\,\mathrm{s}$') + # plt.ylabel('$Reward_sum$') + # plt.grid(which='both') + # plt.savefig(f'{folder_name + experiment_name + timestamp}/reward_sum_{datetime.now()}.pdf') + # plt.show() + + env.close() + env.reset() + return True + + n_actions = env.action_space.shape[-1] + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size], qf=[critic_hidden_size_1, + critic_hidden_size_2, + critic_hidden_size_3])) + + # model = DDPG('', env, verbose=1, tensorboard_log=f'{folder_name + experiment_name + n_trail}/', + # policy_kwargs=policy_kwargs, + # learning_rate=learning_rate, buffer_size=memory_lim, learning_starts=warm_up_steps_critic, + # batch_size=batch_size, tau=0.005, gamma=gamma, action_noise=action_noise, + # train_freq=- 1, gradient_steps=- 1, n_episodes_rollout=1, optimize_memory_usage=False, + # create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + # + model = MultiLRAlgorithm(MultiLRPolicy, env, verbose=1, + tensorboard_log=f'{folder_name + experiment_name + n_trail}/', + policy_kwargs=policy_kwargs, + learning_rate=[0, learning_rate], buffer_size=memory_lim, + learning_starts=warm_up_steps_critic, + batch_size=batch_size, tau=0.005, gamma=gamma, action_noise=action_noise, + train_freq=- 1, gradient_steps=- 1, n_episodes_rollout=1, optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + checkpoint_on_event = CheckpointCallback(save_freq=10000, + save_path=f'{folder_name + experiment_name + n_trail}/checkpoints/') + record_env = RecordEnvCallback() + plot_callback = EveryNTimesteps(n_steps=10000, callback=record_env) + model.learn(total_timesteps=200000, callback=[checkpoint_on_event, plot_callback]) + + model.save(f'{folder_name + experiment_name + n_trail}/model.zip') + + return_sum = 0.0 + obs = env.reset() + while True: + + action, _states = model.predict(obs) + obs, rewards, done, info 
= env.step(action) + env.render() + return_sum += rewards + if done: + break + + return return_sum + + +experiment_fit_DDPG(.001, .9, str(0)) +exit() + + +def objective(trail): + learning_rate = trail.suggest_loguniform("lr", 1e-5, 1) + gamma = trail.suggest_loguniform("gamma", 0.8, 1) + + return experiment_fit_DDPG(learning_rate, gamma, str(trail.number)) + + +study = optuna.create_study(direction='maximize', storage=f'sqlite:///{folder_name}optuna_data.sqlite3') + +study.optimize(objective, n_trials=50) +print(study.best_params, study.best_value) + +# pd.Series(index=[trail.params['lr'] for trail in study.trials], data=[trail.value for trail in study.trials]).scatter() diff --git a/experiments/issue51_new/stable_baselinesDDPG_optuna.py b/experiments/issue51_new/stable_baselinesDDPG_optuna.py new file mode 100644 index 00000000..55de2174 --- /dev/null +++ b/experiments/issue51_new/stable_baselinesDDPG_optuna.py @@ -0,0 +1,198 @@ +import logging +from datetime import datetime +from os import makedirs +from typing import List + +import pandas as pd +import gym +import matplotlib.pyplot as plt +import numpy as np +import optuna +import torch as th +import torch.nn as nn +from stable_baselines3 import DDPG +from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback, EveryNTimesteps +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor + +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import nested_map + +np.random.seed(0) + +folder_name = 'DDPG/' +experiment_name = 'DDPG_CC_Reward_MRE_randsearch' +timestamp = datetime.now().strftime(f'%Y.%b.%d_%X') + +makedirs(folder_name + experiment_name + timestamp) + +train_steps = 5000 + +# Simulation definitions +net = Network.load('../../net/net_single-inv-curr.yaml') +max_episode_steps = 300 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +iLimit = 30 # inverter current limit / A +iNominal = 20 # nominal inverter current / A +mu = 2 # factor for barrier function (see below) + + +class Reward: + def __init__(self): + self._idx = None + + def set_idx(self, obs): + if self._idx is None: + self._idx = nested_map( + lambda n: obs.index(n), + [[f'lc1.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012']]) + + def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + Defines the reward function for the environment. Uses the observations and setpoints to evaluate the quality of the + used parameters. + Takes current measurement and setpoints so calculate the mean-root-error control error and uses a logarithmic + barrier function in case of violating the current limit. Barrier function is adjustable using parameter mu. + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. 
currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + Iabc_master = data[idx[0]] # 3 phase currents at LC inductors + ISPabc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + + # control error = mean-root-error (MRE) of reference minus measurement + # (due to normalization the control error is often around zero -> compared to MSE metric, the MRE provides + # better, i.e. more significant, gradients) + # plus barrier penalty for violating the current constraint + error = np.sum((np.abs((ISPabc_master - Iabc_master)) / iLimit) ** 0.5, axis=0) \ + + -np.sum(mu * np.log(1 - np.maximum(np.abs(Iabc_master) - iNominal, 0) / (iLimit - iNominal)), axis=0) + error /= max_episode_steps + + return -np.clip(error.squeeze(), 0, 1e5) + + +def xylables(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + fig.savefig(f'{folder_name + experiment_name + timestamp}/Inductor_currents{datetime.now()}.pdf') + plt.show() + + +env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=Reward().rew_fun, + viz_cols=[ + PlotTmpl([[f'lc1.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + net=net, + model_path='../../omg_grid/grid.network_singleInverter.fmu', + is_normalized=True, + log_level=logging.WARNING) + +with open(f'{folder_name + experiment_name + timestamp}/env.txt', 'w') as f: + print(str(env), file=f) +env = Monitor(env) + +n_actions = env.action_space.shape[-1] +action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions)) + + + +def objective(trail): + # DDPG learning parameters + gamma = 0.9 # discount factor + batch_size = 128 + memory_interval = 1 + + noise_var = 0.2 + noise_theta = 5 # stiffness of OU + alpha_lRelu = 0.1 + weigth_regularizer = 0.01 + + memory_lim = 5000 # = buffersize? 
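+    # Only learning_rate is sampled by Optuna in this objective (trail.suggest_loguniform
+    # below); the remaining constants are fixed, and some of them (memory_interval,
+    # alpha_lRelu, weigth_regularizer) are defined here but never forwarded to the DDPG
+    # constructor.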
+ warm_up_steps_actor = 2048 + warm_up_steps_critic = 1024 + target_model_update = 1000 + + # NN architecture + actor_hidden_size = 100 # Using LeakyReLU + # output linear + critic_hidden_size_1 = 75 # Using LeakyReLU + critic_hidden_size_2 = 75 # Using LeakyReLU + critic_hidden_size_3 = 75 # Using LeakyReLU + # output linear + + n_actions = env.action_space.shape[-1] + + + max_learning_steps = train_steps + + learning_rate = trail.suggest_loguniform("lr", 1e-5, 1) + + class RecordEnvCallback(BaseCallback): + def _on_step(self) -> bool: + obs = env.reset() + for _ in range(max_episode_steps): + env.render() + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = env.step(action) + if done: + break + env.close() + env.reset() + return True + + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size], qf=[critic_hidden_size_1, + critic_hidden_size_2, + critic_hidden_size_3])) + + model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name + experiment_name + timestamp}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=memory_lim, learning_starts=warm_up_steps_critic, + batch_size=batch_size, tau=0.005, gamma=gamma, action_noise=action_noise, + train_freq=- 1, gradient_steps=- 1, n_episodes_rollout=1, optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + checkpoint_on_event = CheckpointCallback(save_freq=1000, + save_path=f'{folder_name + experiment_name + timestamp}/checkpoints/') + record_env = RecordEnvCallback() + plot_callback = EveryNTimesteps(n_steps=1000, callback=record_env) + model.learn(total_timesteps=max_learning_steps, callback=[checkpoint_on_event, plot_callback]) + + model.save(f'{folder_name + experiment_name + timestamp}/model.zip') + + return_sum = 0.0 + obs = env.reset() + while True: + + action, _states = model.predict(obs) + obs, rewards, done, info = env.step(action) + env.render() + return_sum += rewards + if done: + break + + return return_sum + + +study = optuna.create_study(direction='maximize', storage='sqlite:///db.sqlite3') +# change to MAXIMIZE +study.optimize(objective, n_trials=2) + +pd.Series(index=[trail.params['lr'] for trail in study.trials], data=[trail.value for trail in study.trials]).scatter() diff --git a/experiments/issue51_new/stable_baselinesDDPG_voltage_control.py b/experiments/issue51_new/stable_baselinesDDPG_voltage_control.py new file mode 100644 index 00000000..cb394ed3 --- /dev/null +++ b/experiments/issue51_new/stable_baselinesDDPG_voltage_control.py @@ -0,0 +1,351 @@ +from datetime import datetime +from functools import partial +from itertools import accumulate +from os import makedirs + +import time + +import torch as th +import torch.nn as nn +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor + +import gym +import numpy as np +import pandas as pd +import optuna + +import matplotlib.pyplot as plt + +from stable_baselines3 import DDPG +from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise +from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback, EveryNTimesteps +from stable_baselines3.common.monitor import Monitor +from stochastic.processes import VasicekProcess + +from experiments.issue51_new.env.rewards import Reward +from 
openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import nested_map, RandProcess + +np.random.seed(0) + +folder_name = 'DDPG_VC_randLoad_exploringStarts/' +# experiment_name = 'DDPG_VC_Reward_MRE_reward_NOT_NORMED' +experiment_name = 'DDPG_VC_bestParamsTest' +timestamp = datetime.now().strftime(f'_%Y.%b.%d_%X') + + + +makedirs(folder_name, exist_ok=True) + +# Simulation definitions +net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +max_episode_steps = 1000 # number of simulation steps per episode +# num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +# iLimit = 30 # inverter current limit / A +# iNominal = 20 # nominal inverter current / A +mu_c = 2 # factor for barrier function (see below) +mu_v = 2 # factor for barrier function (see below) +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +# plant +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F +R = 28 # nomVoltPeak / 7.5 # / Ohm +lower_bound_load = 11 # to allow maximal load that draws i_limit (toDo: let exceed?) +upper_bound_load = 45 # to apply symmetrical load bounds + +loadstep_timestep = max_episode_steps / 2 + +gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=1000, vol=10, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + +class RandomLoad: + def __init__(self, max_episode_steps, ts, loadstep_time=None): + self.max_episode_steps = max_episode_steps + self.ts = ts + if loadstep_time is None: + self.loadstep_time = np.random.randint(0, self.max_episode_steps) + else: + self.loadstep_time = loadstep_time + + def reset(self, loadstep_time=None): + if loadstep_time is None: + self.loadstep_time = np.random.randint(0, self.max_episode_steps) + else: + self.loadstep_time = loadstep_time + + def load_step(self, t, gain): + """ + Changes the load parameters + :param t: + :param gain: device parameter + :return: Sample from SP + """ + # Defines a load step after 0.01 s + if self.loadstep_time * self.ts < t <= self.loadstep_time * self.ts + self.ts: + gen.proc.mean = gain * 0.55 + gen.reserve = gain * 0.55 + elif t <= self.ts: + gen.proc.mean = gain + + return gen.sample(t) + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +# def experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, n_trail): +def experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, batch_size, + actor_hidden_size, critic_hidden_size, n_trail): + makedirs(folder_name + experiment_name + n_trail, exist_ok=True) + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + plt.close() + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + #ax.set_xlim([0, 0.005]) + ts = time.gmtime() + fig.savefig(f'{folder_name + experiment_name + n_trail}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + 
ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + fig.savefig(f'{folder_name + experiment_name + n_trail}/Load.pdf') + plt.close() + + rew = Reward(v_nom, v_lim, v_DC, gamma, use_gamma_normalization=use_gamma_in_rew) + rand_load = RandomLoad(max_episode_steps, net.ts) + + cb = CallbackList() + cb.append(partial(gen.reset, initial=R)) + cb.append(rand_load.reset) + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load.load_step, gain=R), + 'r_load.resistor2.R': partial(rand_load.load_step, gain=R), + 'r_load.resistor3.R': partial(rand_load.load_step, gain=R), + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_lim, + high=v_lim) if t == 0 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_lim, + high=v_lim) if t == 0 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_lim, + high=v_lim) if t == 0 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_lim, + high=i_lim) if t == 0 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_lim, + high=i_lim) if t == 0 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_lim, + high=i_lim) if t == 0 else None, + }, + net=net, + model_path='../../omg_grid/grid.paper_loadstep.fmu', + # on_episode_reset_callback=partial(gen.reset, initial=R), + # on_episode_reset_callback=[partial(gen.reset, initial=R), partial(rand_load.reset)], + # on_episode_reset_callback=rand_load.reset, + on_episode_reset_callback=cb.fire, + is_normalized=True + ) + + with open(f'{folder_name + experiment_name + n_trail}/env.txt', 'w') as f: + print(str(env), file=f) + env = Monitor(env) + + # DDPG learning parameters + # gamma = 0.9 # discount factor + batch_size = batch_size + memory_interval = 1 + # alpha_actor = 5e-6 + # learning_rate = 5e-3 + + # learning_rate = trail.suggest_loguniform("lr", 1e-5, 1) + + noise_var = 0.2 + noise_theta = 5 # stiffness of OU + #alpha_lRelu = alpha_lRelu + weigth_regularizer = 0.5 + + memory_lim = 5000 # = buffersize? 
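+    # Of the warm-up settings defined next, only warm_up_steps_critic is consumed below
+    # (as SB3 learning_starts); warm_up_steps_actor and target_model_update are not passed
+    # to the DDPG constructor.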
+ warm_up_steps_actor = 2048 + warm_up_steps_critic = 1024 + target_model_update = 1000 + + # NN architecture + actor_hidden_size = actor_hidden_size # Using LeakyReLU + # output linear + critic_hidden_size_1 = critic_hidden_size # Using LeakyReLU + critic_hidden_size_2 = critic_hidden_size # Using LeakyReLU + critic_hidden_size_3 = critic_hidden_size # Using LeakyReLU + + # output linear + + class RecordEnvCallback(BaseCallback): + def _on_step(self) -> bool: + rewards = [] + obs = env.reset() + for _ in range(max_episode_steps): + env.render() + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = env.step(action) + rewards.append(reward) + if done: + break + + acc_Reward = list(accumulate(rewards)) + + plt.plot(rewards) + plt.xlabel(r'$t\,/\,\mathrm{s}$') + plt.ylabel('$Reward$') + plt.grid(which='both') + ts = time.gmtime() + plt.savefig(f'{folder_name + experiment_name + n_trail}/reward{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + # plt.plot(acc_Reward) + # plt.xlabel(r'$t\,/\,\mathrm{s}$') + # plt.ylabel('$Reward_sum$') + # plt.grid(which='both') + # plt.savefig(f'{folder_name + experiment_name + timestamp}/reward_sum_{datetime.now()}.pdf') + # plt.show() + + env.close() + env.reset() + return True + + n_actions = env.action_space.shape[-1] + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size], qf=[critic_hidden_size_1, + critic_hidden_size_2, + critic_hidden_size_3])) + + model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name + experiment_name + n_trail}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=memory_lim, learning_starts=warm_up_steps_critic, + batch_size=batch_size, tau=0.005, gamma=gamma, action_noise=action_noise, + train_freq=- 1, gradient_steps=- 1, n_episodes_rollout=1, optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + model.actor.mu._modules['0'].weight.data = model.actor.mu._modules['0'].weight.data * weight_scale + model.actor.mu._modules['2'].weight.data = model.actor.mu._modules['2'].weight.data * weight_scale + model.actor_target.mu._modules['0'].weight.data = model.actor_target.mu._modules['0'].weight.data * weight_scale + model.actor_target.mu._modules['2'].weight.data = model.actor_target.mu._modules['2'].weight.data * weight_scale + # model.actor.mu._modules['0'].bias.data = model.actor.mu._modules['0'].bias.data * weight_bias_scale + # model.actor.mu._modules['2'].bias.data = model.actor.mu._modules['2'].bias.data * weight_bias_scale + + checkpoint_on_event = CheckpointCallback(save_freq=10000, + save_path=f'{folder_name + experiment_name + n_trail}/checkpoints/') + record_env = RecordEnvCallback() + plot_callback = EveryNTimesteps(n_steps=10000, callback=record_env) + model.learn(total_timesteps=200000, callback=[checkpoint_on_event, plot_callback]) + + model.save(f'{folder_name + experiment_name + n_trail}/model.zip') + + return_sum = 0.0 + obs = env.reset() + rew.gamma = 0 + while True: + + action, _states = model.predict(obs) + obs, rewards, done, info = env.step(action) + env.render() + return_sum += rewards + if done: + break + + return return_sum + + +def objective(trail): + learning_rate = 0.0004 # trail.suggest_loguniform("lr", 1e-5, 5e-3) # 0.0002# + gamma = 0.7 # 
trail.suggest_loguniform("gamma", 0.5, 0.99) + weight_scale = 0.02 # trail.suggest_loguniform("weight_scale", 5e-4, 1) # 0.005 + batch_size = 128 # trail.suggest_int("batch_size", 32, 1024) # 128 + # alpha_lRelu = trail.suggest_loguniform("alpha_lRelu", 0.0001, 0.5) #0.1 + actor_hidden_size = 100 # trail.suggest_int("actor_hidden_size", 10, 500) # 100 # Using LeakyReLU + # output linear + critic_hidden_size = 100 # trail.suggest_int("critic_hidden_size", 10, 500) # # Using LeakyReLU + + # memory_interval = 1 + # noise_var = 0.2 + # noise_theta = 5 # stiffness of OU + # weigth_regularizer = 0.5 + + memory_lim = 5000 # = buffersize? + warm_up_steps_actor = 2048 + warm_up_steps_critic = 1024 + target_model_update = 1000 + + # + + # n_trail = str(0)#str(trail.number) + # gamma = 0.75 + use_gamma_in_rew = 1 + + return experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, batch_size, + actor_hidden_size, critic_hidden_size, str(trail.number)) + + +# study = optuna.load_study(study_name="V-crtl_learn_use_hyperopt_params", storage="sqlite:///Hyperotp_visualization/test.sqlite3") + +study = optuna.create_study(study_name="V-crtl_stochLoad_single_Loadstep_exploring_starts", + direction='maximize', storage=f'sqlite:///{folder_name}optuna_data.sqlite3') + +study.optimize(objective, n_trials=1) +print(study.best_params, study.best_value) + +# pd.Series(index=[trail.params['lr'] for trail in study.trials], data=[trail.value for trail in study.trials]).scatter() diff --git a/experiments/model_validation/execution/monte_carlo_runner.py b/experiments/model_validation/execution/monte_carlo_runner.py index a242c428..7c2b298b 100644 --- a/experiments/model_validation/execution/monte_carlo_runner.py +++ b/experiments/model_validation/execution/monte_carlo_runner.py @@ -66,13 +66,15 @@ def run(self, n_episodes: int = 10, n_mc: int = 5, visualise: bool = False, prep for m in tqdm(range(n_mc), desc='monte_carlo_run', unit='epoch', leave=False): prepare_mc_experiment() # reset stoch components - r_vec = np.zeros(self.env.max_episode_steps) + r_vec = np.zeros(self.env.max_episode_steps + 1) obs = self.env.reset() - for p in tqdm(range(self.env.max_episode_steps), desc='steps', unit='step', leave=False): + for p in tqdm(range(self.env.max_episode_steps + 1), desc='steps', unit='step', leave=False): self.agent.observe(r, False) act = self.agent.act(obs) + if p == 1999: + asd = 1 obs, r, done, info = self.env.step(act) r_vec[p] = r self.env.render() @@ -97,7 +99,7 @@ def run(self, n_episodes: int = 10, n_mc: int = 5, visualise: bool = False, prep dev_return = 0 print('NO DEV RETURN!!!!') - dev_fac = 5 # 3 + dev_fac = 0.5 # 3 print(self.agent.episode_return) print(dev_return) diff --git a/experiments/model_validation/single_inverter_voltage_current_control_safe_opt_includingTB.py b/experiments/model_validation/single_inverter_voltage_current_control_safe_opt_includingTB.py index a39491e1..5ba0b8bf 100644 --- a/experiments/model_validation/single_inverter_voltage_current_control_safe_opt_includingTB.py +++ b/experiments/model_validation/single_inverter_voltage_current_control_safe_opt_includingTB.py @@ -50,14 +50,14 @@ matplotlib.rcParams.update(params) include_simulate = True -show_plots = True +show_plots = False balanced_load = False do_measurement = False -save_results = False +save_results = True # Files saves results and resulting plots to the folder saves_VI_control_safeopt in the current directory current_directory = os.getcwd() -save_folder = os.path.join(current_directory, 
r'VSim_rebase2_MC3') +save_folder = os.path.join(current_directory, r'V_ctrl_delay_included') os.makedirs(save_folder, exist_ok=True) np.random.seed(1) @@ -67,7 +67,7 @@ delta_t = 1e-4 # simulation time step size / s undersample = 1 max_episode_steps = 2000 # number of simulation steps per episode -num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +num_episodes = 40 # number of simulation episodes (i.e. SafeOpt iterations) n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from v_DC = 600 # DC-link voltage / V; will be set as model parameter in the FMU nomFreq = 60 # nominal grid frequency / Hz @@ -188,7 +188,7 @@ def cal_J_min(phase_shift, amp_dev): # unsafe, if the new measured performance drops below 20 % of the initial performance of the initial safe (!) # parameter set safe_threshold = 0 - j_min = cal_J_min(phase_shift, amp_dev) # cal min allowed performance + j_min = 15000 # cal_J_min(phase_shift, amp_dev) # cal min allowed performance # The algorithm will not try to expand any points that are below this threshold. This makes the algorithm stop # expanding points eventually. @@ -206,6 +206,7 @@ def cal_J_min(phase_shift, amp_dev): # Definition of the controllers # Choose Kp and Ki for the current and voltage controller as mutable parameters mutable_params = dict(voltageP=MutableFloat(0.0175), voltageI=MutableFloat(12)) # 300Hz + #mutable_params = dict(voltageP=MutableFloat(0.022), voltageI=MutableFloat(213)) # 300Hz voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], limits=(-iLimit, iLimit)) @@ -224,11 +225,9 @@ def cal_J_min(phase_shift, amp_dev): # Define a voltage forming inverter using the PIPI and droop parameters from above - # Controller with observer - # ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, delta_t, droop_param, qdroop_param, - # observer=[Lueneberger(*params) for params in - # repeat((A, B, C, L, delta_t * undersample, v_DC / 2), 3)], undersampling=undersample, - # name='master') + # Controller with observer ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, delta_t, + # droop_param, qdroop_param, observer=[Lueneberger(*params) for params in repeat((A, B, C, L, delta_t * + # undersample, v_DC / 2), 3)], undersampling=undersample, name='master') # Controller without observer ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, @@ -356,7 +355,7 @@ def reset_loads(): runner = MonteCarloRunner(agent, env) runner.run(num_episodes, n_mc=n_MC, visualise=True, prepare_mc_experiment=reset_loads, - return_gradient_extend=True) + return_gradient_extend=False) df_len = pd.DataFrame({'lengthscale': lengthscale, 'bounds': bounds, diff --git a/net/net_p10.yaml b/net/net_p10.yaml new file mode 100644 index 00000000..56d096f3 --- /dev/null +++ b/net/net_p10.yaml @@ -0,0 +1,51 @@ +v_nom: 325.27 +freq_nom: 50 +ts: 1e-4 +#max_episode_steps: 1000 + +components: + inv1: + id: inverter1 + i_nom: 300 + i_lim: 400 + #v_nom: 190 + v_lim: 650 + v_DC: 800 + pdroop: + gain: 0 + tau: 0.005 + qdroop: + gain: 0 + tau: 0.005 + v_noise: + fun: + normal: # np.random.* + loc: 0 + scale: 0.4 + clip: + a_min: 0.3 + a_max: 0.5 + i_noise: + fun: + normal: # np.random.* + loc: 0 + scale: 0.0018 + clip: # np.clip + a_min: 0.0005 + a_max: 0.32 + cls: MasterInverter_dq0 + in: + u: [ i1p1, i1p2, i1p3 ] # names of the inputs + out: + v: [ lc.capacitor1.v, lc.capacitor2.v, lc.capacitor3.v 
] + i: [ lc.inductor1.i, lc.inductor2.i, lc.inductor3.i ] + + # iref: [0,0,0] + # vref: [1,0,0] + + load: + id: r_load + cls: Load + out: + i: [ .resistor1.i, .resistor2.i, .resistor3.i ] + R: [ .resistor1.R, .resistor2.R, .resistor3.R ] diff --git a/net/net_single-inv-Paper_Loadstep.yaml b/net/net_single-inv-Paper_Loadstep.yaml index 856506d0..fd534b89 100644 --- a/net/net_single-inv-Paper_Loadstep.yaml +++ b/net/net_single-inv-Paper_Loadstep.yaml @@ -5,8 +5,10 @@ ts: 1e-4 components: inv1: id: inverter1 - #i_nom: 20 - #i_lim: 30 + i_nom: 12 + i_lim: 16 + #v_nom: 190 + v_lim: 285 v_DC: 600 v_noise: fun: diff --git a/net/net_single-inv-curr.yaml b/net/net_single-inv-curr.yaml index aedfed2c..a3664577 100644 --- a/net/net_single-inv-curr.yaml +++ b/net/net_single-inv-curr.yaml @@ -1,6 +1,6 @@ v_nom: 230*sqrt(2) -#freq_nom: 50 -ts: .5e-4 +freq_nom: 50 +ts: 0.5e-4 components: inv1: diff --git a/net/net_vctrl_single_inv.yaml b/net/net_vctrl_single_inv.yaml new file mode 100644 index 00000000..7e165dfe --- /dev/null +++ b/net/net_vctrl_single_inv.yaml @@ -0,0 +1,51 @@ +v_nom: 169.7 +freq_nom: 60 +ts: 1e-4 +#max_episode_steps: 1000 + +components: + inv1: + id: inverter1 + i_nom: 12 + i_lim: 16 + #v_nom: 190 + v_lim: 285 + v_DC: 600 + pdroop: + gain: 0 + tau: 0.005 + qdroop: + gain: 0 + tau: 0.005 + v_noise: + fun: + normal: # np.random.* + loc: 0 + scale: 0.4 + clip: + a_min: 0.3 + a_max: 0.5 + i_noise: + fun: + normal: # np.random.* + loc: 0 + scale: 0.0018 + clip: # np.clip + a_min: 0.0005 + a_max: 0.32 + cls: MasterInverter + in: + u: [ i1p1, i1p2, i1p3 ] # names of the inputs + out: + v: [ lc.capacitor1.v, lc.capacitor2.v, lc.capacitor3.v ] + i: [ lc.inductor1.i, lc.inductor2.i, lc.inductor3.i ] + + # iref: [0,0,0] + # vref: [1,0,0] + + load: + id: r_load + cls: Load + out: + #i: [ .resistor1.i, .resistor2.i, .resistor3.i ] + R: [ .resistor1.R, .resistor2.R, .resistor3.R ] diff --git a/net/net_vctrl_single_inv_dq0.yaml b/net/net_vctrl_single_inv_dq0.yaml new file mode 100644 index 00000000..6c78a400 --- /dev/null +++ b/net/net_vctrl_single_inv_dq0.yaml @@ -0,0 +1,51 @@ +v_nom: 169.7 +freq_nom: 60 +ts: 1e-4 +#max_episode_steps: 1000 + +components: + inv1: + id: inverter1 + i_nom: 12 + i_lim: 16 + #v_nom: 190 + v_lim: 285 + v_DC: 600 + pdroop: + gain: 0 + tau: 0.005 + qdroop: + gain: 0 + tau: 0.005 + v_noise: + fun: + normal: # np.random.* + loc: 0 + scale: 0.4 + clip: + a_min: 0.3 + a_max: 0.5 + i_noise: + fun: + normal: # np.random.* + loc: 0 + scale: 0.0018 + clip: # np.clip + a_min: 0.0005 + a_max: 0.32 + cls: MasterInverter_dq0 + in: + u: [ i1p1, i1p2, i1p3 ] # names of the inputs + out: + v: [ lc.capacitor1.v, lc.capacitor2.v, lc.capacitor3.v ] + i: [ lc.inductor1.i, lc.inductor2.i, lc.inductor3.i ] + + # iref: [0,0,0] + # vref: [1,0,0] + + load: + id: r_load + cls: Load + out: + i: [ .resistor1.i, .resistor2.i, .resistor3.i ] + R: [ .resistor1.R, .resistor2.R, .resistor3.R ] diff --git a/omg_grid/create_fmu.mos b/omg_grid/create_fmu.mos index 317ab001..5be93f79 100644 --- a/omg_grid/create_fmu.mos +++ b/omg_grid/create_fmu.mos @@ -3,4 +3,4 @@ setCommandLineOptions("-d=newInst"); getErrorString(); setCommandLineOptions("-d=initialization"); getErrorString(); setCommandLineOptions("--simCodeTarget=Cpp"); getErrorString(); setCommandLineOptions("-d=-disableDirectionalDerivatives"); getErrorString(); -OpenModelica.Scripting.translateModelFMU(grid.network, version="2.0", fmuType = "me"); getErrorString(); +OpenModelica.Scripting.translateModelFMU(grid.P10_R_load, version="2.0", fmuType = 
"me"); getErrorString(); diff --git a/omg_grid/grid.mo b/omg_grid/grid.mo index ad04737e..a7a589ce 100644 --- a/omg_grid/grid.mo +++ b/omg_grid/grid.mo @@ -2152,6 +2152,42 @@ package grid annotation( Diagram); end testbench_SC_load; + + model P10_R_load + grid.inverters.inverter inverter1(v_DC = 60) annotation( + Placement(visible = true, transformation(origin = {-70, 30}, extent = {{-10, 10}, {10, -10}}, rotation = 0))); + Modelica.Blocks.Interfaces.RealInput i1p1 annotation( + Placement(visible = true, transformation(origin = {-104, 18}, extent = {{-8, -8}, {8, 8}}, rotation = 0), iconTransformation(origin = {-104, 18}, extent = {{-8, -8}, {8, 8}}, rotation = 0))); + Modelica.Blocks.Interfaces.RealInput i1p2 annotation( + Placement(visible = true, transformation(origin = {-104, 30}, extent = {{-8, -8}, {8, 8}}, rotation = 0), iconTransformation(origin = {-104, 30}, extent = {{-8, -8}, {8, 8}}, rotation = 0))); + Modelica.Blocks.Interfaces.RealInput i1p3 annotation( + Placement(visible = true, transformation(origin = {-104, 42}, extent = {{-8, -8}, {8, 8}}, rotation = 0), iconTransformation(origin = {-104, 42}, extent = {{-8, -8}, {8, 8}}, rotation = 0))); + grid.filter.lc lc(C1 = 0.0000136, C2 = 0.0000136, C3 = 0.0000136, L1 = 0.0023, L2 = 0.0023, L3 = 0.0023, R1 = 0.4, R2 = 0.4, R3 = 0.4, R4 = 0.0000000001, R5 = 0.0000000001, R6 = 0.0000000001) annotation( + Placement(visible = true, transformation(origin = {-32, 30}, extent = {{-10, -10}, {10, 10}}, rotation = 0))); + grid.loads.r r_load(R1 = 7.15, R2 = 7.15, R3 = 7.15) annotation( + Placement(visible = true, transformation(origin = {10, 30}, extent = {{-10, -10}, {10, 10}}, rotation = 0))); + equation + connect(i1p1, inverter1.u1) annotation( + Line(points = {{-104, 18}, {-86, 18}, {-86, 36}, {-80, 36}}, color = {0, 0, 127})); + connect(i1p2, inverter1.u2) annotation( + Line(points = {{-104, 30}, {-80, 30}}, color = {0, 0, 127})); + connect(i1p3, inverter1.u3) annotation( + Line(points = {{-104, 42}, {-86, 42}, {-86, 24}, {-80, 24}}, color = {0, 0, 127})); + connect(lc.pin3, inverter1.pin3) annotation( + Line(points = {{-42, 36}, {-51, 36}, {-51, 24}, {-60, 24}}, color = {0, 0, 255})); + connect(lc.pin2, inverter1.pin2) annotation( + Line(points = {{-42, 30}, {-60, 30}}, color = {0, 0, 255})); + connect(lc.pin1, inverter1.pin1) annotation( + Line(points = {{-42, 24}, {-51, 24}, {-51, 36}, {-60, 36}}, color = {0, 0, 255})); + connect(r_load.pin3, lc.pin6) annotation( + Line(points = {{0, 36}, {-22, 36}, {-22, 36}, {-22, 36}}, color = {0, 0, 255})); + connect(r_load.pin2, lc.pin5) annotation( + Line(points = {{0, 30}, {-22, 30}, {-22, 30}, {-22, 30}}, color = {0, 0, 255})); + connect(r_load.pin1, lc.pin4) annotation( + Line(points = {{0, 24}, {-22, 24}, {-22, 24}, {-22, 24}}, color = {0, 0, 255})); + annotation( + Diagram); + end P10_R_load; annotation( uses(Modelica(version = "3.2.3"))); end grid; \ No newline at end of file diff --git a/omg_grid/grid.paper_loadstep.fmu b/omg_grid/grid.paper_loadstep.fmu new file mode 100644 index 00000000..86bf39c9 Binary files /dev/null and b/omg_grid/grid.paper_loadstep.fmu differ diff --git a/omg_grid/grid.paper_loadstepWIN.fmu b/omg_grid/grid.paper_loadstepWIN.fmu new file mode 100644 index 00000000..fe4bb924 Binary files /dev/null and b/omg_grid/grid.paper_loadstepWIN.fmu differ diff --git a/openmodelica_microgrid_gym/env/modelica.py b/openmodelica_microgrid_gym/env/modelica.py index bed19710..7ce72af7 100644 --- a/openmodelica_microgrid_gym/env/modelica.py +++ 
b/openmodelica_microgrid_gym/env/modelica.py @@ -341,7 +341,7 @@ def step(self, action: Sequence) -> Tuple[np.ndarray, float, bool, Mapping]: if params: self.model.set_params(**params) risk = self.net.risk() - + risk = 0 # Simulate and observe result state outputs = self._create_state() diff --git a/openmodelica_microgrid_gym/util/randproc.py b/openmodelica_microgrid_gym/util/randproc.py index 33436e85..71cec70e 100644 --- a/openmodelica_microgrid_gym/util/randproc.py +++ b/openmodelica_microgrid_gym/util/randproc.py @@ -27,8 +27,18 @@ def __init__(self, process_cls: Type[BaseProcess], proc_kwargs=None, bounds=None self._last_t = 0 self._reserve = None - def reset(self, initial): - self._last = initial + def reset(self, initial=None): + """ + Resets the process, if initial is None, it is set randomly in the range of bounds + """ + if initial is None: + self._last = np.random.uniform(low=self.bounds[0], high=self.bounds[1]) + self.proc.mean = self._last + # self.reserve = self._last + else: + self._last = initial + self.proc.mean = self._last + #self.reserve = self._last self._last_t = 0 self._reserve = None
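A minimal usage sketch of the changed reset API, assuming the same RandProcess/VasicekProcess wiring as in stable_baselinesDDPG_voltage_control.py above (R = 28 Ohm, load bounds 11-45 Ohm as defined there):

    from stochastic.processes import VasicekProcess
    from openmodelica_microgrid_gym.util import RandProcess

    R = 28  # nominal load resistance / Ohm, as in the script above
    gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=1000, vol=10, mean=R),
                      initial=R, bounds=(11, 45))

    gen.reset(initial=R)    # previous behaviour: restart the process from an explicit value
    gen.reset()             # new behaviour: restart value drawn uniformly from bounds
    print(gen.sample(0.0))  # sample the load resistance at t = 0 s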