From 41ca387166563437797a8ac53ed02e70bcd00896 Mon Sep 17 00:00:00 2001
From: ViktorM
Date: Wed, 3 Jul 2024 22:57:46 -0700
Subject: [PATCH] Increased time resolution for more precise performance tracking.

---
 rl_games/algos_torch/sac_agent.py | 15 +++++++--------
 rl_games/common/a2c_common.py     | 28 ++++++++++++++--------------
 rl_games/torch_runner.py          |  2 +-
 3 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/rl_games/algos_torch/sac_agent.py b/rl_games/algos_torch/sac_agent.py
index fd79fb7a..d4010fc4 100644
--- a/rl_games/algos_torch/sac_agent.py
+++ b/rl_games/algos_torch/sac_agent.py
@@ -441,7 +441,7 @@ def clear_stats(self):
         self.algo_observer.after_clear_stats()
 
     def play_steps(self, random_exploration = False):
-        total_time_start = time.time()
+        total_time_start = time.perf_counter()
         total_update_time = 0
         total_time = 0
         step_time = 0.0
@@ -466,11 +466,10 @@ def play_steps(self, random_exploration = False):
                 with torch.no_grad():
                     action = self.act(obs.float(), self.env_info["action_space"].shape, sample=True)
 
-            step_start = time.time()
-
+            step_start = time.perf_counter()
             with torch.no_grad():
                 next_obs, rewards, dones, infos = self.env_step(action)
-            step_end = time.time()
+            step_end = time.perf_counter()
 
             self.current_rewards += rewards
             self.current_lengths += 1
@@ -500,7 +499,6 @@ def play_steps(self, random_exploration = False):
             self.obs = next_obs.clone()
 
             rewards = self.rewards_shaper(rewards)
-
             self.replay_buffer.add(obs, action, torch.unsqueeze(rewards, 1), next_obs_processed, torch.unsqueeze(dones, 1))
 
             if isinstance(obs, dict):
@@ -508,9 +506,10 @@ def play_steps(self, random_exploration = False):
 
             if not random_exploration:
                 self.set_train()
-                update_time_start = time.time()
+
+                update_time_start = time.perf_counter()
                 actor_loss_info, critic1_loss, critic2_loss = self.update(self.epoch_num)
-                update_time_end = time.time()
+                update_time_end = time.perf_counter()
                 update_time = update_time_end - update_time_start
 
                 self.extract_actor_stats(actor_losses, entropies, alphas, alpha_losses, actor_loss_info)
@@ -521,7 +520,7 @@ def play_steps(self, random_exploration = False):
 
                 total_update_time += update_time
 
-        total_time_end = time.time()
+        total_time_end = time.perf_counter()
         total_time = total_time_end - total_time_start
         play_time = total_time - total_update_time
 
diff --git a/rl_games/common/a2c_common.py b/rl_games/common/a2c_common.py
index f9bd5a14..19b95985 100644
--- a/rl_games/common/a2c_common.py
+++ b/rl_games/common/a2c_common.py
@@ -757,9 +757,9 @@ def play_steps(self):
             if self.has_central_value:
                 self.experience_buffer.update_data('states', n, self.obs['states'])
 
-            step_time_start = time.time()
+            step_time_start = time.perf_counter()
             self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
-            step_time_end = time.time()
+            step_time_end = time.perf_counter()
 
             step_time += (step_time_end - step_time_start)
 
@@ -830,9 +830,9 @@ def play_steps_rnn(self):
             if self.has_central_value:
                 self.experience_buffer.update_data('states', n, self.obs['states'])
 
-            step_time_start = time.time()
+            step_time_start = time.perf_counter()
             self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
-            step_time_end = time.time()
+            step_time_end = time.perf_counter()
 
             step_time += (step_time_end - step_time_start)
 
@@ -920,7 +920,7 @@ def train_epoch(self):
         super().train_epoch()
 
         self.set_eval()
-        play_time_start = time.time()
+        play_time_start = time.perf_counter()
 
         with torch.no_grad():
             if self.is_rnn:
@@ -930,8 +930,8 @@ def train_epoch(self):
                 batch_dict = self.play_steps_rnn()
             else:
                 batch_dict = self.play_steps()
 
         self.set_train()
 
-        play_time_end = time.time()
-        update_time_start = time.time()
+        play_time_end = time.perf_counter()
+        update_time_start = time.perf_counter()
 
         rnn_masks = batch_dict.get('rnn_masks', None)
         self.curr_frames = batch_dict.pop('played_frames')
@@ -966,7 +966,7 @@ def train_epoch(self):
         if self.normalize_input:
             self.model.running_mean_std.eval() # don't need to update statstics more than one miniepoch
 
-        update_time_end = time.time()
+        update_time_end = time.perf_counter()
         play_time = play_time_end - play_time_start
         update_time = update_time_end - update_time_start
         total_time = update_time_end - play_time_start
@@ -1034,7 +1034,7 @@ def prepare_dataset(self, batch_dict):
     def train(self):
         self.init_tensors()
         self.mean_rewards = self.last_mean_rewards = -100500
-        start_time = time.time()
+        start_time = time.perf_counter()
         total_time = 0
         rep_count = 0
         # self.frame = 0  # loading from checkpoint
@@ -1183,15 +1183,15 @@ def train_epoch(self):
         super().train_epoch()
         self.set_eval()
 
-        play_time_start = time.time()
+        play_time_start = time.perf_counter()
         with torch.no_grad():
             if self.is_rnn:
                 batch_dict = self.play_steps_rnn()
             else:
                 batch_dict = self.play_steps()
 
-        play_time_end = time.time()
-        update_time_start = time.time()
+        play_time_end = time.perf_counter()
+        update_time_start = time.perf_counter()
         rnn_masks = batch_dict.get('rnn_masks', None)
 
         self.set_train()
@@ -1240,7 +1240,7 @@ def train_epoch(self):
         if self.normalize_input:
             self.model.running_mean_std.eval() # don't need to update statstics more than one miniepoch
 
-        update_time_end = time.time()
+        update_time_end = time.perf_counter()
         play_time = play_time_end - play_time_start
         update_time = update_time_end - update_time_start
         total_time = update_time_end - play_time_start
@@ -1310,7 +1310,7 @@ def prepare_dataset(self, batch_dict):
     def train(self):
         self.init_tensors()
         self.last_mean_rewards = -100500
-        start_time = time.time()
+        start_time = time.perf_counter()
         total_time = 0
         rep_count = 0
         self.obs = self.env_reset()
diff --git a/rl_games/torch_runner.py b/rl_games/torch_runner.py
index 4377d29b..86be48ac 100644
--- a/rl_games/torch_runner.py
+++ b/rl_games/torch_runner.py
@@ -63,7 +63,7 @@ def __init__(self, algo_observer=None):
         self.algo_observer = algo_observer if algo_observer else DefaultAlgoObserver()
 
         torch.backends.cudnn.benchmark = True
-        ### it didnot help for lots for openai gym envs anyway :(
+        ### it did not help for lots for openai gym envs anyway :(
         #torch.backends.cudnn.deterministic = True
         #torch.use_deterministic_algorithms(True)
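
Note (not part of the patch): a minimal standalone sketch of why time.perf_counter() suits these measurements better than time.time(). perf_counter() is monotonic, so an interval can never come out negative if the wall clock is adjusted mid-run, and it uses the highest-resolution clock the platform offers, whereas time.time() follows the wall clock and may tick coarsely on some systems. The busy-loop workload below is a hypothetical stand-in for env_step()/update().

import time

# Report the resolution and monotonicity of the two clocks touched by this patch.
for name in ("time", "perf_counter"):
    info = time.get_clock_info(name)
    print(f"{name:>12}: resolution={info.resolution:.3e}s, monotonic={info.monotonic}")

# The bracketing pattern used throughout the patch: take perf_counter() readings
# around a region and accumulate the elapsed interval.
step_time = 0.0
step_time_start = time.perf_counter()
sum(i * i for i in range(100_000))  # hypothetical stand-in for env_step()/update()
step_time_end = time.perf_counter()
step_time += step_time_end - step_time_start
print(f"measured step time: {step_time:.6f}s")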