From 41ca387166563437797a8ac53ed02e70bcd00896 Mon Sep 17 00:00:00 2001
From: ViktorM
Date: Wed, 3 Jul 2024 22:57:46 -0700
Subject: [PATCH] Increased time resolution for more precise performance tracking.

---
 rl_games/algos_torch/sac_agent.py | 15 +++++++--------
 rl_games/common/a2c_common.py     | 28 ++++++++++++++--------------
 rl_games/torch_runner.py          |  2 +-
 3 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/rl_games/algos_torch/sac_agent.py b/rl_games/algos_torch/sac_agent.py
index fd79fb7a..d4010fc4 100644
--- a/rl_games/algos_torch/sac_agent.py
+++ b/rl_games/algos_torch/sac_agent.py
@@ -441,7 +441,7 @@ def clear_stats(self):
         self.algo_observer.after_clear_stats()
 
     def play_steps(self, random_exploration = False):
-        total_time_start = time.time()
+        total_time_start = time.perf_counter()
         total_update_time = 0
         total_time = 0
         step_time = 0.0
@@ -466,11 +466,10 @@ def play_steps(self, random_exploration = False):
                 with torch.no_grad():
                     action = self.act(obs.float(), self.env_info["action_space"].shape, sample=True)
 
-            step_start = time.time()
-
+            step_start = time.perf_counter()
             with torch.no_grad():
                 next_obs, rewards, dones, infos = self.env_step(action)
-            step_end = time.time()
+            step_end = time.perf_counter()
 
             self.current_rewards += rewards
             self.current_lengths += 1
@@ -500,7 +499,6 @@ def play_steps(self, random_exploration = False):
             self.obs = next_obs.clone()
 
             rewards = self.rewards_shaper(rewards)
-
             self.replay_buffer.add(obs, action, torch.unsqueeze(rewards, 1), next_obs_processed, torch.unsqueeze(dones, 1))
 
             if isinstance(obs, dict):
@@ -508,9 +506,10 @@ def play_steps(self, random_exploration = False):
 
             if not random_exploration:
                 self.set_train()
-                update_time_start = time.time()
+
+                update_time_start = time.perf_counter()
                 actor_loss_info, critic1_loss, critic2_loss = self.update(self.epoch_num)
-                update_time_end = time.time()
+                update_time_end = time.perf_counter()
                 update_time = update_time_end - update_time_start
 
                 self.extract_actor_stats(actor_losses, entropies, alphas, alpha_losses, actor_loss_info)
@@ -521,7 +520,7 @@ def play_steps(self, random_exploration = False):
 
                 total_update_time += update_time
 
-        total_time_end = time.time()
+        total_time_end = time.perf_counter()
         total_time = total_time_end - total_time_start
         play_time = total_time - total_update_time
 
diff --git a/rl_games/common/a2c_common.py b/rl_games/common/a2c_common.py
index f9bd5a14..19b95985 100644
--- a/rl_games/common/a2c_common.py
+++ b/rl_games/common/a2c_common.py
@@ -757,9 +757,9 @@ def play_steps(self):
             if self.has_central_value:
                 self.experience_buffer.update_data('states', n, self.obs['states'])
 
-            step_time_start = time.time()
+            step_time_start = time.perf_counter()
             self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
-            step_time_end = time.time()
+            step_time_end = time.perf_counter()
 
             step_time += (step_time_end - step_time_start)
 
@@ -830,9 +830,9 @@ def play_steps_rnn(self):
             if self.has_central_value:
                 self.experience_buffer.update_data('states', n, self.obs['states'])
 
-            step_time_start = time.time()
+            step_time_start = time.perf_counter()
             self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
-            step_time_end = time.time()
+            step_time_end = time.perf_counter()
 
             step_time += (step_time_end - step_time_start)
 
@@ -920,7 +920,7 @@ def train_epoch(self):
         super().train_epoch()
 
         self.set_eval()
-        play_time_start = time.time()
+        play_time_start = time.perf_counter()
 
         with torch.no_grad():
             if self.is_rnn:
@@ -930,8 +930,8 @@ def train_epoch(self):
                 batch_dict = self.play_steps_rnn()
             else:
                 batch_dict = self.play_steps()
 
         self.set_train()
 
-        play_time_end = time.time()
-        update_time_start = time.time()
+        play_time_end = time.perf_counter()
+        update_time_start = time.perf_counter()
 
         rnn_masks = batch_dict.get('rnn_masks', None)
         self.curr_frames = batch_dict.pop('played_frames')
@@ -966,7 +966,7 @@ def train_epoch(self):
         if self.normalize_input:
             self.model.running_mean_std.eval() # don't need to update statstics more than one miniepoch
 
-        update_time_end = time.time()
+        update_time_end = time.perf_counter()
         play_time = play_time_end - play_time_start
         update_time = update_time_end - update_time_start
         total_time = update_time_end - play_time_start
@@ -1034,7 +1034,7 @@ def prepare_dataset(self, batch_dict):
     def train(self):
         self.init_tensors()
         self.mean_rewards = self.last_mean_rewards = -100500
-        start_time = time.time()
+        start_time = time.perf_counter()
         total_time = 0
         rep_count = 0
         # self.frame = 0  # loading from checkpoint
@@ -1183,15 +1183,15 @@ def train_epoch(self):
         super().train_epoch()
         self.set_eval()
 
-        play_time_start = time.time()
+        play_time_start = time.perf_counter()
         with torch.no_grad():
             if self.is_rnn:
                 batch_dict = self.play_steps_rnn()
             else:
                 batch_dict = self.play_steps()
 
-        play_time_end = time.time()
-        update_time_start = time.time()
+        play_time_end = time.perf_counter()
+        update_time_start = time.perf_counter()
         rnn_masks = batch_dict.get('rnn_masks', None)
 
         self.set_train()
@@ -1240,7 +1240,7 @@ def train_epoch(self):
         if self.normalize_input:
             self.model.running_mean_std.eval() # don't need to update statstics more than one miniepoch
 
-        update_time_end = time.time()
+        update_time_end = time.perf_counter()
         play_time = play_time_end - play_time_start
         update_time = update_time_end - update_time_start
         total_time = update_time_end - play_time_start
@@ -1310,7 +1310,7 @@ def prepare_dataset(self, batch_dict):
     def train(self):
         self.init_tensors()
         self.last_mean_rewards = -100500
-        start_time = time.time()
+        start_time = time.perf_counter()
         total_time = 0
         rep_count = 0
         self.obs = self.env_reset()
diff --git a/rl_games/torch_runner.py b/rl_games/torch_runner.py
index 4377d29b..86be48ac 100644
--- a/rl_games/torch_runner.py
+++ b/rl_games/torch_runner.py
@@ -63,7 +63,7 @@ def __init__(self, algo_observer=None):
         self.algo_observer = algo_observer if algo_observer else DefaultAlgoObserver()
 
         torch.backends.cudnn.benchmark = True
-        ### it didnot help for lots for openai gym envs anyway :(
+        ### it did not help for lots for openai gym envs anyway :(
         #torch.backends.cudnn.deterministic = True
         #torch.use_deterministic_algorithms(True)
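
Note (not part of the patch): a minimal standalone sketch of why time.perf_counter() suits these measurements better than time.time(). perf_counter() is monotonic, so an interval can never come out negative if the wall clock is adjusted mid-run, and it uses the highest-resolution clock the platform offers, whereas time.time() follows the wall clock and may tick coarsely on some systems. The busy-loop workload below is a hypothetical stand-in for env_step()/update().

import time

# Report the resolution and monotonicity of the two clocks touched by this patch.
for name in ("time", "perf_counter"):
    info = time.get_clock_info(name)
    print(f"{name:>12}: resolution={info.resolution:.3e}s, monotonic={info.monotonic}")

# The bracketing pattern used throughout the patch: take perf_counter() readings
# around a region and accumulate the elapsed interval.
step_time = 0.0
step_time_start = time.perf_counter()
sum(i * i for i in range(100_000))  # hypothetical stand-in for env_step()/update()
step_time_end = time.perf_counter()
step_time += step_time_end - step_time_start
print(f"measured step time: {step_time:.6f}s")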