Denys88 · Denys88 · Jul 4, 2024 · Jul 4, 2024 · Jul 4, 2024
diff --git a/README.md b/README.md
@@ -67,10 +67,10 @@ Explore RL Games quick and easily in colab notebooks:
 
 ## Installation
 
-For maximum training performance a preliminary installation of Pytorch 1.9+ with CUDA 11.1+ is highly recommended:
+For maximum training performance a preliminary installation of Pytorch 2.2 or newer with CUDA 12.1 or newer is highly recommended:
 
-```conda install pytorch torchvision cudatoolkit=11.3 -c pytorch -c nvidia``` or:
-```pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html```
+```conda install pytorch torchvision pytorch-cuda=12.1 -c pytorch -c nvidia``` or:
+```pip install pip3 install torch torchvision```
 
 Then:
 

diff --git a/rl_games/algos_torch/sac_agent.py b/rl_games/algos_torch/sac_agent.py
@@ -441,7 +441,7 @@ def clear_stats(self):
         self.algo_observer.after_clear_stats()
 
     def play_steps(self, random_exploration = False):
-        total_time_start = time.time()
+        total_time_start = time.perf_counter()
         total_update_time = 0
         total_time = 0
         step_time = 0.0
@@ -466,11 +466,10 @@ def play_steps(self, random_exploration = False):
                 with torch.no_grad():
                     action = self.act(obs.float(), self.env_info["action_space"].shape, sample=True)
 
-            step_start = time.time()
-
+            step_start = time.perf_counter()
             with torch.no_grad():
                 next_obs, rewards, dones, infos = self.env_step(action)
-            step_end = time.time()
+            step_end = time.perf_counter()
 
             self.current_rewards += rewards
             self.current_lengths += 1
@@ -500,17 +499,17 @@ def play_steps(self, random_exploration = False):
                 self.obs = next_obs.clone()
 
             rewards = self.rewards_shaper(rewards)
-
             self.replay_buffer.add(obs, action, torch.unsqueeze(rewards, 1), next_obs_processed, torch.unsqueeze(dones, 1))
 
             if isinstance(obs, dict):
                 obs = self.obs['obs']
 
             if not random_exploration:
                 self.set_train()
-                update_time_start = time.time()
+
+                update_time_start = time.perf_counter()
                 actor_loss_info, critic1_loss, critic2_loss = self.update(self.epoch_num)
-                update_time_end = time.time()
+                update_time_end = time.perf_counter()
                 update_time = update_time_end - update_time_start
 
                 self.extract_actor_stats(actor_losses, entropies, alphas, alpha_losses, actor_loss_info)
@@ -521,7 +520,7 @@ def play_steps(self, random_exploration = False):
 
             total_update_time += update_time
 
-        total_time_end = time.time()
+        total_time_end = time.perf_counter()
         total_time = total_time_end - total_time_start
         play_time = total_time - total_update_time
 

diff --git a/rl_games/common/a2c_common.py b/rl_games/common/a2c_common.py
@@ -757,9 +757,9 @@ def play_steps(self):
             if self.has_central_value:
                 self.experience_buffer.update_data('states', n, self.obs['states'])
 
-            step_time_start = time.time()
+            step_time_start = time.perf_counter()
             self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
-            step_time_end = time.time()
+            step_time_end = time.perf_counter()
 
             step_time += (step_time_end - step_time_start)
 
@@ -830,9 +830,9 @@ def play_steps_rnn(self):
             if self.has_central_value:
                 self.experience_buffer.update_data('states', n, self.obs['states'])
 
-            step_time_start = time.time()
+            step_time_start = time.perf_counter()
             self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions'])
-            step_time_end = time.time()
+            step_time_end = time.perf_counter()
 
             step_time += (step_time_end - step_time_start)
 
@@ -920,7 +920,7 @@ def train_epoch(self):
         super().train_epoch()
 
         self.set_eval()
-        play_time_start = time.time()
+        play_time_start = time.perf_counter()
 
         with torch.no_grad():
             if self.is_rnn:
@@ -930,8 +930,8 @@ def train_epoch(self):
 
         self.set_train()
 
-        play_time_end = time.time()
-        update_time_start = time.time()
+        play_time_end = time.perf_counter()
+        update_time_start = time.perf_counter()
         rnn_masks = batch_dict.get('rnn_masks', None)
 
         self.curr_frames = batch_dict.pop('played_frames')
@@ -966,7 +966,7 @@ def train_epoch(self):
             if self.normalize_input:
                 self.model.running_mean_std.eval() # don't need to update statstics more than one miniepoch
 
-        update_time_end = time.time()
+        update_time_end = time.perf_counter()
         play_time = play_time_end - play_time_start
         update_time = update_time_end - update_time_start
         total_time = update_time_end - play_time_start
@@ -1034,7 +1034,7 @@ def prepare_dataset(self, batch_dict):
     def train(self):
         self.init_tensors()
         self.mean_rewards = self.last_mean_rewards = -100500
-        start_time = time.time()
+        start_time = time.perf_counter()
         total_time = 0
         rep_count = 0
         # self.frame = 0  # loading from checkpoint
@@ -1183,15 +1183,15 @@ def train_epoch(self):
         super().train_epoch()
 
         self.set_eval()
-        play_time_start = time.time()
+        play_time_start = time.perf_counter()
         with torch.no_grad():
             if self.is_rnn:
                 batch_dict = self.play_steps_rnn()
             else:
                 batch_dict = self.play_steps()
 
-        play_time_end = time.time()
-        update_time_start = time.time()
+        play_time_end = time.perf_counter()
+        update_time_start = time.perf_counter()
         rnn_masks = batch_dict.get('rnn_masks', None)
 
         self.set_train()
@@ -1240,7 +1240,7 @@ def train_epoch(self):
             if self.normalize_input:
                 self.model.running_mean_std.eval() # don't need to update statstics more than one miniepoch
 
-        update_time_end = time.time()
+        update_time_end = time.perf_counter()
         play_time = play_time_end - play_time_start
         update_time = update_time_end - update_time_start
         total_time = update_time_end - play_time_start
@@ -1310,7 +1310,7 @@ def prepare_dataset(self, batch_dict):
     def train(self):
         self.init_tensors()
         self.last_mean_rewards = -100500
-        start_time = time.time()
+        start_time = time.perf_counter()
         total_time = 0
         rep_count = 0
         self.obs = self.env_reset()

diff --git a/rl_games/torch_runner.py b/rl_games/torch_runner.py
@@ -63,7 +63,7 @@ def __init__(self, algo_observer=None):
 
         self.algo_observer = algo_observer if algo_observer else DefaultAlgoObserver()
         torch.backends.cudnn.benchmark = True
-        ### it didnot help for lots for openai gym envs anyway :(
+        ### it did not help for lots for openai gym envs anyway :(
         #torch.backends.cudnn.deterministic = True
         #torch.use_deterministic_algorithms(True)