Update plots for tutorial; larger font

RL4AA · Feb 2, 2024 · 0cbd070 · 0cbd070
1 parent 600de2c
commit 0cbd070
Show file tree

Hide file tree

Showing 4 changed files with 8 additions and 5 deletions.
diff --git a/img/random_policy.png b/img/random_policy.png
diff --git a/img/trained_meta_policy.png b/img/trained_meta_policy.png
diff --git a/meta-rl/read_out_train.py b/meta-rl/read_out_train.py
@@ -9,9 +9,8 @@
 
 import matplotlib.pyplot as plt
 import numpy as np
-from sympy import root
-
 from maml_rl.utils.reinforcement_learning import get_returns
+from sympy import root
 
 # from maml_rl.utils.torch_utils import to_numpy
 
@@ -175,7 +174,7 @@ def plot_progress(
     ax.set_title(title)
     ax.set_xlabel("Batches")
     ax.set_ylabel("Returns")
-    ax.legend()
+    ax.legend(loc="lower right")
     ax.grid(True)
 
     if save_folder:
@@ -285,14 +284,18 @@ def setup_and_plot(base_folder, experiment_name, experiment_type, ax, label_pref
         returns_mean_valid,
         nr_total_interactions,
     ) = read_train_data(my_dir=progress_folder)
+    fig = plt.figure(figsize=(6, 4))
+    ax = fig.add_subplot(111)
     plot_progress(
         returns_train,
         returns_valid,
         returns_mean_train,
         returns_mean_valid,
         title=f"Statistics for exp: {args.experiment_type}, "
         + f"total {nr_total_interactions} steps",
+        ax=ax,
     )
+    ax.set_ylim(-120, 0)  # For tutorial purposes
 
     data_train_individual, data_valid_individual = read_train_data_individual(
         my_dir=progress_folder

diff --git a/tutorial.ipynb b/tutorial.ipynb
@@ -594,7 +594,7 @@
     "\n",
     "Run the following code to train the task policy $\\varphi_0^0$ for 500 steps:\n",
     "\n",
-    "`python test.py --experiment-name tutorial --experiment-type adapt_from_scratch --num-batches=500 --plot-interval=50 --task-ids 0`\n",
+    "`python test.py --experiment-name tutorial --experiment-type adapt_from_scratch --num-batches 500 --plot-interval 50 --task-ids 0`\n",
     "\n",
     "Once it has run, you can look at the adaptation progress by running:\n",
     "\n",
@@ -655,7 +655,7 @@
     "\n",
     "We will now use a pre-trained policy located in `awake/pretrained_policy.th` and evaluate it against a certain number of fixed tasks.\n",
     "\n",
-    "`python test.py --experiment-name tutorial --experiment-type test_meta --use-meta-policy --policy awake/pretrained_policy.th --num-batches=500 --plot-interval=50 --task-ids 0 1 2 3 4`\n",
+    "`python test.py --experiment-name tutorial --experiment-type test_meta --use-meta-policy --policy awake/pretrained_policy.th --num-batches 500 --plot-interval 50 --task-ids 0 1 2 3 4`\n",
     "\n",
     "- use `--task-ids 0 1 2 3 4` to run evaluation against all 5 tasks, or e.g. `--task-ids 0` to evaluate only for task 0.\n",
     "- here we set the flag `--use-meta-policy` so that it uses the pre-trained policy.\n",