aravindr93 · Divye02 · May 18, 2019 · May 18, 2019 · May 18, 2019 · Jun 27, 2019
diff --git a/hand_vil/README.md b/hand_vil/README.md
@@ -0,0 +1,50 @@
+# hand_vil
+Learning Deep Visuomotor Policies for Dexterous Hand Manipulation
+
+## Setup
+
+Each repository linked below contains detailed setup instructions.
+1. **Step 1:** Install [mjrl](https://github.com/aravindr93/mjrl), using instructions in the repository ([direct link](https://github.com/aravindr93/mjrl/tree/master/setup)). `mjrl` comes with an anaconda environment which helps to easily import and use a variety of MuJoCo tasks.
+2. **Step 2:** Install [mj_envs](https://github.com/vikashplus/mj_envs) by following the instructions in the repository. Note that `mj_envs` uses git submodules, and hence must be cloned correctly per instructions in the repo.
+3. **Step 3:** After setting up `mjrl` and `mj_envs`, add them to your Python path alongside `hand_vil`.
+```
+$ export PYTHONPATH=$PYTHONPATH:<your_path>/mjrl
+$ export PYTHONPATH=$PYTHONPATH:<your_path>/mj_envs
+$ export PYTHONPATH=$PYTHONPATH:<your_path>/hand_dapg/hand_vil
+```
+
+## Training the Visuomotor policies
+
+1. **Step 1:** Make a "local_settings.py" file and set the variable "MAIN_DIR" to point to the root folder of the project. Consult local_settings.py.sample.
+
+2. **Step 2:** We already have the expert policies for each of the environments fetched from [hand_dapg](https://github.com/aravindr93/hand_dapg), so we are ready to train the visual policy for any of the 4 environments (door, hammer, pen, pickup).
+* It is highly recommended that you use a machine with a GPU for faster training. If you are not planning on using a GPU, make sure to set `use_cuda` in the config to `False`.
+* The training configs for the different environments are in `configs/`.
+* Move the config that you want to run to the root project directory. For example, to use the Hand Hammer config, run the following command:
+```
+mv configs/config_main_hammer.py config_main.py
+```
+* Now we are ready to train the visual model.
+```
+$ python run.py
+```
+
+Note that this will save the generated training data to `gen_data/data/<name_of_run>/train_data`
+and will save the generated validation data to `gen_data/data/<name_of_run>/val_data`, and the trained policy
+to `gen_data/data/<name_of_run>/<abbr_run_name>_viz_policy`.
+
+## Visualizing a trained policy
+Once you have the trained policy, you can set the appropriate `CAMERA_NAME`, `ENV_NAME`, `VIZ_FOLDER` and `FULL_POLICY_PATH` (the trained policies are saved in the `gen_data` folder) in `viz_policy.py`. This will save multiple episodes of the trained policy being unrolled under `gen_data/results/vides/VIZ_FOLDER`.
+
+## Bibliography
+
+If you use the code in this or associated repositories above, please cite the following paper.
+```
+@INPROCEEDINGS{Jain-ICRA-19,
+    AUTHOR    = {Divye Jain AND Andrew Li AND Shivam Singhal AND 
+                 Aravind Rajeswaran AND Vikash Kumar AND Emanuel Todorov},
+    TITLE     = "{Learning Deep Visuomotor Policies for Dexterous Hand Manipulation}",
+    BOOKTITLE = {International Conference on Robotics and Automation (ICRA)},
+    YEAR      = {2019},
+}
+```
diff --git a/hand_vil/__init__.py b/hand_vil/__init__.py
diff --git a/hand_vil/config_main.py b/hand_vil/config_main.py
@@ -0,0 +1,33 @@
+DEFAULT_CONFIG = dict(  # Run settings at the project root (hand_door); values match configs/config_main_door.py. Per the README, the config to run lives here.
+  id_post= "public_final_20traj_10ep",
+  camera_name= "vil_camera",
+  device_id= 0,
+  train_expert= False,
+  beta_start= 1,
+  use_cuda= True,  # per the README: set to False when training without a GPU
+  beta_decay= 0.2,
+  env_id_il= "mjrl_SHAP_door_handle-v5",  # NOTE(review): the hammer/pen/pickup configs name this key env_id -- confirm which key the training code reads
+  traj_budget_expert= 12500,
+  num_traj_expert= 50,
+  sliding_window= 80,
+  val_traj_per_file= 5,
+  horizon_il= 150,
+  batch_size_viz_pol= 128,
+  use_late_fusion= True,
+  dagger_epoch= 20,
+  viz_policy_folder_dagger= "dagger_hand_door_viz_policy",
+  use_tactile= True,
+  expert_policy_folder= "hand_door_expert",
+  trainer_epochs= 10,
+  eval_num_traj= 100,
+  bc_epoch= 20,
+  env_name= "hand_door",
+  train_traj_per_file= 20,
+  seed= 3000,
+  num_files_val= 1,
+  delta= 0.01,
+  has_robot_info= True,
+  num_files_train= 1,
+  gen_traj_dagger_ep= 20,
+  lr= 0.0003
+)
diff --git a/hand_vil/configs/config_main_door.py b/hand_vil/configs/config_main_door.py
@@ -0,0 +1,33 @@
+DEFAULT_CONFIG = dict(  # Run settings for the hand_door task. Per the README, move this file to config_main.py in the project root to run it.
+  id_post= "public_final_20traj_10ep",
+  camera_name= "vil_camera",
+  device_id= 0,
+  train_expert= False,
+  beta_start= 1,
+  use_cuda= True,  # per the README: set to False when training without a GPU
+  beta_decay= 0.2,
+  env_id_il= "mjrl_SHAP_door_handle-v5",  # NOTE(review): the hammer/pen/pickup configs name this key env_id -- confirm which key the training code reads
+  traj_budget_expert= 12500,
+  num_traj_expert= 50,
+  sliding_window= 80,
+  val_traj_per_file= 5,
+  horizon_il= 150,
+  batch_size_viz_pol= 128,
+  use_late_fusion= True,
+  dagger_epoch= 20,
+  viz_policy_folder_dagger= "dagger_hand_door_viz_policy",
+  use_tactile= True,
+  expert_policy_folder= "hand_door_expert",
+  trainer_epochs= 10,
+  eval_num_traj= 100,
+  bc_epoch= 20,
+  env_name= "hand_door",
+  train_traj_per_file= 20,
+  seed= 3000,
+  num_files_val= 1,
+  delta= 0.01,
+  has_robot_info= True,
+  num_files_train= 1,
+  gen_traj_dagger_ep= 20,
+  lr= 0.0003
+)
diff --git a/hand_vil/configs/config_main_hammer.py b/hand_vil/configs/config_main_hammer.py
@@ -0,0 +1,33 @@
+DEFAULT_CONFIG = dict(  # Run settings for the hand_hammer task. Per the README, move this file to config_main.py in the project root to run it.
+  bc_epoch= 20,
+  expert_policy_folder= "hand_hammer_expert",
+  env_id= "mjrl_hammer-v0",
+  val_traj_per_file= 5,
+  has_robot_info= True,
+  gen_traj_dagger_ep= 50,
+  trainer_epochs= 10,
+  seed= 1000,
+  id_post= "public_final_20traj_10ep",
+  num_files_train= 1,
+  use_late_fusion= True,
+  num_files_val= 1,
+  camera_name= "vil_camera",
+  dagger_epoch= 20,
+  beta_decay= 0.2,
+  viz_policy_folder_dagger= "dagger_hand_hammer_viz_policy",
+  eval_num_traj= 100,
+  device_id= 0,
+  delta= 0.01,
+  env_name= "hand_hammer",
+  sliding_window= 80,
+  use_tactile= True,
+  batch_size_viz_pol= 128,
+  use_cuda= True,  # per the README: set to False when training without a GPU
+  lr= 0.0003,
+  horizon_il= 150,
+  train_expert= False,
+  num_traj_expert= 20,
+  traj_budget_expert= 12500,
+  train_traj_per_file= 20,
+  beta_start= 1
+)
diff --git a/hand_vil/configs/config_main_pen_def.py b/hand_vil/configs/config_main_pen_def.py
@@ -0,0 +1,33 @@
+DEFAULT_CONFIG = dict(  # Run settings for the hand_pen task with camera "vil_camera". Per the README, move this file to config_main.py in the project root to run it.
+  lr= 0.0003,
+  delta= 0.01,
+  traj_budget_expert= 12500,
+  use_cuda= True,  # per the README: set to False when training without a GPU
+  num_files_val= 1,
+  camera_name= "vil_camera",
+  seed= 1000,
+  num_files_train= 1,
+  batch_size_viz_pol= 128,
+  id_post= "def_public_best_20traj_10ep",
+  dagger_epoch= 10,
+  env_id= "mjrl_pen_reposition-v2",
+  trainer_epochs= 10,
+  val_traj_per_file= 5,
+  viz_policy_folder_dagger= "dagger_hand_pen_def_viz_policy",
+  device_id= 0,
+  expert_policy_folder= "hand_pen_expert",
+  train_traj_per_file= 20,
+  bc_epoch= 20,
+  env_name= "hand_pen",
+  beta_start= 1,
+  has_robot_info= True,
+  gen_traj_dagger_ep= 20,
+  use_late_fusion= True,
+  use_tactile= True,
+  sliding_window= 80,
+  train_expert= False,
+  eval_num_traj= 100,
+  horizon_il= 150,
+  beta_decay= 0.2,
+  num_traj_expert= 50
+)
diff --git a/hand_vil/configs/config_main_pen_v1.py b/hand_vil/configs/config_main_pen_v1.py
@@ -0,0 +1,33 @@
+DEFAULT_CONFIG = dict(  # Run settings for the hand_pen task with camera "view_1". Per the README, move this file to config_main.py in the project root to run it.
+  lr= 0.0003,
+  delta= 0.01,
+  traj_budget_expert= 12500,
+  use_cuda= True,  # per the README: set to False when training without a GPU
+  num_files_val= 1,
+  camera_name= "view_1",
+  seed= 1000,
+  num_files_train= 1,
+  batch_size_viz_pol= 128,
+  id_post= "view_1_public_best_20trag_10ep",  # NOTE(review): "20trag" is probably a typo of "20traj" (cf. the other configs); check whether it feeds saved run paths before renaming
+  dagger_epoch= 10,
+  env_id= "mjrl_pen_reposition-v2",
+  trainer_epochs= 10,
+  val_traj_per_file= 5,
+  viz_policy_folder_dagger= "dagger_hand_pen_view_1_viz_policy",
+  device_id= 0,
+  expert_policy_folder= "hand_pen_expert",
+  train_traj_per_file= 20,
+  bc_epoch= 20,
+  env_name= "hand_pen",
+  beta_start= 1,
+  has_robot_info= True,
+  gen_traj_dagger_ep= 20,
+  use_late_fusion= True,
+  use_tactile= True,
+  sliding_window= 80,
+  train_expert= False,
+  eval_num_traj= 100,
+  horizon_il= 150,
+  beta_decay= 0.2,
+  num_traj_expert= 50
+)
diff --git a/hand_vil/configs/config_main_pen_v2.py b/hand_vil/configs/config_main_pen_v2.py
@@ -0,0 +1,33 @@
+DEFAULT_CONFIG = dict(  # Run settings for the hand_pen task with camera "view_2". Per the README, move this file to config_main.py in the project root to run it.
+  lr= 0.0003,
+  delta= 0.01,
+  traj_budget_expert= 12500,
+  use_cuda= True,  # per the README: set to False when training without a GPU
+  num_files_val= 1,
+  camera_name= "view_2",
+  seed= 1000,
+  num_files_train= 1,
+  batch_size_viz_pol= 128,
+  id_post= "view_2_public_best_20trag_10ep",  # NOTE(review): "20trag" is probably a typo of "20traj" (cf. the other configs); check whether it feeds saved run paths before renaming
+  dagger_epoch= 10,
+  env_id= "mjrl_pen_reposition-v2",
+  trainer_epochs= 10,
+  val_traj_per_file= 5,
+  viz_policy_folder_dagger= "dagger_hand_pen_view_2_viz_policy",
+  device_id= 0,
+  expert_policy_folder= "hand_pen_expert",
+  train_traj_per_file= 20,
+  bc_epoch= 20,
+  env_name= "hand_pen",
+  beta_start= 1,
+  has_robot_info= True,
+  gen_traj_dagger_ep= 20,
+  use_late_fusion= True,
+  use_tactile= True,
+  sliding_window= 80,
+  train_expert= False,
+  eval_num_traj= 100,
+  horizon_il= 150,
+  beta_decay= 0.2,
+  num_traj_expert= 50
+)
diff --git a/hand_vil/configs/config_main_pen_v3.py b/hand_vil/configs/config_main_pen_v3.py
@@ -0,0 +1,33 @@
+DEFAULT_CONFIG = dict(  # Run settings for the hand_pen task with camera "view_3". Per the README, move this file to config_main.py in the project root to run it.
+  lr= 0.0003,
+  delta= 0.01,
+  traj_budget_expert= 12500,
+  use_cuda= True,  # per the README: set to False when training without a GPU
+  num_files_val= 1,
+  camera_name= "view_3",
+  seed= 1000,
+  num_files_train= 1,
+  batch_size_viz_pol= 128,
+  id_post= "view_3_public_best_20trag_10ep",  # NOTE(review): "20trag" is probably a typo of "20traj" (cf. the other configs); check whether it feeds saved run paths before renaming
+  dagger_epoch= 10,
+  env_id= "mjrl_pen_reposition-v2",
+  trainer_epochs= 10,
+  val_traj_per_file= 5,
+  viz_policy_folder_dagger= "dagger_hand_pen_view_3_viz_policy",
+  device_id= 0,
+  expert_policy_folder= "hand_pen_expert",
+  train_traj_per_file= 20,
+  bc_epoch= 20,
+  env_name= "hand_pen",
+  beta_start= 1,
+  has_robot_info= True,
+  gen_traj_dagger_ep= 20,
+  use_late_fusion= True,
+  use_tactile= True,
+  sliding_window= 80,
+  train_expert= False,
+  eval_num_traj= 100,
+  horizon_il= 150,
+  beta_decay= 0.2,
+  num_traj_expert= 50
+)
diff --git a/hand_vil/configs/config_main_pen_v4.py b/hand_vil/configs/config_main_pen_v4.py
@@ -0,0 +1,33 @@
+DEFAULT_CONFIG = dict(  # Run settings for the hand_pen task with camera "view_4". Per the README, move this file to config_main.py in the project root to run it.
+  lr= 0.0003,
+  delta= 0.01,
+  traj_budget_expert= 12500,
+  use_cuda= True,  # per the README: set to False when training without a GPU
+  num_files_val= 1,
+  camera_name= "view_4",
+  seed= 1000,
+  num_files_train= 1,
+  batch_size_viz_pol= 128,
+  id_post= "view_4_public_best_20trag_10ep",  # NOTE(review): "20trag" is probably a typo of "20traj" (cf. the other configs); check whether it feeds saved run paths before renaming
+  dagger_epoch= 10,
+  env_id= "mjrl_pen_reposition-v2",
+  trainer_epochs= 10,
+  val_traj_per_file= 5,
+  viz_policy_folder_dagger= "dagger_hand_pen_view_4_viz_policy",
+  device_id= 0,
+  expert_policy_folder= "hand_pen_expert",
+  train_traj_per_file= 20,
+  bc_epoch= 20,
+  env_name= "hand_pen",
+  beta_start= 1,
+  has_robot_info= True,
+  gen_traj_dagger_ep= 20,
+  use_late_fusion= True,
+  use_tactile= True,
+  sliding_window= 80,
+  train_expert= False,
+  eval_num_traj= 100,
+  horizon_il= 150,
+  beta_decay= 0.2,
+  num_traj_expert= 50
+)
diff --git a/hand_vil/configs/config_main_pen_v5.py b/hand_vil/configs/config_main_pen_v5.py
@@ -0,0 +1,33 @@
+DEFAULT_CONFIG = dict(  # Run settings for the hand_pen task with camera "view_5". Per the README, move this file to config_main.py in the project root to run it.
+  lr= 0.0003,
+  delta= 0.01,
+  traj_budget_expert= 12500,
+  use_cuda= True,  # per the README: set to False when training without a GPU
+  num_files_val= 1,
+  camera_name= "view_5",
+  seed= 1000,
+  num_files_train= 1,
+  batch_size_viz_pol= 128,
+  id_post= "view_5_public_best_20trag_10ep",  # NOTE(review): "20trag" is probably a typo of "20traj" (cf. the other configs); check whether it feeds saved run paths before renaming
+  dagger_epoch= 10,
+  env_id= "mjrl_pen_reposition-v2",
+  trainer_epochs= 10,
+  val_traj_per_file= 5,
+  viz_policy_folder_dagger= "dagger_hand_pen_view_5_viz_policy",
+  device_id= 0,
+  expert_policy_folder= "hand_pen_expert",
+  train_traj_per_file= 20,
+  bc_epoch= 20,
+  env_name= "hand_pen",
+  beta_start= 1,
+  has_robot_info= True,
+  gen_traj_dagger_ep= 20,
+  use_late_fusion= True,
+  use_tactile= True,
+  sliding_window= 80,
+  train_expert= False,
+  eval_num_traj= 100,
+  horizon_il= 150,
+  beta_decay= 0.2,
+  num_traj_expert= 50
+)
diff --git a/hand_vil/configs/config_main_pickup.py b/hand_vil/configs/config_main_pickup.py
@@ -0,0 +1,33 @@
+DEFAULT_CONFIG = dict(  # Run settings for the hand_pickup task. Per the README, move this file to config_main.py in the project root to run it.
+  train_traj_per_file= 20,
+  num_files_train= 1,
+  val_traj_per_file= 5,
+  id_post= "public_final_20traj_10ep",
+  horizon_il= 150,
+  eval_num_traj= 100,
+  has_robot_info= True,
+  gen_traj_dagger_ep= 20,
+  device_id= 0,
+  trainer_epochs= 10,
+  sliding_window= 80,
+  num_files_val= 1,
+  camera_name= "vil_camera",
+  viz_policy_folder_dagger= "dagger_hand_pickup_viz_policy",
+  dagger_epoch= 50,
+  traj_budget_expert= 12500,
+  lr= 0.0003,
+  use_cuda= True,  # per the README: set to False when training without a GPU
+  use_tactile= True,
+  seed= 1000,
+  delta= 0.01,
+  beta_start= 1,
+  env_id= "mjrl_SHAP_slide_pickup-v42",
+  num_traj_expert= 50,
+  bc_epoch= 20,
+  batch_size_viz_pol= 128,
+  use_late_fusion= True,
+  train_expert= False,
+  beta_decay= 0.2,
+  expert_policy_folder= "hand_pickup_expert",
+  env_name= "hand_pickup"
+)
diff --git a/hand_vil/local_settings.py.sample b/hand_vil/local_settings.py.sample
@@ -0,0 +1 @@
+MAIN_DIR = '/Users/divye/Documents/research/vil_paper/hand_dapg/hand_vil'  # sample value: per the README, point this at the root folder of the project in your own local_settings.py