diff --git a/hand_vil/README.md b/hand_vil/README.md
new file mode 100644
index 0000000..3c665bd
--- /dev/null
+++ b/hand_vil/README.md
@@ -0,0 +1,50 @@
+# hand_vil
+Learning Deep Visuomotor Policies for Dexterous Hand Manipulation
+
+## Setup
+
+Setup relies on two companion repositories, `mjrl` and `mj_envs`; each contains detailed setup instructions.
+1. **Step 1:** Install [mjrl](https://github.com/aravindr93/mjrl) using the instructions in the repository ([direct link](https://github.com/aravindr93/mjrl/tree/master/setup)). `mjrl` comes with an anaconda environment that makes it easy to import and use a variety of MuJoCo tasks.
+2. **Step 2:** Install [mj_envs](https://github.com/vikashplus/mj_envs) by following the instructions in the repository. Note that `mj_envs` uses git submodules, and hence must be cloned correctly per the instructions in the repo.
+3. **Step 3:** After setting up `mjrl` and `mj_envs`, add them to your Python path alongside `hand_vil`:
+```
+$ export PYTHONPATH=$PYTHONPATH:/mjrl
+$ export PYTHONPATH=$PYTHONPATH:/mj_envs
+$ export PYTHONPATH=$PYTHONPATH:/hand_dapg/hand_vil
+```
+
+## Training the visuomotor policies
+
+1. **Step 1:** Create a `local_settings.py` file and set the variable `MAIN_DIR` to the root folder of the project (see `local_settings.py.sample`).
+
+2. **Step 2:** Expert policies for each environment are already provided, fetched from [hand_dapg](https://github.com/aravindr93/hand_dapg), so we are ready to train the visual policy for any of the four environments (door, hammer, pen, and pickup).
+* It is highly recommended that you use a machine with a GPU for faster training. If you are not planning to use a GPU, make sure to set `use_cuda` to `False` in the config.
+* The training configs for the different environments are in `configs/`.
+* Move the config that you want to run to the root project directory as `config_main.py`. For example, to use the Hand Hammer config, run:
+```
+mv configs/config_main_hammer.py config_main.py
+```
+* Now we are ready to train the visual policy:
+```
+$ python run.py
+```
+
+Note that this will save the generated training data to `gen_data/data/<env_name>_<id_post>/train_data`,
+the generated validation data to `gen_data/data/<env_name>_<id_post>/val_data`, and the trained policy
+to `gen_data/data/<env_name>_<id_post>/dagger_<env_name>_viz_policy`, where `<env_name>` and `<id_post>` come from the active config.
+
+## Visualizing a trained policy
+Once you have a trained policy, set the appropriate `CAMERA_NAME`, `ENV_NAME`, `VIZ_FOLDER` and `FULL_POLICY_PATH` (trained policies are saved under the `gen_data` folder) in `viz_policy.py`. Running it will save multiple episodes of the trained policy being unrolled under `gen_data/results/videos/VIZ_FOLDER`.
+
+## Bibliography
+
+If you use the code in this or the associated repositories, please cite the following paper.
+``` +@INPROCEEDINGS{Jain-ICRA-19, + AUTHOR = {Divye Jain AND Andrew Li AND Shivam Singhal AND + Aravind Rajeswaran AND Vikash Kumar AND Emanuel Todorov}, + TITLE = "{Learning Deep Visuomotor Policies for Dexterous Hand Manipulation}", + BOOKTITLE = {International Conference on Robotics and Automation (ICRA)}, + YEAR = {2019}, +} +``` diff --git a/hand_vil/__init__.py b/hand_vil/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/hand_vil/config_main.py b/hand_vil/config_main.py new file mode 100644 index 0000000..14cc438 --- /dev/null +++ b/hand_vil/config_main.py @@ -0,0 +1,33 @@ +DEFAULT_CONFIG = dict( + id_post= "public_final_20traj_10ep", + camera_name= "vil_camera", + device_id= 0, + train_expert= False, + beta_start= 1, + use_cuda= True, + beta_decay= 0.2, + env_id_il= "mjrl_SHAP_door_handle-v5", + traj_budget_expert= 12500, + num_traj_expert= 50, + sliding_window= 80, + val_traj_per_file= 5, + horizon_il= 150, + batch_size_viz_pol= 128, + use_late_fusion= True, + dagger_epoch= 20, + viz_policy_folder_dagger= "dagger_hand_door_viz_policy", + use_tactile= True, + expert_policy_folder= "hand_door_expert", + trainer_epochs= 10, + eval_num_traj= 100, + bc_epoch= 20, + env_name= "hand_door", + train_traj_per_file= 20, + seed= 3000, + num_files_val= 1, + delta= 0.01, + has_robot_info= True, + num_files_train= 1, + gen_traj_dagger_ep= 20, + lr= 0.0003 +) diff --git a/hand_vil/configs/config_main_door.py b/hand_vil/configs/config_main_door.py new file mode 100644 index 0000000..14cc438 --- /dev/null +++ b/hand_vil/configs/config_main_door.py @@ -0,0 +1,33 @@ +DEFAULT_CONFIG = dict( + id_post= "public_final_20traj_10ep", + camera_name= "vil_camera", + device_id= 0, + train_expert= False, + beta_start= 1, + use_cuda= True, + beta_decay= 0.2, + env_id_il= "mjrl_SHAP_door_handle-v5", + traj_budget_expert= 12500, + num_traj_expert= 50, + sliding_window= 80, + val_traj_per_file= 5, + horizon_il= 150, + batch_size_viz_pol= 128, + use_late_fusion= True, + dagger_epoch= 20, + viz_policy_folder_dagger= "dagger_hand_door_viz_policy", + use_tactile= True, + expert_policy_folder= "hand_door_expert", + trainer_epochs= 10, + eval_num_traj= 100, + bc_epoch= 20, + env_name= "hand_door", + train_traj_per_file= 20, + seed= 3000, + num_files_val= 1, + delta= 0.01, + has_robot_info= True, + num_files_train= 1, + gen_traj_dagger_ep= 20, + lr= 0.0003 +) diff --git a/hand_vil/configs/config_main_hammer.py b/hand_vil/configs/config_main_hammer.py new file mode 100644 index 0000000..1aa57f1 --- /dev/null +++ b/hand_vil/configs/config_main_hammer.py @@ -0,0 +1,33 @@ +DEFAULT_CONFIG = dict( + bc_epoch= 20, + expert_policy_folder= "hand_hammer_expert", + env_id= "mjrl_hammer-v0", + val_traj_per_file= 5, + has_robot_info= True, + gen_traj_dagger_ep= 50, + trainer_epochs= 10, + seed= 1000, + id_post= "public_final_20traj_10ep", + num_files_train= 1, + use_late_fusion= True, + num_files_val= 1, + camera_name= "vil_camera", + dagger_epoch= 20, + beta_decay= 0.2, + viz_policy_folder_dagger= "dagger_hand_hammer_viz_policy", + eval_num_traj= 100, + device_id= 0, + delta= 0.01, + env_name= "hand_hammer", + sliding_window= 80, + use_tactile= True, + batch_size_viz_pol= 128, + use_cuda= True, + lr= 0.0003, + horizon_il= 150, + train_expert= False, + num_traj_expert= 20, + traj_budget_expert= 12500, + train_traj_per_file= 20, + beta_start= 1 +) diff --git a/hand_vil/configs/config_main_pen_def.py b/hand_vil/configs/config_main_pen_def.py new file mode 100644 index 0000000..711a634 --- /dev/null +++ 
b/hand_vil/configs/config_main_pen_def.py @@ -0,0 +1,33 @@ +DEFAULT_CONFIG = dict( + lr= 0.0003, + delta= 0.01, + traj_budget_expert= 12500, + use_cuda= True, + num_files_val= 1, + camera_name= "vil_camera", + seed= 1000, + num_files_train= 1, + batch_size_viz_pol= 128, + id_post= "def_public_best_20traj_10ep", + dagger_epoch= 10, + env_id= "mjrl_pen_reposition-v2", + trainer_epochs= 10, + val_traj_per_file= 5, + viz_policy_folder_dagger= "dagger_hand_pen_def_viz_policy", + device_id= 0, + expert_policy_folder= "hand_pen_expert", + train_traj_per_file= 20, + bc_epoch= 20, + env_name= "hand_pen", + beta_start= 1, + has_robot_info= True, + gen_traj_dagger_ep= 20, + use_late_fusion= True, + use_tactile= True, + sliding_window= 80, + train_expert= False, + eval_num_traj= 100, + horizon_il= 150, + beta_decay= 0.2, + num_traj_expert= 50 +) diff --git a/hand_vil/configs/config_main_pen_v1.py b/hand_vil/configs/config_main_pen_v1.py new file mode 100644 index 0000000..221f82d --- /dev/null +++ b/hand_vil/configs/config_main_pen_v1.py @@ -0,0 +1,33 @@ +DEFAULT_CONFIG = dict( + lr= 0.0003, + delta= 0.01, + traj_budget_expert= 12500, + use_cuda= True, + num_files_val= 1, + camera_name= "view_1", + seed= 1000, + num_files_train= 1, + batch_size_viz_pol= 128, + id_post= "view_1_public_best_20trag_10ep", + dagger_epoch= 10, + env_id= "mjrl_pen_reposition-v2", + trainer_epochs= 10, + val_traj_per_file= 5, + viz_policy_folder_dagger= "dagger_hand_pen_view_1_viz_policy", + device_id= 0, + expert_policy_folder= "hand_pen_expert", + train_traj_per_file= 20, + bc_epoch= 20, + env_name= "hand_pen", + beta_start= 1, + has_robot_info= True, + gen_traj_dagger_ep= 20, + use_late_fusion= True, + use_tactile= True, + sliding_window= 80, + train_expert= False, + eval_num_traj= 100, + horizon_il= 150, + beta_decay= 0.2, + num_traj_expert= 50 +) diff --git a/hand_vil/configs/config_main_pen_v2.py b/hand_vil/configs/config_main_pen_v2.py new file mode 100644 index 0000000..51c4e76 --- /dev/null +++ b/hand_vil/configs/config_main_pen_v2.py @@ -0,0 +1,33 @@ +DEFAULT_CONFIG = dict( + lr= 0.0003, + delta= 0.01, + traj_budget_expert= 12500, + use_cuda= True, + num_files_val= 1, + camera_name= "view_2", + seed= 1000, + num_files_train= 1, + batch_size_viz_pol= 128, + id_post= "view_2_public_best_20trag_10ep", + dagger_epoch= 10, + env_id= "mjrl_pen_reposition-v2", + trainer_epochs= 10, + val_traj_per_file= 5, + viz_policy_folder_dagger= "dagger_hand_pen_view_2_viz_policy", + device_id= 0, + expert_policy_folder= "hand_pen_expert", + train_traj_per_file= 20, + bc_epoch= 20, + env_name= "hand_pen", + beta_start= 1, + has_robot_info= True, + gen_traj_dagger_ep= 20, + use_late_fusion= True, + use_tactile= True, + sliding_window= 80, + train_expert= False, + eval_num_traj= 100, + horizon_il= 150, + beta_decay= 0.2, + num_traj_expert= 50 +) diff --git a/hand_vil/configs/config_main_pen_v3.py b/hand_vil/configs/config_main_pen_v3.py new file mode 100644 index 0000000..32c2c1f --- /dev/null +++ b/hand_vil/configs/config_main_pen_v3.py @@ -0,0 +1,33 @@ +DEFAULT_CONFIG = dict( + lr= 0.0003, + delta= 0.01, + traj_budget_expert= 12500, + use_cuda= True, + num_files_val= 1, + camera_name= "view_3", + seed= 1000, + num_files_train= 1, + batch_size_viz_pol= 128, + id_post= "view_3_public_best_20trag_10ep", + dagger_epoch= 10, + env_id= "mjrl_pen_reposition-v2", + trainer_epochs= 10, + val_traj_per_file= 5, + viz_policy_folder_dagger= "dagger_hand_pen_view_3_viz_policy", + device_id= 0, + expert_policy_folder= "hand_pen_expert", + 
train_traj_per_file= 20, + bc_epoch= 20, + env_name= "hand_pen", + beta_start= 1, + has_robot_info= True, + gen_traj_dagger_ep= 20, + use_late_fusion= True, + use_tactile= True, + sliding_window= 80, + train_expert= False, + eval_num_traj= 100, + horizon_il= 150, + beta_decay= 0.2, + num_traj_expert= 50 +) diff --git a/hand_vil/configs/config_main_pen_v4.py b/hand_vil/configs/config_main_pen_v4.py new file mode 100644 index 0000000..5f5d509 --- /dev/null +++ b/hand_vil/configs/config_main_pen_v4.py @@ -0,0 +1,33 @@ +DEFAULT_CONFIG = dict( + lr= 0.0003, + delta= 0.01, + traj_budget_expert= 12500, + use_cuda= True, + num_files_val= 1, + camera_name= "view_4", + seed= 1000, + num_files_train= 1, + batch_size_viz_pol= 128, + id_post= "view_4_public_best_20trag_10ep", + dagger_epoch= 10, + env_id= "mjrl_pen_reposition-v2", + trainer_epochs= 10, + val_traj_per_file= 5, + viz_policy_folder_dagger= "dagger_hand_pen_view_4_viz_policy", + device_id= 0, + expert_policy_folder= "hand_pen_expert", + train_traj_per_file= 20, + bc_epoch= 20, + env_name= "hand_pen", + beta_start= 1, + has_robot_info= True, + gen_traj_dagger_ep= 20, + use_late_fusion= True, + use_tactile= True, + sliding_window= 80, + train_expert= False, + eval_num_traj= 100, + horizon_il= 150, + beta_decay= 0.2, + num_traj_expert= 50 +) diff --git a/hand_vil/configs/config_main_pen_v5.py b/hand_vil/configs/config_main_pen_v5.py new file mode 100644 index 0000000..a6995ce --- /dev/null +++ b/hand_vil/configs/config_main_pen_v5.py @@ -0,0 +1,33 @@ +DEFAULT_CONFIG = dict( + lr= 0.0003, + delta= 0.01, + traj_budget_expert= 12500, + use_cuda= True, + num_files_val= 1, + camera_name= "view_5", + seed= 1000, + num_files_train= 1, + batch_size_viz_pol= 128, + id_post= "view_5_public_best_20trag_10ep", + dagger_epoch= 10, + env_id= "mjrl_pen_reposition-v2", + trainer_epochs= 10, + val_traj_per_file= 5, + viz_policy_folder_dagger= "dagger_hand_pen_view_5_viz_policy", + device_id= 0, + expert_policy_folder= "hand_pen_expert", + train_traj_per_file= 20, + bc_epoch= 20, + env_name= "hand_pen", + beta_start= 1, + has_robot_info= True, + gen_traj_dagger_ep= 20, + use_late_fusion= True, + use_tactile= True, + sliding_window= 80, + train_expert= False, + eval_num_traj= 100, + horizon_il= 150, + beta_decay= 0.2, + num_traj_expert= 50 +) diff --git a/hand_vil/configs/config_main_pickup.py b/hand_vil/configs/config_main_pickup.py new file mode 100644 index 0000000..a96da37 --- /dev/null +++ b/hand_vil/configs/config_main_pickup.py @@ -0,0 +1,33 @@ +DEFAULT_CONFIG = dict( + train_traj_per_file= 20, + num_files_train= 1, + val_traj_per_file= 5, + id_post= "public_final_20traj_10ep", + horizon_il= 150, + eval_num_traj= 100, + has_robot_info= True, + gen_traj_dagger_ep= 20, + device_id= 0, + trainer_epochs= 10, + sliding_window= 80, + num_files_val= 1, + camera_name= "vil_camera", + viz_policy_folder_dagger= "dagger_hand_pickup_viz_policy", + dagger_epoch= 50, + traj_budget_expert= 12500, + lr= 0.0003, + use_cuda= True, + use_tactile= True, + seed= 1000, + delta= 0.01, + beta_start= 1, + env_id= "mjrl_SHAP_slide_pickup-v42", + num_traj_expert= 50, + bc_epoch= 20, + batch_size_viz_pol= 128, + use_late_fusion= True, + train_expert= False, + beta_decay= 0.2, + expert_policy_folder= "hand_pickup_expert", + env_name= "hand_pickup" +) diff --git a/hand_vil/local_settings.py.sample b/hand_vil/local_settings.py.sample new file mode 100644 index 0000000..76edeed --- /dev/null +++ b/hand_vil/local_settings.py.sample @@ -0,0 +1 @@ +MAIN_DIR = 
'/Users/divye/Documents/research/vil_paper/hand_dapg/hand_vil' \ No newline at end of file diff --git a/hand_vil/requirements.txt b/hand_vil/requirements.txt new file mode 100644 index 0000000..447a684 --- /dev/null +++ b/hand_vil/requirements.txt @@ -0,0 +1,80 @@ +absl-py==0.2.2 +astor==0.6.2 +bleach==1.5.0 +certifi==2018.1.18 +cffi==1.11.4 +chardet==3.0.4 +cycler==0.10.0 +Cython==0.27.3 +decorator==4.2.1 +entrypoints==0.2.3 +future==0.16.0 +gast==0.2.0 +glfw==1.5.1 +grpcio==1.12.1 +gym==0.9.3 +html5lib==0.9999999 +idna==2.6 +imageio==2.2.0 +ipykernel==4.8.0 +ipython==6.2.1 +ipython-genutils==0.2.0 +ipywidgets==7.1.1 +jedi==0.11.1 +Jinja2==2.10 +jsonschema==2.6.0 +jupyter==1.0.0 +jupyter-client==5.2.2 +jupyter-console==5.2.0 +jupyter-core==4.4.0 +Keras==2.1.4 +Markdown==2.6.11 +MarkupSafe==1.0 +matplotlib==2.1.2 +mistune==0.8.3 +-e git+https://github.com/openai/mujoco-py.git@6ac6ac203a875ef35b1505827264cadccbfd9f05#egg=mujoco_py +nbconvert==5.3.1 +nbformat==4.4.0 +notebook==5.4.0 +numpy==1.14.0 +olefile==0.45.1 +pandocfilters==1.4.2 +parso==0.1.1 +pexpect==4.3.1 +pickleshare==0.7.4 +Pillow==5.0.0 +prompt-toolkit==1.0.15 +protobuf==3.5.2.post1 +ptyprocess==0.5.2 +pycparser==2.18 +pyglet==1.3.1 +Pygments==2.2.0 +pyparsing==2.2.0 +python-dateutil==2.6.1 +pytz==2018.3 +PyYAML==3.12 +pyzmq==16.0.3 +qtconsole==4.3.1 +requests==2.18.4 +scipy==1.0.0 +Send2Trash==1.4.2 +simplegeneric==0.8.1 +six==1.11.0 +sk-video==1.1.10 +tabulate==0.7.5 +tensorboard==1.8.0 +tensorflow==1.8.0 +termcolor==1.1.0 +terminado==0.8.1 +testpath==0.3.1 +Theano==1.0.1 +torch==0.4.0 +torchvision==0.2.1 +tornado==4.5.3 +tqdm==4.23.4 +traitlets==4.3.2 +urllib3==1.22 +wcwidth==0.1.7 +webencodings==0.5.1 +Werkzeug==0.14.1 +widgetsnbextension==3.1.0 diff --git a/hand_vil/run.py b/hand_vil/run.py new file mode 100644 index 0000000..497c014 --- /dev/null +++ b/hand_vil/run.py @@ -0,0 +1,45 @@ +from hand_vil.run_utils import * +# Self-explanatory python stuff. +import time as timer +import json +import os +from hand_vil.settings import * + + +def main(config): + dump(config) + + ts = timer.time() + register_env(config) + + if config['train_expert']: + train_expert_policy(config) + print() + dump(config) + + gen_data_from_expert(config) + print() + dump(config) + + do_dagger(config) + print() + dump(config) + + print('Done with all steps') + print('total time taken = %f' % (timer.time() - ts)) + +def dump(config): + config_file = os.path.join(config['main_dir'], 'config.json') + + with open(config_file, 'w') as fp: + json.dump(config, fp) + + +if __name__ == '__main__': + config = DEFAULT_CONFIG + config['main_dir'] = os.path.join(DATA_DIR, '%s_%s' % (config['env_name'], config['id_post'])) + ensure_dir(config['main_dir']) + config['id'] = '%s_id_%s' % (config['env_name'], config['id_post']) + config['env_id'] = ENV_ID[config['env_name']] + + main(config) diff --git a/hand_vil/run_utils.py b/hand_vil/run_utils.py new file mode 100644 index 0000000..2cac044 --- /dev/null +++ b/hand_vil/run_utils.py @@ -0,0 +1,219 @@ +from mjrl.utils.transforms import ClipAction, ToCudaTensor +# loading dataset of observation, action, and image of state/sequence of +# images of previous states and current state [for context to capture motion info] +from mjrl.utils.dataset import get_dataset_from_files +# Torch dataloader for loading training and validation data into network [data as described above]. 
+from torch.utils.data import DataLoader
+from hand_vil.settings import *
+# Convolutional (visuomotor) Gaussian policy
+from mjrl.policies.gaussian_cnn import CNN
+# DAgger [Dataset Aggregation] algorithm, as described in the Ross et al. paper
+from mjrl.algos.dagger_vil import Dagger
+from mjrl.utils.gym_env import GymEnv
+from torchvision import transforms
+from mjrl.samplers.vil_sampler import trajectory_generator
+from gym.envs.registration import register
+
+from mjrl.policies.gaussian_mlp import MLP
+from mjrl.baselines.mlp_baseline import MLPBaseline
+from mjrl.algos.npg_cg import NPG
+from mjrl.utils.train_agent import train_agent
+
+import numpy as np
+import pickle
+import glob
+import time as timer
+
+
+def train_expert_policy(config):
+    print('-' * 80)
+    previous_dir = os.getcwd()
+    ensure_dir(GEN_DATA_DIR)
+    os.chdir(GEN_DATA_DIR)
+
+    print('Training Expert')
+    e = make_gym_env(config['env_id'], config)
+    policy = MLP(e.spec, hidden_sizes=(32, 32), seed=config['seed'])
+    baseline = MLPBaseline(e.spec, reg_coef=1e-3, batch_size=64, epochs=2, learn_rate=1e-3)
+    agent = NPG(e, policy, baseline, normalized_step_size=0.1, seed=config['seed'], save_logs=True)
+
+    job_name = '%s_expert' % config['env_name']
+    # Need to change where it dumps the policy
+    train_agent(job_name=job_name,
+                agent=agent,
+                seed=config['seed'],
+                niter=30,
+                gamma=0.995,
+                gae_lambda=0.97,
+                num_cpu=1,
+                sample_mode='trajectories',
+                num_traj=200,
+                save_freq=5,
+                evaluation_rollouts=5)
+    os.chdir(previous_dir)
+    os.rename(os.path.join(GEN_DATA_DIR, job_name, 'iterations/best_policy.pickle'),
+              os.path.join(EXPERT_POLICIES_DIR, EXPERT_POLICIES[config['env_name']]))
+    print('-' * 80)
+
+def gen_data_from_expert(config):
+    print('-' * 80)
+
+    train_dir = os.path.join(config['main_dir'], 'train_data')
+    val_dir = os.path.join(config['main_dir'], 'val_data')
+
+    e = make_gym_env(config['env_id'], config)
+    try:
+        gen_data(e, train_dir, config['num_files_train'], config['train_traj_per_file'], config)
+    except Exception:
+        os.rmdir(train_dir)
+        raise
+    try:
+        gen_data(e, val_dir, config['num_files_val'], config['val_traj_per_file'], config)
+    except Exception:
+        os.rmdir(val_dir)
+        raise
+
+    del (e)
+
+
+def do_dagger(config):
+    config['viz_policy_folder_dagger'] = 'dagger_%s_viz_policy' % config['env_name']
+    viz_policy_folder_dagger = os.path.join(config['main_dir'], config['viz_policy_folder_dagger'])
+
+    print('-' * 80)
+    if os.path.exists(viz_policy_folder_dagger):
+        print('DAgger: Viz policy already exists')
+        return
+    print('DAgger: Training viz policy now')
+
+    ensure_dir(viz_policy_folder_dagger)
+    train_dataloader, val_dataloader, transformed_train_dataset, transformed_val_dataset = get_dataloaders_datasets(config)
+
+    # policy = MLP(e.spec, hidden_sizes=(64,64), seed=SEED)
+    e = make_gym_env(config['env_id'], config)
+    robot_info_dim = len(e.env.env.get_proprioception(use_tactile=config['use_tactile']))
+
+    policy = CNN(action_dim=transformed_train_dataset.action_dim,
+                 robot_info_dim=robot_info_dim,
+                 action_stats=transformed_train_dataset.get_action_stats(),
+                 robot_info_stats=transformed_train_dataset.get_robot_info_stats(),
+                 use_late_fusion=config['use_late_fusion'], use_cuda=config['use_cuda'])
+
+    ts = timer.time()
+
+    expert_policy = pickle.load(open(get_expert_policy_path(config['env_name'], config), 'rb'))
+
+    # frame_size = (128, 128)
+    dagger_algo = Dagger(
+        dagger_epochs=config['dagger_epoch'],
+        expert_policy=expert_policy,
+        viz_policy=policy,
+        old_data_loader=train_dataloader,
val_data_loader=val_dataloader, + log_dir=os.path.join(LOG_DIR, config['id'], 'dagger'), + pol_dir_name=viz_policy_folder_dagger, + save_epoch=1, + beta_decay=config['beta_decay'], + beta_start=config['beta_start'], + env=e, + lr=config['lr'], + num_traj_gen=config['gen_traj_dagger_ep'], + camera_name=config['camera_name'], + seed=config['seed'] + (config['num_files_train'] * config['train_traj_per_file']), + trainer_epochs=config['trainer_epochs'], + eval_num_traj=config['eval_num_traj'], + sliding_window=config['sliding_window'], + device_id=config['device_id'], + use_cuda=config['use_cuda'], + frame_size=FRAME_SIZE, + use_tactile=config['use_tactile']) + + dagger_algo.train() + trained_policy = dagger_algo.viz_policy + + print("time taken = %f" % (timer.time() - ts)) + del (e) + + +def get_dataloaders_datasets(config): + train_dir = os.path.join(config['main_dir'], 'train_data') + val_dir = os.path.join(config['main_dir'], 'val_data') + + train_path_files = glob.glob(os.path.join(train_dir, '*')) + val_path_files = glob.glob(os.path.join(val_dir, '*')) + + if config['use_cuda']: + transforms_list = [ClipAction(), ToCudaTensor()] + else: + transforms_list = [ClipAction()] + + transformed_train_dataset = get_dataset_from_files(train_path_files, + transform=transforms.Compose(transforms_list)) + transformed_val_dataset = get_dataset_from_files(val_path_files, + transform=transforms.Compose(transforms_list)) + + train_dataloader = DataLoader(transformed_train_dataset, + batch_size=config['batch_size_viz_pol'], + shuffle=True, + num_workers=4) + val_dataloader = DataLoader(transformed_val_dataset, + batch_size=config['batch_size_viz_pol'], + shuffle=True, + num_workers=4) + return train_dataloader, val_dataloader, transformed_train_dataset, transformed_val_dataset + + +def gen_data(env, data_dir, num_files, trajs_per_file, config): + if os.path.exists(data_dir): + print('%s folder already exists' % os.path.basename(data_dir)) + return + + ensure_dir(data_dir) + print('Generating %s' % os.path.basename(data_dir)) + expert_policy_path = get_expert_policy_path(config['env_name'], config) + expert_policy = pickle.load(open(expert_policy_path, 'rb')) + + for i in range(num_files): + seed = config['seed'] + i * trajs_per_file + paths = np.array( + trajectory_generator(N=trajs_per_file, + expert_policy=expert_policy, + viz_policy=None, + beta=1.0, + seed_offset=1, + env=env, + use_tactile=config['use_tactile'], + camera_name=config['camera_name'], + use_cuda=config['use_cuda'], + frame_size=FRAME_SIZE, + device_id=config['device_id'], + pegasus_seed=seed)) + + train_file = os.path.join(data_dir, 'train_paths_%s_batch_%d.pickle' % (config['env_name'], i)) + pickle.dump(paths, open(train_file, 'wb')) + + +def make_gym_env(id, config): + e = GymEnv(id) + config['env_spec'] = e.spec.__dict__ + return e + + +def ensure_dir(directory): + if not os.path.exists(directory): + os.makedirs(directory) + return directory + + +def get_expert_policy_path(env_name, config): + exp_p_p = os.path.join(EXPERT_POLICIES_DIR, EXPERT_POLICIES[env_name]) + print('Using: %s' % exp_p_p) + return exp_p_p + + +def register_env(config): + register( + id=config['env_id'], + entry_point=ENTRY_POINT[config['env_name']], + max_episode_steps=config['horizon_il'], + ) diff --git a/hand_vil/settings.py b/hand_vil/settings.py new file mode 100644 index 0000000..1b01a71 --- /dev/null +++ b/hand_vil/settings.py @@ -0,0 +1,64 @@ +from hand_vil.local_settings import MAIN_DIR +import os +from hand_vil.config_main import DEFAULT_CONFIG + + 
+GEN_DATA_DIR = os.path.join(MAIN_DIR, 'gen_data')
+
+RES_DIR = os.path.join(GEN_DATA_DIR, 'results')
+VIDEOS_FOLDER = os.path.join(RES_DIR, 'videos')
+PLOTS_FOLDER = os.path.join(RES_DIR, 'plots')
+
+
+DATA_DIR = os.path.join(GEN_DATA_DIR, 'data')
+LOG_DIR = os.path.join(DATA_DIR, 'logs')
+
+POLICIES_DIR = os.path.join(DATA_DIR, 'policies')
+TRAIN_DATA_DIR = os.path.join(DATA_DIR, 'train_data')
+VAL_DATA_DIR = os.path.join(DATA_DIR, 'val_data')
+
+EXPERT_POLICIES_DIR = os.path.join(MAIN_DIR, '..', 'dapg', 'policies')
+TRAIN_TRAJS = 5000
+TEST_TRAJS = 100
+
+ENV_ID = {
+    'hand_pickup': 'mjrl_SHAP_slide_pickup-v42',
+    'hand_hammer': 'mjrl_hammer-v0',
+    'hand_pen': 'mjrl_pen_reposition-v2',
+    'hand_door': 'mjrl_SHAP_door_handle-v5',
+    'point_mass': 'mjrl_point_mass-v1',
+}
+
+EXPERT_POLICIES = {
+    # Add Other envs here
+    'hand_pickup': 'relocate-v0.pickle',
+    'hand_hammer': 'hammer-v0.pickle',
+    'hand_pen': 'pen-v0.pickle',
+    'hand_door': 'door-v0.pickle',
+    # 'point_mass': 'point_mass.pickle',
+}
+
+ENTRY_POINT = {
+    'hand_pickup': 'mj_envs.hand_manipulation_suite:RelocateEnvV0',
+    'hand_hammer': 'mj_envs.hand_manipulation_suite:HammerEnvV0',
+    'hand_pen': 'mj_envs.hand_manipulation_suite:PenEnvV0',
+    'hand_door': 'mj_envs.hand_manipulation_suite:DoorEnvV0',
+    # 'point_mass': 'mj_envs.hand_manipulation_suite:PointMassEnv',
+}
+
+VIZ_ENV_IDS = {
+    'hand_hammer': 'hammer-v0',
+    'hand_door': 'door-v0',
+    'hand_pickup': 'relocate-v0',
+    'hand_pen': 'pen-v0'
+}
+
+FRAME_SIZE = (128, 128)
+
+CAMERA_NAME = {
+    'hand_pickup': 'vil_camera',
+    'hand_hammer': 'vil_camera',
+    'hand_pen': 'vil_camera',
+    'hand_door': 'vil_camera',
+    'point_mass': 'top_view',
+}
diff --git a/hand_vil/viz_policy.py b/hand_vil/viz_policy.py
new file mode 100644
index 0000000..170ec8f
--- /dev/null
+++ b/hand_vil/viz_policy.py
@@ -0,0 +1,33 @@
+import pickle
+from mjrl.utils.gym_env import GymEnv
+from settings import *
+import mj_envs
+from mj_envs.utils import hand_vil
+
+def ensure_dir(directory):
+    if not os.path.exists(directory):
+        os.makedirs(directory)
+    return directory
+
+
+CAMERA_NAME = 'vil_camera'
+ENV_NAME = 'hand_door'
+VIZ_FOLDER = 'hand_door_videos'
+FULL_POLICY_PATH = ''  # path to a trained policy pickle under gen_data
+
+
+def main():
+    e = GymEnv(VIZ_ENV_IDS[ENV_NAME])
+
+    policy = pickle.load(open(FULL_POLICY_PATH, 'rb'))
+    print('using %d horizon' % e.horizon)
+    policy.model.eval()
+    policy.old_model.eval()
+    hand_vil.visualize_policy_offscreen(gym_env=e, save_loc=ensure_dir(os.path.join(VIDEOS_FOLDER, VIZ_FOLDER)) + '/', policy=policy, use_tactile=False,
+                                        num_episodes=3, horizon=e.horizon, mode='evaluation',
+                                        camera_name=CAMERA_NAME, pickle_dump=False, frame_size_model=FRAME_SIZE)
+    del (e)
+
+
+if __name__ == '__main__':
+    main()
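
For quick reference, below is a minimal sketch (not part of the patch above) that reproduces the output-path logic from `run.py` and `run_utils.py`, which can help locate the generated data and the trained DAgger policy. It assumes `hand_vil` is on `PYTHONPATH`, a `local_settings.py` has been created as described in the README, and a config has been copied to `config_main.py`.

```python
# Sketch: derive the output locations that run.py / run_utils.py will use,
# based on the active config_main.py.
import os

from hand_vil.config_main import DEFAULT_CONFIG
from hand_vil.settings import DATA_DIR

config = DEFAULT_CONFIG
# run.py: main_dir = DATA_DIR/<env_name>_<id_post>
main_dir = os.path.join(DATA_DIR, '%s_%s' % (config['env_name'], config['id_post']))

print('training data:  ', os.path.join(main_dir, 'train_data'))
print('validation data:', os.path.join(main_dir, 'val_data'))
# do_dagger() writes the visuomotor policy under dagger_<env_name>_viz_policy
print('trained policy: ', os.path.join(main_dir, 'dagger_%s_viz_policy' % config['env_name']))
```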