diff --git a/cmd/root.go b/cmd/root.go index 67073d912e..a533033863 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -34,7 +34,7 @@ HPC deployments on the Google Cloud Platform.`, log.Fatalf("cmd.Help function failed: %s", err) } }, - Version: "v1.3.0", + Version: "v1.4.0", } ) diff --git a/community/examples/cloud-batch.yaml b/community/examples/cloud-batch.yaml index 80ba4cfb51..acfede4f20 100644 --- a/community/examples/cloud-batch.yaml +++ b/community/examples/cloud-batch.yaml @@ -27,19 +27,19 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: appfs + source: modules/file-system/filestore kind: terraform - id: appfs use: [network1] settings: {local_mount: /sw} - - source: modules/scripts/startup-script + - id: hello-startup-script + source: modules/scripts/startup-script kind: terraform - id: hello-startup-script settings: runners: - type: shell @@ -53,9 +53,9 @@ deployment_groups: #!/bin/sh echo "Hello World" > /sw/hello.txt - - source: community/modules/scheduler/cloud-batch-job + - id: batch-job + source: community/modules/scheduler/cloud-batch-job kind: terraform - id: batch-job use: [network1, appfs, hello-startup-script] settings: runnable: "cat /sw/hello.txt" @@ -64,8 +64,8 @@ deployment_groups: family: centos-7 project: centos-cloud - - source: community/modules/scheduler/cloud-batch-login-node + - id: batch-login + source: community/modules/scheduler/cloud-batch-login-node kind: terraform - id: batch-login use: [batch-job] outputs: [instructions] diff --git a/community/examples/hpc-cluster-small-sharedvpc.yaml b/community/examples/hpc-cluster-small-sharedvpc.yaml index 4b87fbffd1..e70e22ba80 100644 --- a/community/examples/hpc-cluster-small-sharedvpc.yaml +++ b/community/examples/hpc-cluster-small-sharedvpc.yaml @@ -41,17 +41,17 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 settings: project_id: $(vars.host_project_id) network_name: your-shared-network subnetwork_name: your-shared-subnetwork - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home @@ -59,9 +59,9 @@ deployment_groups: network_name: $(network1.network_id) # This debug_partition will work out of the box without requesting additional GCP quota. - - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: debug_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: debug_partition use: - network1 - homefs @@ -73,9 +73,9 @@ deployment_groups: machine_type: n2-standard-2 # This compute_partition is far more performant than debug_partition but may require requesting GCP quotas first. 
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: - network1 - homefs @@ -83,9 +83,9 @@ deployment_groups: partition_name: compute max_node_count: 20 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - network1 - homefs @@ -95,9 +95,9 @@ deployment_groups: login_node_count: 1 shared_vpc_host_project: $(vars.host_project_id) - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - network1 - homefs diff --git a/community/examples/htcondor-pool.yaml b/community/examples/htcondor-pool.yaml index cb798fac3e..f4d82168cd 100644 --- a/community/examples/htcondor-pool.yaml +++ b/community/examples/htcondor-pool.yaml @@ -27,32 +27,32 @@ vars: deployment_groups: - group: htcondor modules: - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 settings: network_name: htcondor-pool subnetwork_name: htcondor-pool-usc1 outputs: - network_name - - source: community/modules/scripts/htcondor-install + - id: htcondor_install + source: community/modules/scripts/htcondor-install kind: terraform - id: htcondor_install - - source: community/modules/project/service-enablement + - id: htcondor_services + source: community/modules/project/service-enablement kind: terraform - id: htcondor_services use: - htcondor_install - - source: community/modules/scheduler/htcondor-configure + - id: htcondor_configure + source: community/modules/scheduler/htcondor-configure kind: terraform - id: htcondor_configure - - source: modules/scripts/startup-script + - id: htcondor_configure_central_manager + source: modules/scripts/startup-script kind: terraform - id: htcondor_configure_central_manager settings: runners: - type: shell @@ -61,9 +61,9 @@ deployment_groups: - $(htcondor_install.install_htcondor_runner) - $(htcondor_configure.central_manager_runner) - - source: modules/compute/vm-instance + - id: htcondor_cm + source: modules/compute/vm-instance kind: terraform - id: htcondor_cm use: - network1 - htcondor_configure_central_manager @@ -78,9 +78,9 @@ deployment_groups: outputs: - internal_ip - - source: modules/scripts/startup-script + - id: htcondor_configure_execute_point + source: modules/scripts/startup-script kind: terraform - id: htcondor_configure_execute_point settings: runners: - type: shell @@ -89,9 +89,9 @@ deployment_groups: - $(htcondor_install.install_htcondor_runner) - $(htcondor_configure.execute_point_runner) - - source: community/modules/compute/htcondor-execute-point + - id: htcondor_execute_point + source: community/modules/compute/htcondor-execute-point kind: terraform - id: htcondor_execute_point use: - network1 - htcondor_configure_execute_point @@ -104,9 +104,9 @@ deployment_groups: scopes: - cloud-platform - - source: modules/scripts/startup-script + - id: htcondor_configure_access_point + source: modules/scripts/startup-script kind: terraform - id: htcondor_configure_access_point settings: runners: - type: shell @@ -128,9 +128,9 @@ deployment_groups: request_cpus = 1 request_memory = 100MB queue - - source: modules/compute/vm-instance + - id: htcondor_access + source: 
modules/compute/vm-instance kind: terraform - id: htcondor_access use: - network1 - htcondor_configure_access_point diff --git a/community/examples/intel/daos-cluster.yaml b/community/examples/intel/daos-cluster.yaml index ec7864d4a8..0fab4f4431 100644 --- a/community/examples/intel/daos-cluster.yaml +++ b/community/examples/intel/daos-cluster.yaml @@ -28,16 +28,16 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 # This module creates a DAOS server. Server images MUST be created before running this. # https://github.com/daos-stack/google-cloud-daos/tree/main/images # more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server - - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1 + - id: daos-server + source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1 kind: terraform - id: daos-server use: [network1] settings: number_of_instances: 2 @@ -46,9 +46,9 @@ deployment_groups: # This module creates a MIG with DAOS clients. Client images MUST be created before running this. # https://github.com/daos-stack/google-cloud-daos/tree/main/images # more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_client - - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.2.1 + - id: daos-client + source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.2.1 kind: terraform - id: daos-client use: [network1, daos-server] settings: number_of_instances: 2 diff --git a/community/examples/intel/daos-slurm.yaml b/community/examples/intel/daos-slurm.yaml index 10d2378a98..beb5598b3b 100644 --- a/community/examples/intel/daos-slurm.yaml +++ b/community/examples/intel/daos-slurm.yaml @@ -28,13 +28,13 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: "/home" @@ -42,9 +42,9 @@ deployment_groups: # This module creates a DAOS server. Server images MUST be created before running this. # https://github.com/daos-stack/google-cloud-daos/tree/main/images # more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server - - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1 + - id: daos + source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1 kind: terraform - id: daos use: [network1] settings: labels: {ghpc_role: file-system} @@ -68,9 +68,9 @@ deployment_groups: reclaim: "lazy" containers: [] - - source: modules/scripts/startup-script + - id: daos-client-script + source: modules/scripts/startup-script kind: terraform - id: daos-client-script settings: runners: - type: shell @@ -87,9 +87,9 @@ deployment_groups: destination: /var/daos/daos_client_config.sh ## This debug_partition will work out of the box without requesting additional GCP quota. 
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: debug_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: debug_partition use: - network1 - homefs @@ -100,9 +100,9 @@ deployment_groups: machine_type: n2-standard-2 # This compute_partition is far more performant than debug_partition but may require requesting GCP quotas first. - - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: - network1 - homefs @@ -110,9 +110,9 @@ deployment_groups: partition_name: compute max_node_count: 20 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - network1 - homefs @@ -127,9 +127,9 @@ deployment_groups: - "https://www.googleapis.com/auth/devstorage.read_only" - "https://www.googleapis.com/auth/cloud-platform" - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - network1 - homefs diff --git a/community/examples/intel/hpc-cluster-intel-select.yaml b/community/examples/intel/hpc-cluster-intel-select.yaml index ecfb42a703..6e6372a855 100644 --- a/community/examples/intel/hpc-cluster-intel-select.yaml +++ b/community/examples/intel/hpc-cluster-intel-select.yaml @@ -31,12 +31,12 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 - - source: modules/scripts/startup-script + - id: startup_controller + source: modules/scripts/startup-script kind: terraform - id: startup_controller settings: runners: - type: shell @@ -47,9 +47,9 @@ deployment_groups: google_install_mpi --prefix /apps --intel_compliance outputs: - startup_script - - source: modules/scripts/startup-script + - id: startup_compute + source: modules/scripts/startup-script kind: terraform - id: startup_compute settings: runners: - type: shell @@ -78,17 +78,17 @@ deployment_groups: - startup_script - group: packer modules: - - source: modules/packer/custom-image + - id: controller-image + source: modules/packer/custom-image kind: packer - id: controller-image settings: disk_size: 20 source_image_project_id: [schedmd-slurm-public] source_image_family: schedmd-slurm-21-08-8-hpc-centos-7 image_family: $(vars.controller_image_family) - - source: modules/packer/custom-image + - id: compute-image + source: modules/packer/custom-image kind: packer - id: compute-image settings: disk_size: 20 source_image_project_id: [schedmd-slurm-public] @@ -96,20 +96,20 @@ deployment_groups: image_family: $(vars.compute_image_family) - group: cluster modules: - - source: modules/network/pre-existing-vpc + - id: cluster-network + source: modules/network/pre-existing-vpc kind: terraform - id: cluster-network - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: - cluster-network settings: local_mount: /home # This debug_partition will work out of the box without requesting additional GCP quota. 
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: debug_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: debug_partition use: - cluster-network - homefs @@ -122,9 +122,9 @@ deployment_groups: instance_image: family: $(vars.compute_image_family) project: $(vars.project_id) - - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: - cluster-network - homefs @@ -135,9 +135,9 @@ deployment_groups: project: $(vars.project_id) max_node_count: 100 machine_type: c2-standard-60 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - cluster-network - compute_partition @@ -148,9 +148,9 @@ deployment_groups: family: $(vars.controller_image_family) project: $(vars.project_id) controller_machine_type: c2-standard-4 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - cluster-network - slurm_controller diff --git a/community/examples/omnia-cluster.yaml b/community/examples/omnia-cluster.yaml index 83496d96e4..63871743d5 100644 --- a/community/examples/omnia-cluster.yaml +++ b/community/examples/omnia-cluster.yaml @@ -21,6 +21,9 @@ vars: deployment_name: omnia-cluster zone: us-central1-c region: us-central1 + instance_image: + family: rocky-linux-8 + project: rocky-linux-cloud # Documentation for each of the modules used below can be found at # https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md @@ -30,30 +33,30 @@ deployment_groups: modules: ## Network - - source: modules/network/vpc + - id: network + source: modules/network/vpc kind: terraform - id: network ## File Systems - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network] settings: local_mount: "/home" ## Installation Scripts - - source: community/modules/scripts/omnia-install + - id: omnia + source: community/modules/scripts/omnia-install kind: terraform - id: omnia outputs: [inventory_file, omnia_user_warning] settings: manager_ips: [localhost] compute_ips: $(compute.internal_ip) - - source: modules/scripts/startup-script + - id: startup-manager + source: modules/scripts/startup-script kind: terraform - id: startup-manager settings: runners: - type: shell @@ -65,9 +68,9 @@ deployment_groups: - $(omnia.copy_inventory_runner) - $(omnia.install_omnia_runner) - - source: modules/scripts/startup-script + - id: startup-compute + source: modules/scripts/startup-script kind: terraform - id: startup-compute settings: runners: - type: shell @@ -78,9 +81,9 @@ deployment_groups: - $(omnia.setup_omnia_node_runner) ## Compute - - source: modules/compute/vm-instance + - id: manager + source: modules/compute/vm-instance kind: terraform - id: manager use: - network - homefs @@ -89,9 +92,9 @@ deployment_groups: name_prefix: omnia-manager machine_type: n2-standard-4 - - source: modules/compute/vm-instance + - id: compute + source: modules/compute/vm-instance kind: terraform - id: compute use: - network - homefs @@ -101,9 +104,9 @@ deployment_groups: instance_count: 2 # This module simply makes terraform wait until the startup 
script is complete - - source: community/modules/scripts/wait-for-startup + - id: wait + source: community/modules/scripts/wait-for-startup kind: terraform - id: wait use: - network settings: diff --git a/community/examples/slurm-gcp-v5-hpc-centos7.yaml b/community/examples/slurm-gcp-v5-hpc-centos7.yaml index 44b847419d..e913af43f7 100644 --- a/community/examples/slurm-gcp-v5-hpc-centos7.yaml +++ b/community/examples/slurm-gcp-v5-hpc-centos7.yaml @@ -31,20 +31,20 @@ deployment_groups: # Source is an embedded resource, denoted by "resources/*" without ./, ../, / # as a prefix. To refer to a local resource, prefix with ./, ../ or / # Example - ./resources/network/vpc - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home - - source: community/modules/compute/schedmd-slurm-gcp-v5-partition + - id: debug_partition + source: community/modules/compute/schedmd-slurm-gcp-v5-partition kind: terraform - id: debug_partition use: - network1 - homefs @@ -55,9 +55,9 @@ deployment_groups: machine_type: n2-standard-2 is_default: true - - source: community/modules/compute/schedmd-slurm-gcp-v5-partition + - id: compute_partition + source: community/modules/compute/schedmd-slurm-gcp-v5-partition kind: terraform - id: compute_partition use: - network1 - homefs @@ -65,18 +65,18 @@ deployment_groups: partition_name: compute node_count_dynamic_max: 20 - - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller + - id: slurm_controller + source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller kind: terraform - id: slurm_controller use: - network1 - debug_partition - compute_partition - homefs - - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login + - id: slurm_login + source: community/modules/scheduler/schedmd-slurm-gcp-v5-login kind: terraform - id: slurm_login use: - network1 - slurm_controller diff --git a/community/examples/slurm-gcp-v5-ubuntu2004.yaml b/community/examples/slurm-gcp-v5-ubuntu2004.yaml index c5af848861..f42e707147 100644 --- a/community/examples/slurm-gcp-v5-ubuntu2004.yaml +++ b/community/examples/slurm-gcp-v5-ubuntu2004.yaml @@ -31,20 +31,20 @@ deployment_groups: # Source is an embedded resource, denoted by "resources/*" without ./, ../, / # as a prefix. 
To refer to a local resource, prefix with ./, ../ or / # Example - ./resources/network/vpc - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home - - source: community/modules/compute/schedmd-slurm-gcp-v5-partition + - id: debug_partition + source: community/modules/compute/schedmd-slurm-gcp-v5-partition kind: terraform - id: debug_partition use: - network1 - homefs @@ -55,9 +55,9 @@ deployment_groups: machine_type: n2-standard-2 is_default: true - - source: community/modules/compute/schedmd-slurm-gcp-v5-partition + - id: compute_partition + source: community/modules/compute/schedmd-slurm-gcp-v5-partition kind: terraform - id: compute_partition use: - network1 - homefs @@ -65,18 +65,18 @@ deployment_groups: partition_name: compute node_count_dynamic_max: 20 - - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller + - id: slurm_controller + source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller kind: terraform - id: slurm_controller use: - network1 - debug_partition - compute_partition - homefs - - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login + - id: slurm_login + source: community/modules/scheduler/schedmd-slurm-gcp-v5-login kind: terraform - id: slurm_login use: - network1 - slurm_controller diff --git a/community/examples/spack-gromacs.yaml b/community/examples/spack-gromacs.yaml index 2ab1e88049..ef499f81e9 100644 --- a/community/examples/spack-gromacs.yaml +++ b/community/examples/spack-gromacs.yaml @@ -28,29 +28,29 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 ## Filesystems - - source: modules/file-system/filestore + - id: appsfs + source: modules/file-system/filestore kind: terraform - id: appsfs use: [network1] settings: local_mount: /sw - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home ## Install Scripts - - source: community/modules/scripts/spack-install + - id: spack + source: community/modules/scripts/spack-install kind: terraform - id: spack settings: install_dir: /sw/spack spack_url: https://github.com/spack/spack @@ -83,9 +83,9 @@ deployment_groups: # - mirror_name: gcs_cache # mirror_url: gs://bucket-name/... 
- - source: modules/scripts/startup-script + - id: spack-startup + source: modules/scripts/startup-script kind: terraform - id: spack-startup settings: runners: - type: shell @@ -94,9 +94,9 @@ deployment_groups: - $(spack.install_spack_deps_runner) - $(spack.install_spack_runner) - - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: - network1 - homefs @@ -105,9 +105,9 @@ deployment_groups: partition_name: compute max_node_count: 20 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - network1 - homefs @@ -116,9 +116,9 @@ deployment_groups: settings: login_node_count: 1 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - network1 - homefs diff --git a/community/modules/compute/SchedMD-slurm-on-gcp-partition/README.md b/community/modules/compute/SchedMD-slurm-on-gcp-partition/README.md index c9863995e5..6b9d45500e 100644 --- a/community/modules/compute/SchedMD-slurm-on-gcp-partition/README.md +++ b/community/modules/compute/SchedMD-slurm-on-gcp-partition/README.md @@ -18,9 +18,9 @@ The following code snippet creates a partition module with: * Mounted to homefs via `use` ```yaml -- source: community/modules/compute/SchedMD-slurm-on-gcp-partition +- id: compute_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: [network1, homefs] settings: max_node_count: 200 diff --git a/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf b/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf index ed0c14c6cb..2aec7e09c7 100644 --- a/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf +++ b/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.4.0" } required_version = ">= 0.14.0" diff --git a/community/modules/compute/htcondor-execute-point/README.md b/community/modules/compute/htcondor-execute-point/README.md index f335c41d8c..a54a4e3234 100644 --- a/community/modules/compute/htcondor-execute-point/README.md +++ b/community/modules/compute/htcondor-execute-point/README.md @@ -22,9 +22,9 @@ a startup script and network created in previous steps. > OS Login on all HTCondor nodes, including execute points. ```yaml -- source: community/modules/compute/htcondor-execute-point +- id: htcondor_execute_point + source: community/modules/compute/htcondor-execute-point kind: terraform - id: htcondor_execute_point use: - network1 - htcondor_configure_execute_point diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md b/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md index c344404a00..d977d584fd 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md @@ -71,7 +71,7 @@ No providers. 
| Name | Source | Version | |------|--------|---------| -| [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | v5.0.3 | +| [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | v5.1.0 | ## Resources @@ -82,6 +82,7 @@ No resources. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [additional\_disks](#input\_additional\_disks) | Configurations of additional disks to be included on the partition nodes. |
<pre>list(object({<br>disk_name = string<br>device_name = string<br>disk_size_gb = number<br>disk_type = string<br>disk_labels = map(string)<br>auto_delete = bool<br>boot = bool<br>}))</pre>
| `[]` | no | +| [bandwidth\_tier](#input\_bandwidth\_tier) | Configures the network interface card and the maximum egress bandwidth for VMs.
- Setting `platform_default` respects the Google Cloud Platform API default values for networking.<br>- Setting `virtio_enabled` explicitly selects the VirtioNet network adapter.<br>- Setting `gvnic_enabled` selects the gVNIC network adapter (without Tier 1 high bandwidth).<br>- Setting `tier_1_enabled` selects both the gVNIC adapter and Tier 1 high bandwidth networking.<br>- Note: both gVNIC and Tier 1 networking require a VM image with gVNIC support as well as specific VM families and shapes.
- See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"platform_default"` | no | | [can\_ip\_forward](#input\_can\_ip\_forward) | Enable IP forwarding, for NAT instances for example. | `bool` | `false` | no | | [deployment\_name](#input\_deployment\_name) | Name of the deployment. | `string` | n/a | yes | | [disable\_smt](#input\_disable\_smt) | Disables Simultaneous Multi-Threading (SMT) on instance. | `bool` | `false` | no | diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf index 1f42304480..e44081e99a 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf @@ -29,6 +29,7 @@ locals { # Template By Definition additional_disks = var.additional_disks + bandwidth_tier = var.bandwidth_tier can_ip_forward = var.can_ip_forward disable_smt = var.disable_smt disk_auto_delete = var.disk_auto_delete @@ -69,7 +70,7 @@ locals { module "slurm_partition" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=v5.0.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=v5.1.0" slurm_cluster_name = local.slurm_cluster_name partition_nodes = local.partition_nodes diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf index 0dd61d1a95..03230e5c1f 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf @@ -15,7 +15,7 @@ */ # Most variables have been sourced and modified from the SchedMD/slurm-gcp -# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.0.3 +# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.1.0 variable "deployment_name" { description = "Name of the deployment." @@ -359,3 +359,22 @@ variable "spot_instance_config" { }) default = null } + +variable "bandwidth_tier" { + description = < **_WARNING:_** This module has only been tested against the HPC centos7 OS +> disk image (the default). Using other images may work, but have not been +> verified. + [disk]: https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_disk ### Example ```yaml -- source: community/modules/file-system/nfs-server +- id: homefs + source: community/modules/file-system/nfs-server kind: terraform - id: homefs use: [network1] settings: auto_delete_disk: true diff --git a/community/modules/file-system/nfs-server/scripts/install-nfs-client.sh b/community/modules/file-system/nfs-server/scripts/install-nfs-client.sh index 8ad49f4780..6c49163eb2 100644 --- a/community/modules/file-system/nfs-server/scripts/install-nfs-client.sh +++ b/community/modules/file-system/nfs-server/scripts/install-nfs-client.sh @@ -24,7 +24,7 @@ if [ ! "$(which mount.nfs)" ]; then enable_repo="baseos" else echo "Unsupported version of centos/RHEL/Rocky" - exit 1 + return 1 fi yum install --disablerepo="*" --enablerepo=${enable_repo} -y nfs-utils elif [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release || grep -qi ubuntu /etc/os-release; then @@ -32,6 +32,6 @@ if [ ! 
"$(which mount.nfs)" ]; then apt-get -y install nfs-common else echo 'Unsuported distribution' - exit 1 + return 1 fi fi diff --git a/community/modules/file-system/nfs-server/versions.tf b/community/modules/file-system/nfs-server/versions.tf index 36b6144abc..129594c1f4 100644 --- a/community/modules/file-system/nfs-server/versions.tf +++ b/community/modules/file-system/nfs-server/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.4.0" } required_version = ">= 0.14.0" diff --git a/community/modules/project/new-project/README.md b/community/modules/project/new-project/README.md index 166f2c3eaa..6301b04d3f 100644 --- a/community/modules/project/new-project/README.md +++ b/community/modules/project/new-project/README.md @@ -9,9 +9,9 @@ This module is meant for use with Terraform 0.13. ### Example ```yaml -- source: community/modules/project/new-project +- id: project + source: community/modules/project/new-project kind: terraform - id: project settings: project_id: test_project folder_id: 334688113020 # random number diff --git a/community/modules/project/service-account/README.md b/community/modules/project/service-account/README.md index e0831db438..cf9331f2d2 100644 --- a/community/modules/project/service-account/README.md +++ b/community/modules/project/service-account/README.md @@ -5,9 +5,9 @@ Allows creation of service accounts for a Google Cloud Platform project. ### Example ```yaml -- source: community/modules/project/service-account +- id: service_acct + source: community/modules/project/service-account kind: terraform - id: service_acct settings: - project_id: $(vars.project_id) - names: [ "instance_acct" ] diff --git a/community/modules/project/service-enablement/README.md b/community/modules/project/service-enablement/README.md index 8184996eb8..f03091a28b 100644 --- a/community/modules/project/service-enablement/README.md +++ b/community/modules/project/service-enablement/README.md @@ -5,9 +5,9 @@ Allows management of multiple API services for a Google Cloud Platform project. ### Example ```yaml -- source: community/modules/project/service-enablement +- id: services-api + source: community/modules/project/service-enablement kind: terraform - id: services-api settings: gcp_service_list: [ "file.googleapis.com", diff --git a/community/modules/project/service-enablement/versions.tf b/community/modules/project/service-enablement/versions.tf index 16d9699e8f..7f5fbc7e64 100644 --- a/community/modules/project/service-enablement/versions.tf +++ b/community/modules/project/service-enablement/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.4.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/README.md b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/README.md index eb195a0f20..ff3787ad52 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/README.md +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/README.md @@ -17,9 +17,9 @@ controller for optimal performance at different scales. 
### Example ```yaml -- source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller +- id: slurm_controller + source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - network1 - homefs diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf index 36750c362e..5e4b748824 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.4.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/README.md b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/README.md index ed19144dec..ba18a7adf4 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/README.md +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/README.md @@ -14,9 +14,9 @@ node is used in conjunction with the ### Example ```yaml -- source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node +- id: slurm_login + source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - network1 - homefs diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf index 1eacbe1e96..819c7fbecc 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.4.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/cloud-batch-job/README.md b/community/modules/scheduler/cloud-batch-job/README.md index 2f7151b5df..5b5328b16a 100644 --- a/community/modules/scheduler/cloud-batch-job/README.md +++ b/community/modules/scheduler/cloud-batch-job/README.md @@ -15,9 +15,9 @@ job unless one is provided. See the ## Example ```yaml -- source: community/modules/scheduler/cloud-batch-job +- id: batch-job + source: community/modules/scheduler/cloud-batch-job kind: terraform - id: batch-job use: [network1] settings: runnable: "echo 'hello world'" @@ -51,24 +51,24 @@ trying to set a property not natively supported in the `cloud-batch-job` module. deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: appfs + source: modules/file-system/filestore kind: terraform - id: appfs use: [network1] - - source: modules/scripts/startup-script + - id: batch-startup-script + source: modules/scripts/startup-script kind: terraform - id: batch-startup-script settings: runners: ... 
- - source: github.com/terraform-google-modules/terraform-google-vm//modules/instance_template?ref=v7.8.0 + - id: batch-compute-template + source: github.com/terraform-google-modules/terraform-google-vm//modules/instance_template?ref=v7.8.0 kind: terraform - id: batch-compute-template use: [batch-startup-script] settings: # Boiler plate to work with Cloud Foundation Toolkit @@ -82,9 +82,9 @@ deployment_groups: source_image_family: hpc-centos-7 source_image_project: cloud-hpc-image-public - - source: ./community/modules/scheduler/cloud-batch-job + - id: batch-job + source: ./community/modules/scheduler/cloud-batch-job kind: terraform - id: batch-job settings: instance_template: $(batch-compute-template.self_link) outputs: [instructions] diff --git a/community/modules/scheduler/cloud-batch-job/outputs.tf b/community/modules/scheduler/cloud-batch-job/outputs.tf index fcabec3d5d..4822c45bdb 100644 --- a/community/modules/scheduler/cloud-batch-job/outputs.tf +++ b/community/modules/scheduler/cloud-batch-job/outputs.tf @@ -29,7 +29,7 @@ output "instructions" { gcloud ${var.gcloud_version} batch jobs delete ${local.job_id} --location=${var.region} --project=${var.project_id} List all jobs in region: - gcloud ${var.gcloud_version} batch jobs list ${var.region} --project=${var.project_id} + gcloud ${var.gcloud_version} batch jobs list --project=${var.project_id} EOT } diff --git a/community/modules/scheduler/cloud-batch-login-node/README.md b/community/modules/scheduler/cloud-batch-login-node/README.md index 89cdb27349..a208288162 100644 --- a/community/modules/scheduler/cloud-batch-login-node/README.md +++ b/community/modules/scheduler/cloud-batch-login-node/README.md @@ -18,14 +18,14 @@ systems and test installed software before submitting a Google Cloud Batch job. ## Example ```yaml -- source: community/modules/scheduler/cloud-batch-job +- id: batch-job + source: community/modules/scheduler/cloud-batch-job kind: terraform - id: batch-job ... 
-- source: community/modules/scheduler/cloud-batch-login-node +- id: batch-login + source: community/modules/scheduler/cloud-batch-login-node kind: terraform - id: batch-login use: [batch-job] outputs: [instructions] ``` diff --git a/community/modules/scheduler/cloud-batch-login-node/outputs.tf b/community/modules/scheduler/cloud-batch-login-node/outputs.tf index 2c5ec8b613..a7e34f0223 100644 --- a/community/modules/scheduler/cloud-batch-login-node/outputs.tf +++ b/community/modules/scheduler/cloud-batch-login-node/outputs.tf @@ -37,6 +37,6 @@ output "instructions" { gcloud ${var.gcloud_version} batch jobs delete ${var.job_id} --location=${var.region} --project=${var.project_id} List all jobs in region: - gcloud ${var.gcloud_version} batch jobs list ${var.region} --project=${var.project_id} + gcloud ${var.gcloud_version} batch jobs list --project=${var.project_id} EOT } diff --git a/community/modules/scheduler/cloud-batch-login-node/versions.tf b/community/modules/scheduler/cloud-batch-login-node/versions.tf index ee89adf9c7..47ce3c04ce 100644 --- a/community/modules/scheduler/cloud-batch-login-node/versions.tf +++ b/community/modules/scheduler/cloud-batch-login-node/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:cloud-batch-login-node/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:cloud-batch-login-node/v1.4.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/htcondor-configure/README.md b/community/modules/scheduler/htcondor-configure/README.md index b7f7e98d5d..5e36ea81e7 100644 --- a/community/modules/scheduler/htcondor-configure/README.md +++ b/community/modules/scheduler/htcondor-configure/README.md @@ -24,13 +24,13 @@ install the HTCondor software and adds custom configurations using [htcondor-configure] and [htcondor-execute-point]. 
```yaml -- source: community/modules/scripts/htcondor-install +- id: htcondor_install + source: community/modules/scripts/htcondor-install kind: terraform - id: htcondor_install -- source: modules/scripts/startup-script +- id: htcondor_configure_central_manager + source: modules/scripts/startup-script kind: terraform - id: htcondor_configure_central_manager settings: runners: - type: shell @@ -39,9 +39,9 @@ install the HTCondor software and adds custom configurations using - $(htcondor_install.install_htcondor_runner) - $(htcondor_configure.central_manager_runner) -- source: modules/scripts/startup-script +- id: htcondor_configure_access_point + source: modules/scripts/startup-script kind: terraform - id: htcondor_configure_access_point settings: runners: - type: shell diff --git a/community/modules/scheduler/htcondor-configure/versions.tf b/community/modules/scheduler/htcondor-configure/versions.tf index 8f044df3e8..f9221d2919 100644 --- a/community/modules/scheduler/htcondor-configure/versions.tf +++ b/community/modules/scheduler/htcondor-configure/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.4.0" } required_version = ">= 0.13.0" diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md index a678b1b973..56a4830e3d 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md @@ -28,9 +28,9 @@ controller for optimal performance at different scales. ### Example ```yaml -- source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller +- id: slurm_controller + source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller kind: terraform - id: slurm_controller use: - network1 - homefs @@ -90,8 +90,8 @@ No providers. | Name | Source | Version | |------|--------|---------| -| [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance | v5.0.3 | -| [slurm\_controller\_template](#module\_slurm\_controller\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | v5.0.3 | +| [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance | v5.1.0 | +| [slurm\_controller\_template](#module\_slurm\_controller\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | v5.1.0 | ## Resources @@ -133,7 +133,7 @@ No resources. | [network\_self\_link](#input\_network\_self\_link) | Network to deploy to. Either network\_self\_link or subnetwork\_self\_link must be specified. | `string` | `null` | no | | [network\_storage](#input\_network\_storage) | Storage to mounted on all instances.
server\_ip : Address of the storage server.<br>remote\_mount : The location in the remote instance filesystem to mount from.<br>local\_mount : The location on the instance filesystem to mount to.<br>fs\_type : Filesystem type (e.g. "nfs").<br>mount\_options : Options to mount with. | <pre>list(object({<br>server_ip = string<br>remote_mount = string<br>local_mount = string<br>fs_type = string<br>mount_options = string<br>}))</pre>
| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Instance availability Policy. | `string` | `"MIGRATE"` | no | -| [partition](#input\_partition) | Cluster partitions as a list. |
<pre>list(object({<br>compute_list = list(string)<br>partition = object({<br>enable_job_exclusive = bool<br>enable_placement_groups = bool<br>network_storage = list(object({<br>server_ip = string<br>remote_mount = string<br>local_mount = string<br>fs_type = string<br>mount_options = string<br>}))<br>partition_conf = map(string)<br>partition_name = string<br>partition_nodes = map(object({<br>node_count_dynamic_max = number<br>node_count_static = number<br>enable_spot_vm = bool<br>group_name = string<br>instance_template = string<br>node_conf = map(string)<br>spot_instance_config = object({<br>termination_action = string<br>})<br>}))<br>subnetwork = string<br>zone_policy_allow = list(string)<br>zone_policy_deny = list(string)<br>})<br>}))</pre>
| `[]` | no | +| [partition](#input\_partition) | Cluster partitions as a list. |
<pre>list(object({<br>compute_list = list(string)<br>partition = object({<br>enable_job_exclusive = bool<br>enable_placement_groups = bool<br>network_storage = list(object({<br>server_ip = string<br>remote_mount = string<br>local_mount = string<br>fs_type = string<br>mount_options = string<br>}))<br>partition_conf = map(string)<br>partition_name = string<br>partition_nodes = map(object({<br>bandwidth_tier = string<br>node_count_dynamic_max = number<br>node_count_static = number<br>enable_spot_vm = bool<br>group_name = string<br>instance_template = string<br>node_conf = map(string)<br>spot_instance_config = object({<br>termination_action = string<br>})<br>}))<br>subnetwork = string<br>zone_policy_allow = list(string)<br>zone_policy_deny = list(string)<br>})<br>}))</pre>
| `[]` | no | | [preemptible](#input\_preemptible) | Allow the instance to be preempted. | `bool` | `false` | no | | [project\_id](#input\_project\_id) | Project ID to create resources in. | `string` | n/a | yes | | [prolog\_scripts](#input\_prolog\_scripts) | List of scripts to be used for Prolog. Programs for the slurmd to execute
whenever it is asked to run a job step from a new job allocation.<br>See https://slurm.schedmd.com/slurm.conf.html#OPT_Prolog. | <pre>list(object({<br>filename = string<br>content = string<br>}))</pre>
| `[]` | no | diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf index b6a38ed12f..52b14457ed 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf @@ -25,7 +25,7 @@ locals { }] # Since deployment name may be used to create a cluster name, we remove any invalid character from the beginning # Also, slurm imposed a lot of restrictions to this name, so we format it to an acceptable string - tmp_cluster_name = substr(replace(lower(var.deployment_name), "/^[^a-z]*|[^a-z0-9]/", ""), 0, 8) + tmp_cluster_name = substr(replace(lower(var.deployment_name), "/^[^a-z]*|[^a-z0-9]/", ""), 0, 10) slurm_cluster_name = var.slurm_cluster_name != null ? var.slurm_cluster_name : local.tmp_cluster_name enable_public_ip_access_config = var.disable_controller_public_ips ? [] : [{ nat_ip = null, network_tier = null }] @@ -34,7 +34,7 @@ locals { } module "slurm_controller_instance" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance?ref=v5.0.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance?ref=v5.1.0" access_config = local.access_config slurm_cluster_name = local.slurm_cluster_name @@ -66,7 +66,7 @@ module "slurm_controller_instance" { } module "slurm_controller_template" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=v5.0.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=v5.1.0" additional_disks = var.additional_disks can_ip_forward = var.can_ip_forward diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf index cbde26cffd..98e806c46b 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf @@ -15,7 +15,7 @@ */ # Most variables have been sourced and modified from the SchedMD/slurm-gcp -# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.0.3 +# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.1.0 variable "access_config" { description = "Access configurations, i.e. IPs via which the VM instance can be accessed via the Internet." @@ -318,6 +318,7 @@ variable "partition" { partition_conf = map(string) partition_name = string partition_nodes = map(object({ + bandwidth_tier = string node_count_dynamic_max = number node_count_static = number enable_spot_vm = bool diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md index 422710eba4..e5d75fb538 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md @@ -12,9 +12,9 @@ terraform modules. The login node is used in conjunction with the ### Example ```yaml -- source: community/modules/scheduler/schedmd-slurm-gcp-v5-login +- id: slurm_login + source: community/modules/scheduler/schedmd-slurm-gcp-v5-login kind: terraform - id: slurm_login use: - network1 - slurm_controller @@ -72,8 +72,8 @@ No providers. 
| Name | Source | Version | |------|--------|---------| -| [slurm\_login\_instance](#module\_slurm\_login\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance | v5.0.3 | -| [slurm\_login\_template](#module\_slurm\_login\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | v5.0.3 | +| [slurm\_login\_instance](#module\_slurm\_login\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance | v5.1.0 | +| [slurm\_login\_template](#module\_slurm\_login\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | v5.1.0 | ## Resources diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf index 380af82af7..7e69227157 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf @@ -21,7 +21,7 @@ locals { }] # Since deployment name may be used to create a cluster name, we remove any invalid character from the beginning # Also, slurm imposed a lot of restrictions to this name, so we format it to an acceptable string - tmp_cluster_name = substr(replace(lower(var.deployment_name), "/^[^a-z]*|[^a-z0-9]/", ""), 0, 8) + tmp_cluster_name = substr(replace(lower(var.deployment_name), "/^[^a-z]*|[^a-z0-9]/", ""), 0, 10) slurm_cluster_name = var.slurm_cluster_name != null ? var.slurm_cluster_name : local.tmp_cluster_name enable_public_ip_access_config = var.disable_login_public_ips ? [] : [{ nat_ip = null, network_tier = null }] @@ -29,9 +29,10 @@ locals { } module "slurm_login_template" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=v5.0.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=v5.1.0" additional_disks = var.additional_disks + bandwidth_tier = "platform_default" can_ip_forward = var.can_ip_forward slurm_cluster_name = local.slurm_cluster_name disable_smt = var.disable_smt @@ -65,7 +66,7 @@ module "slurm_login_template" { } module "slurm_login_instance" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance?ref=v5.0.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance?ref=v5.1.0" access_config = local.access_config slurm_cluster_name = local.slurm_cluster_name diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf index 0e465449c8..0f565de63b 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf @@ -15,7 +15,7 @@ */ # Most variables have been sourced and modified from the SchedMD/slurm-gcp -# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.0.3 +# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.1.0 variable "project_id" { type = string diff --git a/community/modules/scripts/htcondor-install/README.md b/community/modules/scripts/htcondor-install/README.md index 69c1fe6965..80c085d6ff 100644 --- a/community/modules/scripts/htcondor-install/README.md +++ b/community/modules/scripts/htcondor-install/README.md @@ -21,13 +21,13 @@ install the HTCondor software and adds custom configurations using [htcondor-configure] and 
[htcondor-execute-point]. ```yaml -- source: community/modules/scripts/htcondor-install +- id: htcondor_install + source: community/modules/scripts/htcondor-install kind: terraform - id: htcondor_install -- source: modules/scripts/startup-script +- id: htcondor_configure_central_manager + source: modules/scripts/startup-script kind: terraform - id: htcondor_configure_central_manager settings: runners: - type: shell @@ -36,9 +36,9 @@ install the HTCondor software and adds custom configurations using - $(htcondor_install.install_htcondor_runner) - $(htcondor_configure.central_manager_runner) -- source: modules/scripts/startup-script +- id: htcondor_configure_access_point + source: modules/scripts/startup-script kind: terraform - id: htcondor_configure_access_point settings: runners: - type: shell diff --git a/community/modules/scripts/htcondor-install/files/install-htcondor-autoscaler-deps.yml b/community/modules/scripts/htcondor-install/files/install-htcondor-autoscaler-deps.yml index c3109bbb94..0351dfd75f 100644 --- a/community/modules/scripts/htcondor-install/files/install-htcondor-autoscaler-deps.yml +++ b/community/modules/scripts/htcondor-install/files/install-htcondor-autoscaler-deps.yml @@ -24,13 +24,13 @@ - name: Create virtual environment for HTCondor autoscaler ansible.builtin.pip: name: pip - version: 21.3.1 # last Python 2.7-compatible release + version: 21.3.1 # last Python 2.7-compatible release virtualenv: /usr/local/htcondor virtualenv_command: /usr/bin/python3 -m venv - name: Install latest setuptools ansible.builtin.pip: name: setuptools - state: 44.1.1 # last Python 2.7-compatible release + state: 44.1.1 # last Python 2.7-compatible release virtualenv: /usr/local/htcondor virtualenv_command: /usr/bin/python3 -m venv - name: Install HTCondor autoscaler dependencies @@ -41,6 +41,6 @@ - htcondor ansible.builtin.pip: name: "{{ item }}" - state: present # rely on pip resolver to pick latest compatible releases + state: present # rely on pip resolver to pick latest compatible releases virtualenv: /usr/local/htcondor virtualenv_command: /usr/bin/python3 -m venv diff --git a/community/modules/scripts/omnia-install/README.md b/community/modules/scripts/omnia-install/README.md index 7556b4f59b..e65514df21 100644 --- a/community/modules/scripts/omnia-install/README.md +++ b/community/modules/scripts/omnia-install/README.md @@ -1,14 +1,16 @@ ## Description -This module will install [DellHPC Omnia](https://github.com/dellhpc/omnia) -onto a cluster supporting a slurm controller and compute nodes. To see a full -example using omnia-install, see the -[omnia-cluster example](../../../community/examples/omnia-cluster.yaml). +This module will create a set of startup-script runners that will install and +run [DellHPC Omnia](https://github.com/dellhpc/omnia) version 1.3 onto a set of +VMs representing a slurm controller and compute nodes. For a full example using +omnia-install, see the [omnia-cluster example]. **Warning**: This module will create a user named "omnia" by default which has sudo permissions. You may want to remove this user and/or it's permissions from each node. +[omnia-cluster example]: ../../../community/examples/omnia-cluster.yaml + ## License @@ -53,6 +55,7 @@ No resources. 
| [manager\_ips](#input\_manager\_ips) | IPs of the Omnia manager nodes | `list(string)` | n/a | yes | | [omnia\_username](#input\_omnia\_username) | Name of the user that installs omnia | `string` | `"omnia"` | no | | [slurm\_uid](#input\_slurm\_uid) | User ID of the slurm user | `number` | `981` | no | +| [virtualenv](#input\_virtualenv) | Path to a virtual environment on the Omnia manager and compute VMs that
should be used for installing packages with pip. Defaults to the virtual
environment created by the startup-scripts module, /usr/local/ghpc-venv.
If the virtual environment cannot be found, the system environment will be
used instead. | `string` | `"/usr/local/ghpc-venv"` | no | ## Outputs diff --git a/community/modules/scripts/omnia-install/main.tf b/community/modules/scripts/omnia-install/main.tf index ee8edb2a18..f9de94b5a3 100644 --- a/community/modules/scripts/omnia-install/main.tf +++ b/community/modules/scripts/omnia-install/main.tf @@ -27,18 +27,20 @@ locals { setup_omnia_node_file = templatefile( "${path.module}/templates/setup_omnia_node.tpl", { - username = var.omnia_username - install_dir = local.install_dir + username = var.omnia_username + install_dir = local.install_dir + virtualenv_path = var.virtualenv } ) install_file = templatefile( "${path.module}/templates/install_omnia.tpl", { - username = var.omnia_username - install_dir = local.install_dir - omnia_compute = var.compute_ips - nodecount = local.nodecount - slurm_uid = var.slurm_uid + username = var.omnia_username + install_dir = local.install_dir + omnia_compute = var.compute_ips + nodecount = local.nodecount + slurm_uid = var.slurm_uid + virtualenv_path = var.virtualenv } ) inventory_path = "${local.install_dir}/inventory" diff --git a/community/modules/scripts/omnia-install/templates/install_omnia.tpl b/community/modules/scripts/omnia-install/templates/install_omnia.tpl index c812aac070..5989e8f9b1 100644 --- a/community/modules/scripts/omnia-install/templates/install_omnia.tpl +++ b/community/modules/scripts/omnia-install/templates/install_omnia.tpl @@ -19,34 +19,33 @@ vars: username: ${username} pub_key_path: "/home/{{ username }}/.ssh" - pub_key_file: "{{pub_key_path}}/id_rsa" - auth_key_file: "{{pub_key_path}}/authorized_keys" + pub_key_file: "{{ pub_key_path }}/id_rsa" + auth_key_file: "{{ pub_key_path }}/authorized_keys" tasks: - - name: "Create {{pub_key_path}} folder" - file: - path: "{{pub_key_path}}" + - name: "Create {{ pub_key_path }} folder" + ansible.builtin.file: + path: "{{ pub_key_path }}" state: directory mode: 0700 - owner: "{{username}}" + owner: "{{ username }}" - name: Create keys - openssh_keypair: - path: "{{pub_key_file}}" - owner: "{{username}}" + ansible.builtin.openssh_keypair: + path: "{{ pub_key_file }}" + owner: "{{ username }}" - name: Copy public key to authorized keys - copy: - src: "{{pub_key_file}}.pub" - dest: "{{auth_key_file}}" - owner: "{{username}}" + ansible.builtin.copy: + src: "{{ pub_key_file }}.pub" + dest: "{{ auth_key_file }}" + owner: "{{ username }}" mode: 0644 - name: Install necessary dependencies hosts: localhost tasks: - - name: Install git and epel-release - package: + - name: Install git + ansible.builtin.package: name: - git - - epel-release state: latest - name: Prepare the system for Omnia installation @@ -56,40 +55,29 @@ omnia_dir: "{{ install_dir }}/omnia" slurm_uid: ${slurm_uid} tasks: - - name: Unmask and restart firewalld - become: true - command: systemctl unmask firewalld && systemctl restart firewalld - name: Git checkout - git: + ansible.builtin.git: repo: 'https://github.com/dellhpc/omnia.git' dest: "{{ omnia_dir }}" - version: release-1.0 + version: v1.3 update: false - name: Copy inventory file with owner and permissions - copy: + ansible.builtin.copy: src: "{{ install_dir }}/inventory" dest: "{{ omnia_dir }}/inventory" mode: 0644 - - name: Update omnia.yml setting become to yes - replace: - path: "{{ omnia_dir }}/omnia.yml" - regexp: '- name(.*)' - replace: '- name\1\n become: yes' - - name: Patch Slurm source URL - replace: - path: "{{ omnia_dir }}/roles/slurm_manager/vars/main.yml" - regexp: '(.*)slurm-20.11.7.tar.bz2(.*)' - replace: 
'\1slurm-20.11.9.tar.bz2\2' - - name: Patch Slurm source checksum - replace: - path: "{{ omnia_dir }}/roles/slurm_manager/vars/main.yml" - regexp: '^slurm_md5: .*' - replace: 'slurm_md5: "md5:79b39943768ef21b83585e2f5087d9af"' - - name: Add slurm user ID to the omnia vars - replace: + - name: Force update the ansible.utils collection + command: ansible-galaxy collection install ansible.utils --force + - name: Update omnia config to not use a login node + ansible.builtin.lineinfile: + path: "{{ omnia_dir }}/omnia_config.yml" + regexp: '^login_node_required: .*' + line: 'login_node_required: false' + - name: Update omnia config to set the slurm UID + ansible.builtin.lineinfile: path: "{{ omnia_dir }}/roles/slurm_common/vars/main.yml" regexp: '^slurm_uid: ".*"' - replace: 'slurm_uid: "{{ slurm_uid }}"' + line: 'slurm_uid: "{{ slurm_uid }}"' - name: Run the Omnia installation once all nodes are ready hosts: localhost @@ -97,28 +85,46 @@ nodecount: ${nodecount} install_dir: ${install_dir} username: ${username} + venv: ${virtualenv_path} omnia_dir: "{{ install_dir }}/omnia" state_dir: "{{ install_dir }}/state" become_user: "{{ username }}" remote_user: "{{ username }}" tasks: - name: Wait for nodes to setup - shell: | + ansible.builtin.shell: | files=$(ls {{ state_dir }} | wc -l) if [ $files -eq ${nodecount} ]; then exit 0; fi echo "Waiting for ${nodecount} nodes to be ready, found $${files} nodes ready" exit 1 delay: 2 retries: 300 + - name: Checking if the provided virtualenv exists + stat: + path: "{{ venv }}" + register: venv_dir + - name: Run omnia using provided virtualenv for the python provider + ansible.builtin.shell: | + ansible-playbook omnia.yml \ + --private-key /home/{{ username }}/.ssh/id_rsa \ + --inventory inventory \ + --user "{{ username }}" --become \ + --e "ansible_python_interpreter={{ venv }}/bin/python3" \ + --skip-tags "kubernetes,nfs_client" + args: + chdir: "{{ omnia_dir }}" + environment: + ANSIBLE_HOST_KEY_CHECKING: False + when: venv_dir.stat.exists - name: Run omnia - shell: | + ansible.builtin.shell: | ansible-playbook omnia.yml \ --private-key /home/{{ username }}/.ssh/id_rsa \ --inventory inventory \ - --user "{{ username }}" \ - --e "ansible_python_interpreter=/usr/bin/python2" \ - --skip-tags "kubernetes" + --user "{{ username }}" --become \ + --skip-tags "kubernetes,nfs_client" args: chdir: "{{ omnia_dir }}" environment: ANSIBLE_HOST_KEY_CHECKING: False + when: not venv_dir.stat.exists diff --git a/community/modules/scripts/omnia-install/templates/inventory.tpl b/community/modules/scripts/omnia-install/templates/inventory.tpl index f2f7f92b9e..2e8a275a16 100644 --- a/community/modules/scripts/omnia-install/templates/inventory.tpl +++ b/community/modules/scripts/omnia-install/templates/inventory.tpl @@ -6,3 +6,7 @@ ${vm} %{for vm in omnia_compute ~} ${vm} %{endfor} + +[nfs_node] + +[login_node] diff --git a/community/modules/scripts/omnia-install/templates/setup_omnia_node.tpl b/community/modules/scripts/omnia-install/templates/setup_omnia_node.tpl index bd0fbc5b9d..071390f9e8 100644 --- a/community/modules/scripts/omnia-install/templates/setup_omnia_node.tpl +++ b/community/modules/scripts/omnia-install/templates/setup_omnia_node.tpl @@ -13,21 +13,42 @@ # limitations under the License. 
--- -- name: Create Omnia User +- name: Create user for installing Omnia hosts: localhost vars: username: ${username} tasks: - - name: Create user omnia - user: + - name: Create a new user + ansible.builtin.user: name: "{{ username }}" - - name: Allow '{{ username }}' user to have passwordless sudo - lineinfile: + - name: Allow '{{ username }}' user to have passwordless sudo access + ansible.builtin.lineinfile: dest: /etc/sudoers state: present regexp: '^%%{{ username }}' line: '%%{{ username }} ALL=(ALL) NOPASSWD: ALL' +- name: Setup selinux + hosts: localhost + vars: + venv: ${virtualenv_path} + tasks: + - name: Checking if the provided virtualenv exists + stat: + path: "{{ venv }}" + register: venv_dir + - name: Install selinux using system pip + ansible.builtin.pip: + name: selinux + when: not venv_dir.stat.exists + - name: Install selinux into provided virtualenv + ansible.builtin.pip: + name: selinux + virtualenv: /usr/local/ghpc-venv + when: venv_dir.stat.exists + - name: Allow SSH on NFS-based home directory + ansible.builtin.command: setsebool -P use_nfs_home_dirs 1 + - name: Set Status file hosts: localhost vars: @@ -35,13 +56,13 @@ state_dir: "{{ install_dir }}/state" tasks: - name: Get hostname - command: hostname + ansible.builtin.command: hostname register: machine_hostname - name: Create state dir if not already created - file: + ansible.builtin.file: path: "{{ state_dir }}" state: directory - name: Create file - file: + ansible.builtin.file: path: "{{ state_dir }}/{{ machine_hostname.stdout }}" state: touch diff --git a/community/modules/scripts/omnia-install/variables.tf b/community/modules/scripts/omnia-install/variables.tf index 65172c02cc..93011303e6 100644 --- a/community/modules/scripts/omnia-install/variables.tf +++ b/community/modules/scripts/omnia-install/variables.tf @@ -44,3 +44,15 @@ variable "slurm_uid" { default = 981 type = number } + +variable "virtualenv" { + description = <<-EOT + Path to a virtual environment on the Omnia manager and compute VMs that + should be used for installing packages with pip. Defaults to the virtual + environment created by the startup-scripts module, /usr/local/ghpc-venv. + If the virtual environment cannot be found, the system environment will be + used instead. + EOT + default = "/usr/local/ghpc-venv" + type = string +} diff --git a/community/modules/scripts/spack-install/README.md b/community/modules/scripts/spack-install/README.md index 8069ab4330..8c6c1c539b 100644 --- a/community/modules/scripts/spack-install/README.md +++ b/community/modules/scripts/spack-install/README.md @@ -30,9 +30,9 @@ As an example, the below is a possible definition of a spack installation. To see this module used in a full blueprint, see the [spack-gromacs.yaml] example. 
```yaml - - source: community/modules/scripts/spack-install + - id: spack + source: community/modules/scripts/spack-install kind: terraform - id: spack settings: install_dir: /sw/spack spack_url: https://github.com/spack/spack @@ -95,23 +95,23 @@ Following the above description of this module, it can be added to a Slurm deployment via the following: ```yaml -- source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller - kind: terraform - id: slurm_controller - use: [spack] - settings: - subnetwork_name: ((module.network1.primary_subnetwork.name)) - login_node_count: 1 - partitions: - - $(compute_partition.partition) +- id: slurm_controller + source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller + kind: terraform + use: [spack] + settings: + subnetwork_name: ((module.network1.primary_subnetwork.name)) + login_node_count: 1 + partitions: + - $(compute_partition.partition) ``` Alternatively, it can be added as a startup script via: ```yaml - - source: modules/scripts/startup-script + - id: startup + source: modules/scripts/startup-script kind: terraform - id: startup settings: runners: - $(spack.install_spack_deps_runner) diff --git a/community/modules/scripts/spack-install/templates/install_spack.tpl b/community/modules/scripts/spack-install/templates/install_spack.tpl index d48ccc89b1..620ca3e7ff 100755 --- a/community/modules/scripts/spack-install/templates/install_spack.tpl +++ b/community/modules/scripts/spack-install/templates/install_spack.tpl @@ -9,6 +9,11 @@ if [[ $EUID -ne 0 ]]; then exit 1 fi +# Activate ghpc-venv virtual environment if it exists +if [ -d /usr/local/ghpc-venv ]; then + source /usr/local/ghpc-venv/bin/activate +fi + # Only install and configure spack if ${INSTALL_DIR} doesn't exist if [ ! -d ${INSTALL_DIR} ]; then @@ -151,4 +156,3 @@ echo "source ${INSTALL_DIR}/share/spack/setup-env.sh" >> /etc/profile.d/spack.sh chmod a+rx /etc/profile.d/spack.sh echo "$PREFIX Setup complete..." -exit 0 diff --git a/community/modules/scripts/wait-for-startup/README.md b/community/modules/scripts/wait-for-startup/README.md index 5973290416..705699f8b7 100644 --- a/community/modules/scripts/wait-for-startup/README.md +++ b/community/modules/scripts/wait-for-startup/README.md @@ -15,9 +15,9 @@ up a node. ### Example ```yaml -- source: community/modules/scripts/wait-for-startup +- id: wait + source: community/modules/scripts/wait-for-startup kind: terraform - id: wait settings: instance_name: ((module.workstation.name[0])) ``` diff --git a/community/modules/scripts/wait-for-startup/versions.tf b/community/modules/scripts/wait-for-startup/versions.tf index 8364495d6a..372387666c 100644 --- a/community/modules/scripts/wait-for-startup/versions.tf +++ b/community/modules/scripts/wait-for-startup/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.4.0" } required_version = ">= 0.14.0" diff --git a/docs/tutorials/README.md b/docs/tutorials/README.md index 66c5e75d06..27b43c78a0 100644 --- a/docs/tutorials/README.md +++ b/docs/tutorials/README.md @@ -53,15 +53,15 @@ minutes. 
### Gromacs -[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://shell.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fhpc-toolkit&cloudshell_git_branch=develop&cloudshell_open_in_editor=docs%2Ftutorials%2Fgromacs%2Fspack-gromacs.yaml&cloudshell_tutorial=docs%2Ftutorials%2Fgromacs%2Fspack-gromacs.md) +[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://shell.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fhpc-toolkit&cloudshell_git_branch=main&cloudshell_open_in_editor=docs%2Ftutorials%2Fgromacs%2Fspack-gromacs.yaml&cloudshell_tutorial=docs%2Ftutorials%2Fgromacs%2Fspack-gromacs.md) ### Openfoam -[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://shell.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fhpc-toolkit&cloudshell_git_branch=develop&cloudshell_open_in_editor=docs%2Ftutorials%2Fopenfoam%2Fspack-openfoam.yaml&cloudshell_tutorial=docs%2Ftutorials%2Fopenfoam%2Fspack-openfoam.md) +[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://shell.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fhpc-toolkit&cloudshell_git_branch=main&cloudshell_open_in_editor=docs%2Ftutorials%2Fopenfoam%2Fspack-openfoam.yaml&cloudshell_tutorial=docs%2Ftutorials%2Fopenfoam%2Fspack-openfoam.md) ### Weather Research and Forecasting (WRF) Model -[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://shell.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fhpc-toolkit&cloudshell_git_branch=develop&cloudshell_open_in_editor=docs%2Ftutorials%2Fwrfv3%2Fspack-wrfv3.yaml&cloudshell_tutorial=docs%2Ftutorials%2Fwrfv3%2Fspack-wrfv3.md) +[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://shell.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fhpc-toolkit&cloudshell_git_branch=main&cloudshell_open_in_editor=docs%2Ftutorials%2Fwrfv3%2Fspack-wrfv3.yaml&cloudshell_tutorial=docs%2Ftutorials%2Fwrfv3%2Fspack-wrfv3.md) ### Blueprint Diagram for Application Tutorials diff --git a/docs/tutorials/gromacs/spack-gromacs.md b/docs/tutorials/gromacs/spack-gromacs.md index 16caa06811..09d7b4ab53 100644 --- a/docs/tutorials/gromacs/spack-gromacs.md +++ b/docs/tutorials/gromacs/spack-gromacs.md @@ -46,9 +46,9 @@ PROJECT_NUMBER=$(gcloud projects list --filter= --forma echo "granting roles/editor to $PROJECT_NUMBER-compute@developer.gserviceaccount.com" -gcloud iam service-accounts enable --project "$PROJECT_NUMBER"-compute@developer.gserviceaccount.com +gcloud iam service-accounts enable --project $PROJECT_NUMBER-compute@developer.gserviceaccount.com -gcloud projects add-iam-policy-binding --member=serviceAccount:"$PROJECT_NUMBER"-compute@developer.gserviceaccount.com --role=roles/editor +gcloud projects add-iam-policy-binding --member=serviceAccount:$PROJECT_NUMBER-compute@developer.gserviceaccount.com --role=roles/editor ``` ## Build the Toolkit Binary diff --git a/docs/tutorials/gromacs/spack-gromacs.yaml b/docs/tutorials/gromacs/spack-gromacs.yaml index fd23f9fac6..e7d350a386 100644 --- a/docs/tutorials/gromacs/spack-gromacs.yaml +++ b/docs/tutorials/gromacs/spack-gromacs.yaml @@ -26,18 +26,18 @@ vars: deployment_groups: - group: primary 
modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/monitoring/dashboard + - id: hpc_dash + source: modules/monitoring/dashboard kind: terraform - id: hpc_dash ## Install Scripts - - source: community/modules/scripts/spack-install + - id: spack + source: community/modules/scripts/spack-install kind: terraform - id: spack settings: install_dir: /apps/spack spack_url: https://github.com/spack/spack @@ -97,9 +97,9 @@ deployment_groups: - mirror_name: gcs_cache mirror_url: $(vars.spack_cache_mirror_url) - - source: modules/scripts/startup-script + - id: controller-setup + source: modules/scripts/startup-script kind: terraform - id: controller-setup settings: runners: - type: shell @@ -142,18 +142,18 @@ deployment_groups: gmx_mpi grompp -f pme.mdp -c conf.gro -p topol.top -o input.tpr mpirun -n 60 -hostfile hostfile -ppn 30 gmx_mpi mdrun -notunepme -dlb yes -v -resethway -noconfout -nsteps 4000 -s input.tpr - - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: - network1 settings: partition_name: compute max_node_count: 20 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - network1 - compute_partition @@ -161,9 +161,9 @@ deployment_groups: controller_startup_script: $(controller-setup.startup_script) login_node_count: 1 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - network1 - slurm_controller diff --git a/docs/tutorials/intel-select/hpc-cluster-intel-select.yaml b/docs/tutorials/intel-select/hpc-cluster-intel-select.yaml index d90b938090..c1ea22ae51 100644 --- a/docs/tutorials/intel-select/hpc-cluster-intel-select.yaml +++ b/docs/tutorials/intel-select/hpc-cluster-intel-select.yaml @@ -25,20 +25,20 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home - - source: modules/scripts/startup-script + - id: startup-controller + source: modules/scripts/startup-script kind: terraform - id: startup-controller settings: runners: - type: shell @@ -48,9 +48,9 @@ deployment_groups: google_install_mpi --prefix /apps --intel_compliance destination: /var/tmp/install_intel_controller.sh - - source: modules/scripts/startup-script + - id: startup-compute + source: modules/scripts/startup-script kind: terraform - id: startup-compute settings: runners: - type: shell @@ -61,9 +61,9 @@ deployment_groups: destination: /var/tmp/install_intel_compute.sh # This debug_partition will work out of the box without requesting additional GCP quota. 
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: debug_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: debug_partition use: - network1 - homefs @@ -75,9 +75,9 @@ deployment_groups: machine_type: n2-standard-2 # This compute_partition is far more performant than debug_partition but may require requesting GCP quotas first. - - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: - network1 - homefs @@ -85,9 +85,9 @@ deployment_groups: partition_name: compute max_node_count: 20 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - network1 - homefs @@ -99,9 +99,9 @@ deployment_groups: compute_startup_script: $(startup-compute.startup_script) - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - network1 - homefs diff --git a/docs/tutorials/intel-select/intel-select.md b/docs/tutorials/intel-select/intel-select.md index be04c71621..32da17f8b1 100644 --- a/docs/tutorials/intel-select/intel-select.md +++ b/docs/tutorials/intel-select/intel-select.md @@ -37,9 +37,9 @@ PROJECT_NUMBER=$(gcloud projects list --filter= --forma echo "granting roles/editor to $PROJECT_NUMBER-compute@developer.gserviceaccount.com" -gcloud iam service-accounts enable --project "$PROJECT_NUMBER"-compute@developer.gserviceaccount.com +gcloud iam service-accounts enable --project $PROJECT_NUMBER-compute@developer.gserviceaccount.com -gcloud projects add-iam-policy-binding --member=serviceAccount:"$PROJECT_NUMBER"-compute@developer.gserviceaccount.com --role=roles/editor +gcloud projects add-iam-policy-binding --member=serviceAccount:$PROJECT_NUMBER-compute@developer.gserviceaccount.com --role=roles/editor ``` ## Build the Toolkit Binary diff --git a/docs/tutorials/openfoam/spack-openfoam.md b/docs/tutorials/openfoam/spack-openfoam.md index fd573bce49..f31f510cc0 100644 --- a/docs/tutorials/openfoam/spack-openfoam.md +++ b/docs/tutorials/openfoam/spack-openfoam.md @@ -46,9 +46,9 @@ PROJECT_NUMBER=$(gcloud projects list --filter= --forma echo "granting roles/editor to $PROJECT_NUMBER-compute@developer.gserviceaccount.com" -gcloud iam service-accounts enable --project "$PROJECT_NUMBER"-compute@developer.gserviceaccount.com +gcloud iam service-accounts enable --project $PROJECT_NUMBER-compute@developer.gserviceaccount.com -gcloud projects add-iam-policy-binding --member=serviceAccount:"$PROJECT_NUMBER"-compute@developer.gserviceaccount.com --role=roles/editor +gcloud projects add-iam-policy-binding --member=serviceAccount:$PROJECT_NUMBER-compute@developer.gserviceaccount.com --role=roles/editor ``` ## Build the Toolkit Binary diff --git a/docs/tutorials/openfoam/spack-openfoam.yaml b/docs/tutorials/openfoam/spack-openfoam.yaml index ceb43699d0..5f2fae7f6f 100644 --- a/docs/tutorials/openfoam/spack-openfoam.yaml +++ b/docs/tutorials/openfoam/spack-openfoam.yaml @@ -26,18 +26,18 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: 
modules/monitoring/dashboard + - id: hpc_dash + source: modules/monitoring/dashboard kind: terraform - id: hpc_dash ## Install Scripts - - source: community/modules/scripts/spack-install + - id: spack + source: community/modules/scripts/spack-install kind: terraform - id: spack settings: install_dir: /apps/spack spack_url: https://github.com/spack/spack @@ -104,9 +104,9 @@ deployment_groups: - mirror_name: gcs_cache mirror_url: $(vars.spack_cache_mirror_url) - - source: modules/scripts/startup-script + - id: controller-setup + source: modules/scripts/startup-script kind: terraform - id: controller-setup settings: runners: - type: shell @@ -153,18 +153,18 @@ deployment_groups: mpirun -n 60 -npernode 30 -hostfile hostfile snappyHexMesh -overwrite -parallel mpirun -n 60 -npernode 30 -hostfile hostfile potentialFoam -parallel mpirun -n 60 -npernode 30 -hostfile hostfile simpleFoam -parallel - - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: - network1 settings: partition_name: compute max_node_count: 20 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - network1 - compute_partition @@ -172,9 +172,9 @@ deployment_groups: controller_startup_script: $(controller-setup.startup_script) login_node_count: 1 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - network1 - slurm_controller diff --git a/docs/tutorials/wrfv3/spack-wrfv3.md b/docs/tutorials/wrfv3/spack-wrfv3.md index 886b7c8da7..039739250d 100644 --- a/docs/tutorials/wrfv3/spack-wrfv3.md +++ b/docs/tutorials/wrfv3/spack-wrfv3.md @@ -46,9 +46,9 @@ PROJECT_NUMBER=$(gcloud projects list --filter= --forma echo "granting roles/editor to $PROJECT_NUMBER-compute@developer.gserviceaccount.com" -gcloud iam service-accounts enable --project "$PROJECT_NUMBER"-compute@developer.gserviceaccount.com +gcloud iam service-accounts enable --project $PROJECT_NUMBER-compute@developer.gserviceaccount.com -gcloud projects add-iam-policy-binding --member=serviceAccount:"$PROJECT_NUMBER"-compute@developer.gserviceaccount.com --role=roles/editor +gcloud projects add-iam-policy-binding --member=serviceAccount:$PROJECT_NUMBER-compute@developer.gserviceaccount.com --role=roles/editor ``` ## Build the Toolkit Binary diff --git a/docs/tutorials/wrfv3/spack-wrfv3.yaml b/docs/tutorials/wrfv3/spack-wrfv3.yaml index 6ff08c933e..e5503b0132 100644 --- a/docs/tutorials/wrfv3/spack-wrfv3.yaml +++ b/docs/tutorials/wrfv3/spack-wrfv3.yaml @@ -26,18 +26,18 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/monitoring/dashboard + - id: hpc_dash + source: modules/monitoring/dashboard kind: terraform - id: hpc_dash ## Install Scripts - - source: community/modules/scripts/spack-install + - id: spack + source: community/modules/scripts/spack-install kind: terraform - id: spack settings: install_dir: /apps/spack spack_url: https://github.com/spack/spack @@ -97,9 +97,9 @@ deployment_groups: - mirror_name: gcs_cache mirror_url: $(vars.spack_cache_mirror_url) - - source: 
modules/scripts/startup-script + - id: controller-setup + source: modules/scripts/startup-script kind: terraform - id: controller-setup settings: runners: - type: shell @@ -140,18 +140,18 @@ deployment_groups: mpirun -n 60 -hostfile hostfile -ppn ${SLURM_NTASKS_PER_NODE} wrf.exe - - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: - network1 settings: partition_name: compute max_node_count: 20 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - network1 - compute_partition @@ -159,9 +159,9 @@ deployment_groups: controller_startup_script: $(controller-setup.startup_script) login_node_count: 1 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - network1 - slurm_controller diff --git a/examples/README.md b/examples/README.md index d50038d336..3814532779 100644 --- a/examples/README.md +++ b/examples/README.md @@ -484,9 +484,9 @@ spack load gromacs ### [omnia-cluster.yaml] ![community-badge] ![experimental-badge] Creates a simple [Dell Omnia][omnia-github] provisioned cluster with an -omnia-manager node and 2 omnia-compute nodes on the pre-existing default -network. Omnia will be automatically installed after the nodes are provisioned. -All nodes mount a filestore instance on `/home`. +omnia-manager node that acts as the slurm manager and 2 omnia-compute nodes on +the pre-existing default network. Omnia will be automatically installed after +the nodes are provisioned. All nodes mount a filestore instance on `/home`. > **_NOTE:_** The omnia-cluster.yaml example uses `vm-instance` modules to > create the cluster. For these instances, Simultaneous Multithreading (SMT) is @@ -526,7 +526,7 @@ A user defined blueprint should follow the following schema: ```yaml # Required: Name your blueprint. -blueprint_name: MyBlueprintName +blueprint_name: my-blueprint-name # Top-level variables, these will be pulled from if a required variable is not # provided as part of a module. Any variables can be set here by the user, @@ -551,9 +551,9 @@ deployment_groups: modules: # Local source, prefixed with ./ (/ and ../ also accepted) - - source: ./modules/role/module-name # Required: Points to the module directory. + - id: # Required: Name of this module used to uniquely identify it. + source: ./modules/role/module-name # Required: Points to the module directory. kind: < terraform | packer > # Required: Type of module, currently choose from terraform or packer. - id: # Required: Name of this module used to uniquely identify it. # Optional: All configured settings for the module. For terraform, each # variable listed in variables.tf can be set here, and are mandatory if no # default was provided and are not defined elsewhere (like the top-level vars) @@ -586,6 +586,10 @@ below. * **blueprint_name** (required): This name can be used to track resources and usage across multiple deployments that come from the same blueprint. 
+ `blueprint_name` is used as a value for the `ghpc_blueprint` label key, and + must abide by label value naming constraints: `blueprint_name` must be at most + 63 characters long, and can only contain lowercase letters, numeric + characters, underscores and dashes. ### Deployment Variables @@ -711,10 +715,11 @@ vars: deployment_groups: - group: primary modules: - - source: path/to/module/1 - id: resource1 + - id: resource1 + source: path/to/module/1 ... - - source: path/to/module/2 + - id: resource2 + source: path/to/module/2 ... settings: key1: $(vars.zone) diff --git a/examples/hpc-cluster-high-io.yaml b/examples/hpc-cluster-high-io.yaml index 719130556d..fd7b82a552 100644 --- a/examples/hpc-cluster-high-io.yaml +++ b/examples/hpc-cluster-high-io.yaml @@ -31,36 +31,36 @@ deployment_groups: # Source is an embedded module, denoted by "modules/*" without ./, ../, / # as a prefix.
To refer to a local or community module, prefix with ./, ../ or / # Example - ./modules/network/vpc - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home # This debug_partition will work out of the box without requesting additional GCP quota. - - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: debug_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: debug_partition use: - network1 - homefs @@ -57,9 +57,9 @@ deployment_groups: machine_type: n2-standard-2 # This compute_partition is far more performant than debug_partition but may require requesting GCP quotas first. - - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: - network1 - homefs @@ -67,9 +67,9 @@ deployment_groups: partition_name: compute max_node_count: 20 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - network1 - homefs @@ -79,9 +79,9 @@ deployment_groups: login_node_count: 1 suspend_time: 60 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - network1 - homefs diff --git a/examples/image-builder.yaml b/examples/image-builder.yaml index fd74a13c4b..6893c612cf 100644 --- a/examples/image-builder.yaml +++ b/examples/image-builder.yaml @@ -30,12 +30,12 @@ vars: deployment_groups: - group: builder-env modules: - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 - - source: modules/scripts/startup-script + - id: scripts_for_image + source: modules/scripts/startup-script kind: terraform - id: scripts_for_image settings: runners: - type: shell @@ -47,9 +47,9 @@ deployment_groups: - group: packer modules: - - source: modules/packer/custom-image + - id: custom-image + source: modules/packer/custom-image kind: packer - id: custom-image settings: disk_size: 20 source_image_project_id: [schedmd-slurm-public] @@ -58,12 +58,12 @@ deployment_groups: - group: cluster modules: - - source: modules/network/pre-existing-vpc + - id: cluster-network + source: modules/network/pre-existing-vpc kind: terraform - id: cluster-network - - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: [cluster-network] settings: partition_name: compute @@ -71,18 +71,18 @@ deployment_groups: instance_image: family: $(vars.new_image_family) project: $(vars.project_id) - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: [cluster-network, compute_partition] settings: login_node_count: 1 instance_image: family: $(vars.new_image_family) project: $(vars.project_id) - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + 
source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: [cluster-network, slurm_controller] settings: instance_image: diff --git a/go.mod b/go.mod index 98f95ebbe9..3f87f0fd9a 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module hpc-toolkit go 1.16 require ( - cloud.google.com/go/compute v1.8.0 + cloud.google.com/go/compute v1.9.0 github.com/hashicorp/go-getter v1.6.2 github.com/hashicorp/hcl v1.0.0 // indirect github.com/hashicorp/hcl/v2 v2.13.0 diff --git a/go.sum b/go.sum index cf85e02c4a..380138b951 100644 --- a/go.sum +++ b/go.sum @@ -44,8 +44,8 @@ cloud.google.com/go/compute v1.5.0/go.mod h1:9SMHyhJlzhlkJqrPAc839t2BZFTSk6Jdj6m cloud.google.com/go/compute v1.6.0/go.mod h1:T29tfhtVbq1wvAPo0E3+7vhgmkOYeXjhFvz/FMzPu0s= cloud.google.com/go/compute v1.6.1/go.mod h1:g85FgpzFvNULZ+S8AYq87axRKuf2Kh7deLqV/jJ3thU= cloud.google.com/go/compute v1.7.0/go.mod h1:435lt8av5oL9P3fv1OEzSbSUe+ybHXGMPQHHZWZxy9U= -cloud.google.com/go/compute v1.8.0 h1:NLtR56/eKx9K1s2Tw/4hec2vsU1S3WeKRMj8HXbBo6E= -cloud.google.com/go/compute v1.8.0/go.mod h1:boQ44qJsMqZjKzzsEkoJWQGj4h8ygmyk17UArClWzmg= +cloud.google.com/go/compute v1.9.0 h1:ED/FP4xv8GJw63v556/ASNc1CeeLUO2Bs8nzaHchkHg= +cloud.google.com/go/compute v1.9.0/go.mod h1:lWv1h/zUWTm/LozzfTJhBSkd6ShQq8la8VeeuOEGxfY= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= cloud.google.com/go/iam v0.3.0 h1:exkAomrVUuzx9kWFI1wm3KI0uoDeUFPB4kKGzx6x+Gc= @@ -764,8 +764,9 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= +google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= diff --git a/modules/README.md b/modules/README.md index bbb900c227..e5947d1242 100644 --- a/modules/README.md +++ b/modules/README.md @@ -146,8 +146,8 @@ Modules that are still in development and less stable are labeled with the * **[htcondor-install]** ![community-badge] ![experimental-badge] : Creates a startup script to install HTCondor and exports a list of required APIs * **[omnia-install]** ![community-badge] ![experimental-badge] : Installs Slurm - via [Dell Omnia](https://github.com/dellhpc/omnia) onto a cluster of compute - VMs. + via [Dell Omnia](https://github.com/dellhpc/omnia) onto a cluster of VMs + instances. * **[spack-install]** ![community-badge] ![experimental-badge] : Creates a startup script to install [Spack](https://github.com/spack/spack) on an instance or a slurm login or controller. 
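For orientation, a minimal sketch of a blueprint entry consuming the omnia-install module with the new `virtualenv` input added in this change might look like the following; the `omnia` module id and the `$(compute.internal_ip)` wiring are illustrative assumptions, not values taken from this patch.

```yaml
  - id: omnia
    source: community/modules/scripts/omnia-install
    kind: terraform
    settings:
      manager_ips: [localhost]
      compute_ips: $(compute.internal_ip)  # hypothetical reference to a vm-instance module's IPs
      virtualenv: /usr/local/ghpc-venv     # default; falls back to the system environment if absent
```

Leaving `virtualenv` unset behaves the same way, since `/usr/local/ghpc-venv` is the declared default.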
@@ -184,9 +184,9 @@ review the directory structure of [the core modules](./) and example, the following code is using the embedded pre-existing-vpc module: ```yaml - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 ``` #### Local Modules @@ -197,9 +197,9 @@ the source to a path starting with `/`, `./`, or `../`. For instance, the following module definition refers the local pre-existing-vpc modules. ```yaml - - source: ./modules/network/pre-existing-vpc + - id: network1 + source: ./modules/network/pre-existing-vpc kind: terraform - id: network1 ``` > **_NOTE:_** This example would have to be run from the HPC Toolkit repository @@ -216,17 +216,17 @@ Toolkit GitHub repository: Get module from GitHub over SSH: ```yaml - - source: git@github.com:GoogleCloudPlatform/hpc-toolkit.git//modules/network/vpc + - id: network1 + source: git@github.com:GoogleCloudPlatform/hpc-toolkit.git//modules/network/vpc kind: terraform - id: network1 ``` Get module from GitHub over HTTPS: ```yaml - - source: github.com/GoogleCloudPlatform/hpc-toolkit//modules/network/vpc + - id: network1 + source: github.com/GoogleCloudPlatform/hpc-toolkit//modules/network/vpc kind: terraform - id: network1 ``` Both examples above use the [double-slash notation][tfsubdir] (`//`) to indicate @@ -240,9 +240,9 @@ of this feature. For example, to temporarily point to a development copy of the Toolkit vpc module, use: ```yaml - - source: github.com/GoogleCloudPlatform/hpc-toolkit//modules/network/vpc?ref=develop + - id: network1 + source: github.com/GoogleCloudPlatform/hpc-toolkit//modules/network/vpc?ref=develop kind: terraform - id: network1 ``` [tfrev]: https://www.terraform.io/language/modules/sources#selecting-a-revision @@ -289,16 +289,16 @@ the used module's output. For example, see the following blueprint snippet: ```yaml modules: -- source: modules/network/vpc +- id: network1 + source: modules/network/vpc kind: terraform - id: network1 -- resource: modules/compute/vm-instance +- id: workstation + source: modules/compute/vm-instance kind: terraform - id: workstation use: [network1] settings: - ... + ... 
``` In this snippet, the VM instance `workstation` uses the outputs of vpc diff --git a/modules/compute/vm-instance/README.md b/modules/compute/vm-instance/README.md index 129e746f99..03813e2fa1 100644 --- a/modules/compute/vm-instance/README.md +++ b/modules/compute/vm-instance/README.md @@ -6,9 +6,9 @@ This module creates one or more ### Example ```yaml -- source: modules/compute/vm-instance +- id: compute + source: modules/compute/vm-instance kind: terraform - id: compute use: [network1] settings: instance_count: 8 diff --git a/modules/compute/vm-instance/versions.tf b/modules/compute/vm-instance/versions.tf index 503e7e908b..b77d6ded49 100644 --- a/modules/compute/vm-instance/versions.tf +++ b/modules/compute/vm-instance/versions.tf @@ -27,10 +27,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.4.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.4.0" } required_version = ">= 0.14.0" diff --git a/modules/file-system/filestore/README.md b/modules/file-system/filestore/README.md index 9bfcc6eec2..5eddd6d6d2 100644 --- a/modules/file-system/filestore/README.md +++ b/modules/file-system/filestore/README.md @@ -46,9 +46,9 @@ The Filestore instance defined below will have the following attributes: - connected to the network defined in the `network1` module ```yaml -- source: modules/file-system/filestore +- id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home @@ -65,9 +65,9 @@ The Filestore instance defined below will have the following attributes: - connected to the VPC network defined in the `network1` module ```yaml -- source: modules/file-system/filestore +- id: highscale + source: modules/file-system/filestore kind: terraform - id: highscale use: [network1] settings: filestore_tier: HIGH_SCALE_SSD diff --git a/modules/file-system/filestore/scripts/install-nfs-client.sh b/modules/file-system/filestore/scripts/install-nfs-client.sh index 8ad49f4780..6c49163eb2 100644 --- a/modules/file-system/filestore/scripts/install-nfs-client.sh +++ b/modules/file-system/filestore/scripts/install-nfs-client.sh @@ -24,7 +24,7 @@ if [ ! "$(which mount.nfs)" ]; then enable_repo="baseos" else echo "Unsupported version of centos/RHEL/Rocky" - exit 1 + return 1 fi yum install --disablerepo="*" --enablerepo=${enable_repo} -y nfs-utils elif [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release || grep -qi ubuntu /etc/os-release; then @@ -32,6 +32,6 @@ if [ ! 
"$(which mount.nfs)" ]; then apt-get -y install nfs-common else echo 'Unsuported distribution' - exit 1 + return 1 fi fi diff --git a/modules/file-system/filestore/versions.tf b/modules/file-system/filestore/versions.tf index 5e3b0feefb..c0cf4c6d07 100644 --- a/modules/file-system/filestore/versions.tf +++ b/modules/file-system/filestore/versions.tf @@ -26,10 +26,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.4.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.4.0" } required_version = ">= 0.14.0" diff --git a/modules/file-system/pre-existing-network-storage/README.md b/modules/file-system/pre-existing-network-storage/README.md index 978ffa9070..338af4caad 100644 --- a/modules/file-system/pre-existing-network-storage/README.md +++ b/modules/file-system/pre-existing-network-storage/README.md @@ -11,9 +11,9 @@ Toolkit supported file-system such as [filestore](../filestore/README.md). ### Example ```yaml -- source: modules/file-system/pre-existing-network-storage +- id: homefs + source: modules/file-system/pre-existing-network-storage kind: terraform - id: homefs settings: server_ip: ## Set server IP here ## remote_mount: nfsshare diff --git a/modules/monitoring/dashboard/README.md b/modules/monitoring/dashboard/README.md index 4f5b4d6953..920ef99e0c 100644 --- a/modules/monitoring/dashboard/README.md +++ b/modules/monitoring/dashboard/README.md @@ -10,9 +10,9 @@ needed. ## Example ```yaml -- source: modules/monitoring/dashboard +- id: hpc_dash + source: modules/monitoring/dashboard kind: terraform - id: hpc_dash settings: widgets: - | diff --git a/modules/monitoring/dashboard/versions.tf b/modules/monitoring/dashboard/versions.tf index 49d8cae75c..b039758539 100644 --- a/modules/monitoring/dashboard/versions.tf +++ b/modules/monitoring/dashboard/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.4.0" } required_version = ">= 0.14.0" diff --git a/modules/network/pre-existing-vpc/README.md b/modules/network/pre-existing-vpc/README.md index a9eaa2a90a..c6e64a7e19 100644 --- a/modules/network/pre-existing-vpc/README.md +++ b/modules/network/pre-existing-vpc/README.md @@ -12,9 +12,9 @@ sharing a single network module between deployment groups. ### Example ```yaml -- source: modules/network/pre-existing-vpc +- id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 settings: - project_id: $(vars.project_id) ``` diff --git a/modules/network/pre-existing-vpc/versions.tf b/modules/network/pre-existing-vpc/versions.tf index 0310cf4f00..9e4ad633cf 100644 --- a/modules/network/pre-existing-vpc/versions.tf +++ b/modules/network/pre-existing-vpc/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.4.0" } required_version = ">= 0.14.0" diff --git a/modules/network/vpc/README.md b/modules/network/vpc/README.md index 24e84de1ab..9133a531da 100644 --- a/modules/network/vpc/README.md +++ b/modules/network/vpc/README.md @@ -106,9 +106,9 @@ compact set of subnetworks possible. 
### Example ```yaml -- source: modules/network/vpc +- id: network1 + source: modules/network/vpc kind: terraform - id: network1 settings: - deployment_name: $(vars.deployment_name) ``` diff --git a/modules/scripts/startup-script/README.md b/modules/scripts/startup-script/README.md index c6937eba6c..f20b98739c 100644 --- a/modules/scripts/startup-script/README.md +++ b/modules/scripts/startup-script/README.md @@ -53,6 +53,49 @@ Each runner receives the following attributes: Therefore `args` should not include any arguments that alter this behavior, such as `--connection`, `--inventory`, or `--limit`. +### Runner dependencies + +The `ansible-local` runner requires ansible to be installed in the VM before +running. To support other playbook runners in the HPC Toolkit, we require +version 2.11 of ansible-core or higher. Note that this is distinct from the +package version used to install ansible with pip. The minimum pip package +of ansible is 4.10.0. + +To install ansible, a runner supplied by this module can be added as a prior +runner. An example of this can be found in the [Example](#example) section below +as the first runner in the list of runners. This script will do the following in +your VM instance: + +- Install system-wide python3 if not already installed using system package + managers (yum, apt-get, etc) +- Install `python3-distutils` system-wide in debian and ubuntu based + environments. This can be a missing dependency on system installations of + python3 for installing and upgrading pip. +- Install system-wide pip3 if not already installed and upgrade pip3 if the + version is not at least 18.0. +- Install and create a virtual environment located at `/usr/local/ghpc-venv`. +- Install ansible into this virtual environment if the current version of + ansible is not version 2.11 or higher. + +To use the virtual environment created by this script, you can activate it by +running the following command on the VM: + +```shell +source /usr/local/ghpc-venv/bin/activate +``` + +You may also need to provide the correct python interpreter as the python3 +binary in the virtual environment. This can be done by adding the following flag +when calling `ansible-playbook`: + +```shell +-e ansible_python_interpreter=/usr/local/ghpc-venv/bin/python3 +``` + +> **_NOTE:_** ansible-playbook and other ansible command line tools will only be +> accessible from the command line (and in your PATH variable) after activating +> this environment. + ### Staging the runners Runners will be uploaded to a @@ -96,9 +139,9 @@ sudo journalctl -u google-startup-scripts.service ### Example ```yaml -- source: ./modules/scripts/startup-script +- id: startup + source: ./modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell @@ -124,9 +167,9 @@ sudo journalctl -u google-startup-scripts.service tar zxvf /tmp/$1 -C / args: "bar.tgz 'Expanding file'" -- source: ./modules/compute/vm-instance +- id: compute-cluster + source: ./modules/compute/vm-instance kind: terraform - id: compute-cluster use: [homefs, startup] ``` diff --git a/modules/scripts/startup-script/examples/install_ansible.sh b/modules/scripts/startup-script/examples/install_ansible.sh index c9b5a4326a..5f4fd236f2 100644 --- a/modules/scripts/startup-script/examples/install_ansible.sh +++ b/modules/scripts/startup-script/examples/install_ansible.sh @@ -13,6 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License.
+REQ_ANSIBLE_VERSION=2.11 +REQ_ANSIBLE_PIP_VERSION=4.10.0 +REQ_PIP_MINOR_VERSION=18 +REQ_PYTHON3_VERSION=6 + apt_wait() { while fuser /var/lib/dpkg/lock >/dev/null 2>&1; do echo "Sleeping for dpkg lock" @@ -30,24 +35,158 @@ apt_wait() { fi } -if [ ! -h /usr/bin/ansible-playbook ] || [ ! -f /usr/bin/ansible-playbook ]; then - if [ -f /etc/centos-release ] || [ -f /etc/redhat-release ] || [ -f /etc/oracle-release ] || [ -f /etc/system-release ]; then - if [ ! -f /bin/pip ]; then - curl -Os https://bootstrap.pypa.io/pip/2.7/get-pip.py - /usr/bin/python get-pip.py +# Installs any dependencies needed for python based on the OS +install_python_deps() { + if [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release 2>/dev/null || + grep -qi ubuntu /etc/os-release 2>/dev/null; then + apt-get install -y python3-distutils + fi +} + +# Gets the name of the python executable for python starting with python3, then +# checking python. Sets the variable to an empty string if neither are found. +get_python_path() { + python_path="" + if which python3 2>/dev/null; then + python_path=$(which python3 2>/dev/null) + elif which python 2>/dev/null; then + python_path=$(which python 2>/dev/null) + fi +} + +# Returns the python major version. If provided, it will use the first argument +# as the python executable, otherwise it will default to simply "python". +get_python_major_version() { + python_path=${1:-python} + python_major_version=$(${python_path} -c "import sys; print(sys.version_info.major)") +} + +# Returns the python minor version. If provided, it will use the first argument +# as the python executable, otherwise it will default to simply "python". +get_python_minor_version() { + python_path=${1:-python} + python_minor_version=$(${python_path} -c "import sys; print(sys.version_info.minor)") +} + +# Install python3 with the yum package manager. Updates python_path to the +# newly installed packaged. +install_python3_yum() { + major_version=$(rpm -E "%{rhel}") + enable_repo="" + if [ "${major_version}" -eq "7" ]; then + enable_repo="base,epel" + elif [ "${major_version}" -eq "8" ]; then + enable_repo="baseos" + else + echo "Unsupported version of centos/RHEL/Rocky" + return 1 + fi + yum install --disablerepo="*" --enablerepo=${enable_repo} -y python3 python3-pip + python_path=$(rpm -ql python3 | grep 'bin/python3$') +} + +# Install python3 with the apt package manager. Updates python_path to the +# newly installed packaged. +install_python3_apt() { + apt_wait + apt-get install -y python3 python3-distutils python3-pip + python_path=$(which python3) +} + +install_python3() { + if [ -f /etc/centos-release ] || [ -f /etc/redhat-release ] || + [ -f /etc/oracle-release ] || [ -f /etc/system-release ]; then + install_python3_yum + elif [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release 2>/dev/null || + grep -qi ubuntu /etc/os-release 2>/dev/null; then + install_python3_apt + else + echo "Error: Unsupported Distribution" + return 1 + fi +} + +# Install python3 with the yum package manager. Updates python_path to the +# newly installed packaged. +install_pip3_yum() { + major_version=$(rpm -E "%{rhel}") + enable_repo="" + if [ "${major_version}" -eq "7" ]; then + enable_repo="base,epel" + elif [ "${major_version}" -eq "8" ]; then + enable_repo="baseos" + else + echo "Unsupported version of centos/RHEL/Rocky" + return 1 + fi + yum install --disablerepo="*" --enablerepo=${enable_repo} -y python3-pip +} + +# Install python3 with the apt package manager. 
Updates python_path to the +# newly installed packaged. +install_pip3_apt() { + apt-get update + apt-get install -y python3-pip +} + +install_pip3() { + if [ -f /etc/centos-release ] || [ -f /etc/redhat-release ] || + [ -f /etc/oracle-release ] || [ -f /etc/system-release ]; then + install_pip3_yum + elif [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release 2>/dev/null || + grep -qi ubuntu /etc/os-release 2>/dev/null; then + install_pip3_apt + else + echo "Error: Unsupported Distribution" + return 1 + fi +} + +main() { + # Get the python3 executable, or install it if not found + get_python_path + get_python_major_version "${python_path}" + get_python_minor_version "${python_path}" + if [ "${python_path}" = "" ] || [ "${python_major_version}" = "2" ] || [ "${python_minor_version}" -lt "${REQ_PYTHON3_VERSION}" ]; then + if ! install_python3; then + return 1 fi - /usr/bin/python -m pip install virtualenv - /usr/bin/python -m virtualenv /usr/local/toolkit - /usr/local/toolkit/bin/python -m pip install wheel - /usr/local/toolkit/bin/python -m pip install ansible==2.9.27 - ln -s /usr/local/toolkit/bin/ansible-playbook /usr/bin/ansible-playbook - elif [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release || grep -qi ubuntu /etc/os-release; then - echo 'WARNING: unsupported installation of ansible in debian / ubuntu' - apt_wait - apt-get update - DEBIAN_FRONTEND=noninteractive apt-get install -y ansible + get_python_major_version "${python_path}" + get_python_minor_version "${python_path}" else - echo 'Unsupported distribution' - exit 1 + install_python_deps fi -fi + + # Install and/or upgrade pip + if ! ${python_path} -m pip --version 2>/dev/null; then + if ! install_pip3; then + return 1 + fi + fi + pip_version=$(${python_path} -m pip --version | sed -nr 's/^pip ([0-9]+\.[0-9]+).*$/\1/p') + pip_major_version=$(echo "${pip_version}" | cut -d '.' -f 1) + if [ "${pip_major_version}" -lt "${REQ_PIP_MINOR_VERSION}" ]; then + ${python_path} -m pip install --upgrade pip + fi + + # Create pip virtual environment for HPC Toolkit + ${python_path} -m pip install virtualenv + ${python_path} -m virtualenv /usr/local/ghpc-venv + python_path=/usr/local/ghpc-venv/bin/python3 + + # Install ansible + ansible_version="" + if which ansible-playbook 2>/dev/null; then + ansible_version=$(ansible-playbook --version 2>/dev/null | sed -nr 's/^ansible-playbook.*([0-9]+\.[0-9]+\.[0-9]+).*/\1/p') + ansible_major_vers=$(echo "${ansible_version}" | cut -d '.' -f 1) + ansible_minor_vers=$(echo "${ansible_version}" | cut -d '.' -f 2) + ansible_req_major_vers=$(echo "${REQ_ANSIBLE_VERSION}" | cut -d '.' -f 1) + ansible_req_minor_vers=$(echo "${REQ_ANSIBLE_VERSION}" | cut -d '.' -f 2) + fi + if [ -z "${ansible_version}" ] || [ "${ansible_major_vers}" -ne "${ansible_req_major_vers}" ] || + [ "${ansible_minor_vers}" -lt "${ansible_req_minor_vers}" ]; then + ${python_path} -m pip install ansible==${REQ_ANSIBLE_PIP_VERSION} + fi +} + +main diff --git a/modules/scripts/startup-script/templates/startup-script-custom.tpl b/modules/scripts/startup-script/templates/startup-script-custom.tpl index 2c1ca2e124..8dad11cc3c 100644 --- a/modules/scripts/startup-script/templates/startup-script-custom.tpl +++ b/modules/scripts/startup-script/templates/startup-script-custom.tpl @@ -1,13 +1,22 @@ stdlib::run_playbook() { + python_interpreter_flag="" + if [ -d /usr/local/ghpc-venv ]; then + . /usr/local/ghpc-venv/bin/activate + python_interpreter_flag="-e ansible_python_interpreter=/usr/local/ghpc-venv/bin/python3" + fi if [ ! 
"$(which ansible-playbook)" ]; then stdlib::error "ansible-playbook not found"\ "Please install ansible before running ansible-local runners." exit 1 fi - /usr/bin/ansible-playbook --connection=local --inventory=localhost, --limit localhost $1 $2 - return $? + ansible-playbook $${python_interpreter_flag} --connection=local --inventory=localhost, --limit localhost $1 $2 + ret_code=$? + if [ -d /usr/local/ghpc-venv ]; then + deactivate + fi + return $${ret_code} } stdlib::runner() { @@ -27,6 +36,7 @@ stdlib::runner() { stdlib::get_from_bucket -u "gs://${bucket}/$object" -d "$destpath" -f "$filename" + stdlib::info "=== start executing runner: $object ===" case "$1" in ansible-local) stdlib::run_playbook "$destpath/$filename" "$args";; shell) . $destpath/$filename $args;; diff --git a/modules/scripts/startup-script/versions.tf b/modules/scripts/startup-script/versions.tf index cea3ab22b0..7a67e58b5e 100644 --- a/modules/scripts/startup-script/versions.tf +++ b/modules/scripts/startup-script/versions.tf @@ -30,7 +30,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.4.0" } required_version = ">= 0.14.0" diff --git a/pkg/config/config.go b/pkg/config/config.go index 89248ab4f1..e311d0f94b 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -35,7 +35,10 @@ import ( "hpc-toolkit/pkg/sourcereader" ) -const expectedVarFormat = "$(vars.var_name) or $(module_id.var_name)" +const ( + expectedVarFormat string = "$(vars.var_name) or $(module_id.var_name)" + matchLabelExp string = `^[\p{Ll}\p{Lo}\p{N}_-]{1,63}$` +) var errorMessages = map[string]string{ // general @@ -66,6 +69,10 @@ var errorMessages = map[string]string{ "emptyGroupName": "group name must be set for each deployment group", "illegalChars": "invalid character(s) found in group name", "invalidOutput": "requested output was not found in the module", + "varNotDefined": "variable not defined", + "valueNotString": "value was not of type string", + "valueEmptyString": "value is an empty string", + "labelReqs": "value can only contain lowercase letters, numeric characters, underscores and dashes, and must be between 1 and 63 characters long.", } // DeploymentGroup defines a group of Modules that are all executed together @@ -402,6 +409,10 @@ func (dc *DeploymentConfig) validateConfig() { if err != nil { log.Fatal(err) } + err = dc.Config.checkBlueprintName() + if err != nil { + log.Fatal(err) + } moduleToGroup, err := checkModuleAndGroupNames(dc.Config.DeploymentGroups) if err != nil { log.Fatal(err) @@ -559,13 +570,14 @@ func ResolveVariables( return nil } -// DeploymentNameError signifies a problem with the blueprint deployment name. -type DeploymentNameError struct { - cause string +// InputValueError signifies a problem with the blueprint name. +type InputValueError struct { + inputKey string + cause string } -func (err *DeploymentNameError) Error() string { - return fmt.Sprintf("deployment_name must be a string and cannot be empty, cause: %v", err.cause) +func (err *InputValueError) Error() string { + return fmt.Sprintf("%v input error, cause: %v", err.inputKey, err.cause) } // ResolveGlobalVariables will resolve literal variables "((var.*))" in the @@ -579,20 +591,66 @@ func (b Blueprint) ResolveGlobalVariables(ctyVars map[string]cty.Value) error { return ResolveVariables(ctyVars, origin) } +// isValidLabelValue checks if a string is a valid value for a GCP label. 
+// For more information on valid label values, see the docs at: +// https://cloud.google.com/resource-manager/docs/creating-managing-labels#requirements +func isValidLabelValue(value string) bool { + return regexp.MustCompile(matchLabelExp).MatchString(value) +} + // DeploymentName returns the deployment_name from the config and does appropriate checks. func (b *Blueprint) DeploymentName() (string, error) { nameInterface, found := b.Vars["deployment_name"] if !found { - return "", &DeploymentNameError{"deployment_name variable not defined."} + return "", &InputValueError{ + inputKey: "deployment_name", + cause: errorMessages["varNotFound"], + } } deploymentName, ok := nameInterface.(string) if !ok { - return "", &DeploymentNameError{"deployment_name was not of type string."} + return "", &InputValueError{ + inputKey: "deployment_name", + cause: errorMessages["valueNotString"], + } } if len(deploymentName) == 0 { - return "", &DeploymentNameError{"deployment_name was an empty string."} + return "", &InputValueError{ + inputKey: "deployment_name", + cause: errorMessages["valueEmptyString"], + } + } + + // Check that deployment_name is a valid label + if !isValidLabelValue(deploymentName) { + return "", &InputValueError{ + inputKey: "deployment_name", + cause: errorMessages["labelReqs"], + } } + return deploymentName, nil } + +// checkBlueprintName returns an error if blueprint_name does not comply with +// requirements for correct GCP label values. +func (b *Blueprint) checkBlueprintName() error { + + if len(b.BlueprintName) == 0 { + return &InputValueError{ + inputKey: "blueprint_name", + cause: errorMessages["valueEmptyString"], + } + } + + if !isValidLabelValue(b.BlueprintName) { + return &InputValueError{ + inputKey: "blueprint_name", + cause: errorMessages["labelReqs"], + } + } + + return nil +} diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 5fe2723885..7b8fd9eb03 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -232,6 +232,7 @@ func getBasicDeploymentConfigWithTestModule() DeploymentConfig { } return DeploymentConfig{ Config: Blueprint{ + BlueprintName: "simple", Vars: map[string]interface{}{"deployment_name": "deployment_name"}, DeploymentGroups: []DeploymentGroup{testDeploymentGroup}, }, @@ -318,7 +319,7 @@ func (s *MySuite) TestCheckModuleAndGroupNames(c *C) { func (s *MySuite) TestDeploymentName(c *C) { dc := getDeploymentConfigForTest() - var e *DeploymentNameError + var e *InputValueError // Is deployment_name a valid string? deploymentName, err := dc.Config.DeploymentName() @@ -333,7 +334,25 @@ func (s *MySuite) TestDeploymentName(c *C) { // Is deployment_name not a string? dc.Config.Vars["deployment_name"] = 100 - _, err = dc.Config.DeploymentName() + deploymentName, err = dc.Config.DeploymentName() + c.Assert(deploymentName, Equals, "") + c.Check(errors.As(err, &e), Equals, true) + + // Is deployment_name longer than 63 characters? + dc.Config.Vars["deployment_name"] = "deployment_name-deployment_name-deployment_name-deployment_name-0123" + deploymentName, err = dc.Config.DeploymentName() + c.Assert(deploymentName, Equals, "") + c.Check(errors.As(err, &e), Equals, true) + + // Does deployment_name contain special characters other than dashes or underscores? + dc.Config.Vars["deployment_name"] = "deployment.name" + deploymentName, err = dc.Config.DeploymentName() + c.Assert(deploymentName, Equals, "") + c.Check(errors.As(err, &e), Equals, true) + + // Does deployment_name contain capital letters?
+ dc.Config.Vars["deployment_name"] = "Deployment_name" + deploymentName, err = dc.Config.DeploymentName() c.Assert(deploymentName, Equals, "") c.Check(errors.As(err, &e), Equals, true) @@ -344,6 +363,40 @@ func (s *MySuite) TestDeploymentName(c *C) { c.Check(errors.As(err, &e), Equals, true) } +func (s *MySuite) TestCheckBlueprintName(c *C) { + dc := getDeploymentConfigForTest() + var e *InputValueError + + // Is blueprint_name a valid string? + err := dc.Config.checkBlueprintName() + c.Assert(err, IsNil) + + // Is blueprint_name a valid string with an underscore and dash? + dc.Config.BlueprintName = "blue-print_name" + err = dc.Config.checkBlueprintName() + c.Check(err, IsNil) + + // Is blueprint_name an empty string? + dc.Config.BlueprintName = "" + err = dc.Config.checkBlueprintName() + c.Check(errors.As(err, &e), Equals, true) + + // Is blueprint_name longer than 63 characters? + dc.Config.BlueprintName = "blueprint-name-blueprint-name-blueprint-name-blueprint-name-0123" + err = dc.Config.checkBlueprintName() + c.Check(errors.As(err, &e), Equals, true) + + // Does blueprint_name contain special characters other than dashes or underscores? + dc.Config.BlueprintName = "blueprint.name" + err = dc.Config.checkBlueprintName() + c.Check(errors.As(err, &e), Equals, true) + + // Does blueprint_name contain capital letters? + dc.Config.BlueprintName = "Blueprint_name" + err = dc.Config.checkBlueprintName() + c.Check(errors.As(err, &e), Equals, true) +} + func (s *MySuite) TestNewBlueprint(c *C) { dc := getDeploymentConfigForTest() outFile := filepath.Join(tmpTestDir, "out_TestNewBlueprint.yaml") diff --git a/pkg/modulewriter/modulewriter_test.go b/pkg/modulewriter/modulewriter_test.go index d2da694b3e..d202b8fb70 100644 --- a/pkg/modulewriter/modulewriter_test.go +++ b/pkg/modulewriter/modulewriter_test.go @@ -227,7 +227,7 @@ func (s *MySuite) TestWriteDeployment(c *C) { func (s *MySuite) TestWriteDeployment_BadDeploymentName(c *C) { testBlueprint := getBlueprintForTest() - var e *config.DeploymentNameError + var e *config.InputValueError testBlueprint.Vars = map[string]interface{}{"deployment_name": 100} err := WriteDeployment(&testBlueprint, testDir, false /* overwriteFlag */) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml index 8a382dc40e..be963b4463 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml @@ -140,7 +140,7 @@ - name: Delete Firewall Rule register: fw_deleted changed_when: fw_deleted.rc == 0 - failed_when: false # keep cleaning up + failed_when: false # keep cleaning up ansible.builtin.command: argv: - gcloud @@ -149,8 +149,8 @@ - delete - "{{ deployment_name }}" - name: Tear Down Pool - changed_when: true # assume something destroyed - failed_when: false # keep cleaning up + changed_when: true # assume something destroyed + failed_when: false # keep cleaning up run_once: true delegate_to: localhost environment: @@ -177,7 +177,7 @@ ansible.builtin.command: cmd: gcloud compute images delete {{ image_name.stdout }} - name: Tear Down Network - changed_when: true # assume something destroyed + changed_when: true # assume something destroyed delegate_to: localhost environment: TF_IN_AUTOMATION: "TRUE" @@ -190,8 +190,8 @@ - name: Run Integration Tests hosts: remote_host - gather_facts: no # must wait until host is reachable - 
ignore_unreachable: true # ensure always block will run even if SSH fails + gather_facts: false # must wait until host is reachable + ignore_unreachable: true # ensure always block will run even if SSH fails tasks: - name: HTCondor Test Block vars: @@ -218,7 +218,7 @@ loop_var: test always: - name: Tear Down Pool - changed_when: true # assume something destroyed + changed_when: true # assume something destroyed delegate_to: localhost run_once: true environment: @@ -243,7 +243,7 @@ - name: Delete custom image register: image_deleted changed_when: image_deleted.rc == 0 - failed_when: false # keep cleaning up + failed_when: false # keep cleaning up run_once: true delegate_to: localhost ansible.builtin.command: @@ -251,7 +251,7 @@ - name: Delete Firewall Rule register: fw_deleted changed_when: fw_deleted.rc == 0 - failed_when: false # keep cleaning up + failed_when: false # keep cleaning up run_once: true delegate_to: localhost ansible.builtin.command: @@ -262,7 +262,7 @@ - delete - "{{ deployment_name }}" - name: Tear Down Network - changed_when: true # assume something destroyed + changed_when: true # assume something destroyed run_once: true delegate_to: localhost environment: diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml index 144b5a4b85..f64847dad1 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml @@ -127,7 +127,7 @@ - name: Delete Firewall Rule register: fw_deleted changed_when: fw_deleted.rc == 0 - failed_when: false # keep cleaning up + failed_when: false # keep cleaning up command: argv: - gcloud @@ -136,7 +136,7 @@ - delete - "{{ deployment_name }}" - name: Tear Down Cluster - changed_when: true # assume something destroyed + changed_when: true # assume something destroyed run_once: true delegate_to: localhost environment: @@ -151,8 +151,8 @@ - name: Run Integration Tests hosts: remote_host - gather_facts: no # must wait until host is reachable - ignore_unreachable: true # ensure always block will run even if SSH fails + gather_facts: false # must wait until host is reachable + ignore_unreachable: true # ensure always block will run even if SSH fails tasks: - name: Slurm Test Block vars: @@ -201,7 +201,7 @@ - name: Delete Firewall Rule register: fw_deleted changed_when: fw_deleted.rc == 0 - failed_when: false # keep cleaning up + failed_when: false # keep cleaning up run_once: true delegate_to: localhost command: @@ -212,7 +212,7 @@ - delete - "{{ deployment_name }}" - name: Tear Down Cluster - changed_when: true # assume something destroyed + changed_when: true # assume something destroyed run_once: true delegate_to: localhost environment: diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml index ff7a2630c5..2e46579485 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml @@ -31,6 +31,9 @@ until: result.stdout.find("SUCCEEDED") != -1 retries: 60 delay: 10 + - name: Call batch list command printed in instructions + changed_when: false + ansible.builtin.command: gcloud alpha batch jobs list --project={{ custom_vars.project }} always: - name: delete job diff --git a/tools/cloud-build/daily-tests/blueprints/lustre-with-new-vpc.yaml 
b/tools/cloud-build/daily-tests/blueprints/lustre-with-new-vpc.yaml index 266f506351..f3d0bfb097 100644 --- a/tools/cloud-build/daily-tests/blueprints/lustre-with-new-vpc.yaml +++ b/tools/cloud-build/daily-tests/blueprints/lustre-with-new-vpc.yaml @@ -28,23 +28,23 @@ deployment_groups: # Source is an embedded module, denoted by "modules/*" without ./, ../, / # as a prefix. To refer to a local or community module, prefix with ./, ../ or / # Example - ./modules/network/pre-existing-vpc - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 settings: network_name: lustre-new-vpc - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home # Explicitly picking the local version of the module - - source: community/modules/file-system/DDN-EXAScaler + - id: scratchfs + source: community/modules/file-system/DDN-EXAScaler kind: terraform - id: scratchfs settings: local_mount: /scratch network_self_link: $(network1.network_self_link) @@ -52,9 +52,9 @@ deployment_groups: subnetwork_address: $(network1.subnetwork_address) # Create a separate workstation to catch regressions in vm-instance - - source: ./modules/compute/vm-instance + - id: workstation + source: ./modules/compute/vm-instance kind: terraform - id: workstation use: - network1 - homefs @@ -63,9 +63,9 @@ deployment_groups: name_prefix: test-workstation machine_type: c2-standard-4 - - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: - network1 - homefs @@ -74,18 +74,18 @@ deployment_groups: max_node_count: 2 partition_name: compute - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - network1 - homefs - scratchfs - compute_partition - - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - network1 - homefs diff --git a/tools/cloud-build/daily-tests/blueprints/monitoring.yaml b/tools/cloud-build/daily-tests/blueprints/monitoring.yaml index 9b6731a7c5..5c47f9d102 100644 --- a/tools/cloud-build/daily-tests/blueprints/monitoring.yaml +++ b/tools/cloud-build/daily-tests/blueprints/monitoring.yaml @@ -25,23 +25,23 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/vpc + - id: network + source: modules/network/vpc kind: terraform - id: network settings: network_name: monitoring-net - - source: community/modules/file-system/nfs-server + - id: homefs + source: community/modules/file-system/nfs-server kind: terraform - id: homefs use: [network] settings: local_mounts: [/home] auto_delete_disk: true - - source: ./modules/scripts/startup-script + - id: startup + source: ./modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell @@ -53,9 +53,9 @@ deployment_groups: - $(homefs.install_nfs_client_runner) - $(homefs.mount_runner) - - source: ./modules/compute/vm-instance + - id: workstation + source: ./modules/compute/vm-instance kind: terraform - id: workstation use: - network - homefs @@ -63,10 +63,10 @@ deployment_groups: settings: machine_type: c2-standard-4 
metadata: - enable-oslogin: TRUE + enable-oslogin: true - - source: ./modules/monitoring/dashboard + - id: hpc-dash + source: ./modules/monitoring/dashboard kind: terraform - id: hpc-dash settings: title: $(vars.deployment_name) diff --git a/tools/validate_configs/test_configs/2-nfs-servers.yaml b/tools/validate_configs/test_configs/2-nfs-servers.yaml index f0fb6066ff..26ed5cba0c 100644 --- a/tools/validate_configs/test_configs/2-nfs-servers.yaml +++ b/tools/validate_configs/test_configs/2-nfs-servers.yaml @@ -25,22 +25,22 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: community/modules/file-system/nfs-server + - id: homefs + source: community/modules/file-system/nfs-server kind: terraform - id: homefs use: [network1] outputs: [network_storage] settings: local_mounts: ["/home"] auto_delete_disk: true - - source: ./community/modules/file-system/nfs-server + - id: appsfs + source: ./community/modules/file-system/nfs-server kind: terraform - id: appsfs use: [network1] outputs: [network_storage] settings: diff --git a/tools/validate_configs/test_configs/2filestore-4instances.yaml b/tools/validate_configs/test_configs/2filestore-4instances.yaml index 1146388f60..a1fe2c1291 100644 --- a/tools/validate_configs/test_configs/2filestore-4instances.yaml +++ b/tools/validate_configs/test_configs/2filestore-4instances.yaml @@ -25,13 +25,13 @@ vars: deployment_groups: - group: infrastructure modules: - - source: ./modules/network/vpc + - id: network + source: ./modules/network/vpc kind: terraform - id: network - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network] settings: name: homefs @@ -39,9 +39,9 @@ deployment_groups: labels: ghpc_role: storage-home - - source: ./modules/file-system/filestore + - id: apps + source: ./modules/file-system/filestore kind: terraform - id: apps use: [network] settings: name: apps @@ -49,9 +49,9 @@ deployment_groups: labels: ghpc_role: storage-apps - - source: ./modules/scripts/startup-script + - id: startup + source: ./modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell @@ -64,9 +64,9 @@ deployment_groups: source: "modules/startup-script/examples/mount.yaml" destination: "mount.yaml" - - source: ./modules/compute/vm-instance + - id: license-server-1 + source: ./modules/compute/vm-instance kind: terraform - id: license-server-1 use: [network] settings: name_prefix: ls1 @@ -74,9 +74,9 @@ deployment_groups: labels: ghpc_role: license - - source: modules/compute/vm-instance + - id: license-server-2 + source: modules/compute/vm-instance kind: terraform - id: license-server-2 use: [network] settings: name_prefix: ls2 @@ -84,9 +84,9 @@ deployment_groups: labels: ghpc_role: license - - source: modules/compute/vm-instance + - id: head-node + source: modules/compute/vm-instance kind: terraform - id: head-node use: - network - homefs @@ -99,9 +99,9 @@ deployment_groups: metadata: startup-script: $(startup.startup_script) - - source: modules/compute/vm-instance + - id: compute + source: modules/compute/vm-instance kind: terraform - id: compute use: - network - homefs diff --git a/tools/validate_configs/test_configs/centos8-ss.yaml b/tools/validate_configs/test_configs/centos8-ss.yaml index 38630837c4..be1e56120e 100644 --- a/tools/validate_configs/test_configs/centos8-ss.yaml +++ 
b/tools/validate_configs/test_configs/centos8-ss.yaml @@ -25,49 +25,40 @@ vars: deployment_groups: - group: primary modules: - - source: ./modules/network/pre-existing-vpc + - id: network1 + source: ./modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: appsfs + source: modules/file-system/filestore kind: terraform - id: appsfs use: [network1] settings: name: appsfs local_mount: /apps - - source: community/modules/file-system/nfs-server + - id: nfs + source: community/modules/file-system/nfs-server kind: terraform - id: nfs use: [network1] settings: - image: centos-cloud/centos-stream-8 auto_delete_disk: true - - source: ./community//modules/scripts/spack-install + - id: spack + source: ./community//modules/scripts/spack-install kind: terraform - id: spack settings: install_dir: /apps/spack spack_url: https://github.com/spack/spack - spack_ref: v0.17.0 - spack_cache_url: - - mirror_name: 'gcs_cache' - mirror_url: gs://example-buildcache/linux-centos7 + spack_ref: v0.18.0 compilers: - gcc@10.3.0 target=x86_64 packages: - cmake%gcc@10.3.0 target=x86_64 - - intel-mkl%gcc@10.3.0 target=skylake - - intel-mpi@2018.4.274%gcc@10.3.0 target=skylake - - >- - fftw%intel@18.0.5 target=skylake ^intel-mpi@2018.4.274%intel@18.0.5 - target=x86_64 - - source: ./modules/scripts/startup-script + - id: startup + source: ./modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell @@ -94,13 +85,14 @@ deployment_groups: destination: "install-nfs-client.sh" - $(appsfs.install_nfs_client_runner) - $(nfs.mount_runner) + - $(spack.install_spack_deps_runner) - type: shell content: $(spack.startup_script) destination: "/apps/spack-install.sh" - - source: ./modules/compute/vm-instance + - id: instance + source: ./modules/compute/vm-instance kind: terraform - id: instance use: [network1, startup, nfs, appsfs] settings: machine_type: e2-standard-4 diff --git a/tools/validate_configs/test_configs/cloud-batch-cft-instance-template.yaml b/tools/validate_configs/test_configs/cloud-batch-cft-instance-template.yaml index 0c63bf930a..d3a0ee919f 100644 --- a/tools/validate_configs/test_configs/cloud-batch-cft-instance-template.yaml +++ b/tools/validate_configs/test_configs/cloud-batch-cft-instance-template.yaml @@ -23,19 +23,19 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: appfs + source: modules/file-system/filestore kind: terraform - id: appfs use: [network1] settings: {local_mount: /sw} - - source: modules/scripts/startup-script + - id: batch-startup-script + source: modules/scripts/startup-script kind: terraform - id: batch-startup-script settings: runners: - type: shell @@ -49,9 +49,9 @@ deployment_groups: #!/bin/sh echo "Hello World" > /sw/hello.txt - - source: github.com/terraform-google-modules/terraform-google-vm//modules/instance_template?ref=v7.8.0 + - id: batch-compute-template + source: github.com/terraform-google-modules/terraform-google-vm//modules/instance_template?ref=v7.8.0 kind: terraform - id: batch-compute-template use: [batch-startup-script] settings: # Boiler plate to work with Cloud Foundation Toolkit @@ -65,9 +65,9 @@ deployment_groups: source_image_family: hpc-centos-7 source_image_project: cloud-hpc-image-public - - source: ./community/modules/scheduler/cloud-batch-job + - id: batch-job + source: 
./community/modules/scheduler/cloud-batch-job kind: terraform - id: batch-job use: [network1, appfs, batch-startup-script] settings: runnable: "cat /sw/hello.txt" diff --git a/tools/validate_configs/test_configs/complex-data.yaml b/tools/validate_configs/test_configs/complex-data.yaml index f44268a467..2421496c7b 100644 --- a/tools/validate_configs/test_configs/complex-data.yaml +++ b/tools/validate_configs/test_configs/complex-data.yaml @@ -41,15 +41,15 @@ vars: deployment_groups: - group: infrastructure modules: - - source: modules/network/vpc + - id: network + source: modules/network/vpc kind: terraform - id: network settings: network_name: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network] settings: name: homefs @@ -59,9 +59,9 @@ deployment_groups: ghpc_role: storage-home number_label: 44 - - source: modules/scripts/startup-script + - id: startup + source: modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell @@ -74,9 +74,9 @@ deployment_groups: source: "modules/startup-script/examples/mount.yaml" destination: mount.yaml - - source: modules/compute/vm-instance + - id: license-server-1 + source: modules/compute/vm-instance kind: terraform - id: license-server-1 use: [network] settings: name_prefix: ls1 diff --git a/tools/validate_configs/test_configs/dashboards.yaml b/tools/validate_configs/test_configs/dashboards.yaml index ca251e5778..42d587cbb7 100644 --- a/tools/validate_configs/test_configs/dashboards.yaml +++ b/tools/validate_configs/test_configs/dashboards.yaml @@ -25,9 +25,9 @@ vars: deployment_groups: - group: primary modules: - - source: modules/monitoring/dashboard + - id: hpc_dash + source: modules/monitoring/dashboard kind: terraform - id: hpc_dash settings: widgets: - | @@ -46,9 +46,9 @@ deployment_groups: }, "title": "HPC Toolkit - TEST 2" } - - source: modules/monitoring/dashboard + - id: empty_dash + source: modules/monitoring/dashboard kind: terraform - id: empty_dash settings: base_dashboard: Empty widgets: diff --git a/tools/validate_configs/test_configs/debian-ss.yaml b/tools/validate_configs/test_configs/debian-ss.yaml index 49c74ee521..8f3a622e0b 100644 --- a/tools/validate_configs/test_configs/debian-ss.yaml +++ b/tools/validate_configs/test_configs/debian-ss.yaml @@ -25,49 +25,40 @@ vars: deployment_groups: - group: primary modules: - - source: ./modules/network/pre-existing-vpc + - id: network1 + source: ./modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: appsfs + source: modules/file-system/filestore kind: terraform - id: appsfs use: [network1] settings: name: appsfs local_mount: /apps - - source: community/modules/file-system/nfs-server + - id: nfs + source: community/modules/file-system/nfs-server kind: terraform - id: nfs use: [network1] settings: - image: debian-cloud/debian-10 auto_delete_disk: true - - source: ./community//modules/scripts/spack-install + - id: spack + source: ./community//modules/scripts/spack-install kind: 
terraform - id: spack settings: install_dir: /apps/spack spack_url: https://github.com/spack/spack - spack_ref: v0.17.0 - spack_cache_url: - - mirror_name: 'gcs_cache' - mirror_url: gs://example-buildcache/linux-centos7 + spack_ref: v0.18.0 compilers: - gcc@10.3.0 target=x86_64 packages: - cmake%gcc@10.3.0 target=x86_64 - - intel-mkl%gcc@10.3.0 target=skylake - - intel-mpi@2018.4.274%gcc@10.3.0 target=skylake - - >- - fftw%intel@18.0.5 target=skylake ^intel-mpi@2018.4.274%intel@18.0.5 - target=x86_64 - - source: ./modules/scripts/startup-script + - id: startup + source: ./modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell @@ -93,14 +84,15 @@ deployment_groups: content: $(nfs.install_nfs_client) destination: "install-nfs-client.sh" - $(appsfs.install_nfs_client_runner) - - $(nfs.mount_runner) + - $(appsfs.mount_runner) + - $(spack.install_spack_deps_runner) - type: shell content: $(spack.startup_script) destination: "/apps/spack-install.sh" - - source: ./modules/compute/vm-instance + - id: instance + source: ./modules/compute/vm-instance kind: terraform - id: instance use: [network1, startup, nfs, appsfs] settings: machine_type: e2-standard-4 diff --git a/tools/validate_configs/test_configs/exascaler-existing-vpc.yaml b/tools/validate_configs/test_configs/exascaler-existing-vpc.yaml index 8862f732e6..b242a60aec 100644 --- a/tools/validate_configs/test_configs/exascaler-existing-vpc.yaml +++ b/tools/validate_configs/test_configs/exascaler-existing-vpc.yaml @@ -25,13 +25,13 @@ vars: deployment_groups: - group: primary modules: - - source: ./modules/network/pre-existing-vpc + - id: network1 + source: ./modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: community/modules/file-system/DDN-EXAScaler + - id: scratchfs + source: community/modules/file-system/DDN-EXAScaler kind: terraform - id: scratchfs use: [network1] settings: local_mount: /scratch diff --git a/tools/validate_configs/test_configs/exascaler-new-vpc.yaml b/tools/validate_configs/test_configs/exascaler-new-vpc.yaml index 5f29f6b761..86b46909fd 100644 --- a/tools/validate_configs/test_configs/exascaler-new-vpc.yaml +++ b/tools/validate_configs/test_configs/exascaler-new-vpc.yaml @@ -25,13 +25,13 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 - - source: ./community/modules/file-system/DDN-EXAScaler + - id: scratchfs + source: ./community/modules/file-system/DDN-EXAScaler kind: terraform - id: scratchfs use: [network1] settings: local_mount: /scratch diff --git a/tools/validate_configs/test_configs/gpu.yaml b/tools/validate_configs/test_configs/gpu.yaml index f6c0b30d96..790e0de517 100644 --- a/tools/validate_configs/test_configs/gpu.yaml +++ b/tools/validate_configs/test_configs/gpu.yaml @@ -28,13 +28,13 @@ deployment_groups: # Source is an embedded module, denoted by "modules/*" without ./, ../, / # as a prefix. 
To refer to a local or community module, prefix with ./, ../ or / # Example - ./modules/network/vpc - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: ./modules/compute/vm-instance + - id: workstation + source: ./modules/compute/vm-instance kind: terraform - id: workstation use: - network1 settings: diff --git a/tools/validate_configs/test_configs/hpc-centos-ss.yaml b/tools/validate_configs/test_configs/hpc-centos-ss.yaml index f1fbdbd030..4e550f8e80 100644 --- a/tools/validate_configs/test_configs/hpc-centos-ss.yaml +++ b/tools/validate_configs/test_configs/hpc-centos-ss.yaml @@ -25,48 +25,40 @@ vars: deployment_groups: - group: primary modules: - - source: ./modules/network/pre-existing-vpc + - id: network1 + source: ./modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: appsfs + source: modules/file-system/filestore kind: terraform - id: appsfs use: [network1] settings: name: appsfs local_mount: /apps - - source: community/modules/file-system/nfs-server + - id: nfs + source: community/modules/file-system/nfs-server kind: terraform - id: nfs use: [network1] settings: auto_delete_disk: true - - source: ./community//modules/scripts/spack-install + - id: spack + source: ./community//modules/scripts/spack-install kind: terraform - id: spack settings: install_dir: /apps/spack spack_url: https://github.com/spack/spack - spack_ref: v0.17.0 - spack_cache_url: - - mirror_name: 'gcs_cache' - mirror_url: gs://example-buildcache/linux-centos7 + spack_ref: v0.18.0 compilers: - gcc@10.3.0 target=x86_64 packages: - cmake%gcc@10.3.0 target=x86_64 - - intel-mkl%gcc@10.3.0 target=skylake - - intel-mpi@2018.4.274%gcc@10.3.0 target=skylake - - >- - fftw%intel@18.0.5 target=skylake ^intel-mpi@2018.4.274%intel@18.0.5 - target=x86_64 - - source: ./modules/scripts/startup-script + - id: startup + source: ./modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell @@ -93,13 +85,14 @@ deployment_groups: destination: "install-nfs-client.sh" - $(appsfs.install_nfs_client_runner) - $(nfs.mount_runner) + - $(spack.install_spack_deps_runner) - type: shell content: $(spack.startup_script) destination: "/apps/spack-install.sh" - - source: ./modules/compute/vm-instance + - id: instance + source: ./modules/compute/vm-instance kind: terraform - id: instance use: [network1, startup, nfs, appsfs] settings: machine_type: e2-standard-4 diff --git a/tools/validate_configs/test_configs/hpc-cluster-high-io-remote-state.yaml b/tools/validate_configs/test_configs/hpc-cluster-high-io-remote-state.yaml index 9f094a7818..6615ce89df 100644 --- a/tools/validate_configs/test_configs/hpc-cluster-high-io-remote-state.yaml +++ b/tools/validate_configs/test_configs/hpc-cluster-high-io-remote-state.yaml @@ -31,36 +31,36 @@ terraform_backend_defaults: deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home - - source: modules/file-system/filestore + - id: projectsfs + source: modules/file-system/filestore kind: terraform - id: projectsfs use: [network1] settings: filestore_tier: HIGH_SCALE_SSD size_gb: 10240 local_mount: /projects - - source: 
community/modules/file-system/DDN-EXAScaler + - id: scratchfs + source: community/modules/file-system/DDN-EXAScaler kind: terraform - id: scratchfs use: [network1] settings: local_mount: /scratch - - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: - homefs - scratchfs @@ -70,9 +70,9 @@ deployment_groups: max_node_count: 200 partition_name: compute - - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - homefs - scratchfs @@ -80,9 +80,9 @@ deployment_groups: - compute_partition - network1 - - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - homefs - scratchfs diff --git a/tools/validate_configs/test_configs/hpc-cluster-project.yaml b/tools/validate_configs/test_configs/hpc-cluster-project.yaml index 622a18d3ec..bf420d690e 100644 --- a/tools/validate_configs/test_configs/hpc-cluster-project.yaml +++ b/tools/validate_configs/test_configs/hpc-cluster-project.yaml @@ -32,18 +32,18 @@ terraform_backend_defaults: deployment_groups: - group: onboarding modules: - - source: ./community/modules/project/new-project + - id: project + source: ./community/modules/project/new-project kind: terraform - id: project settings: project_id: $(vars.project_id) folder_id: 334688113020 # random number billing_account: "111110-M2N704-854685" # random billing number org_id: 123456789 # random org id - - source: ./community/modules/project/service-enablement + - id: enable-apis + source: ./community/modules/project/service-enablement kind: terraform - id: enable-apis use: [project] settings: gcp_service_list: @@ -55,20 +55,20 @@ deployment_groups: # Source is an embedded module, denoted by "modules/*" without ./, ../, / # as a prefix. 
To refer to a local module, prefix with ./, ../ or / # Example - ./modules/network/vpc - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home - - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: - network1 - homefs @@ -78,9 +78,9 @@ deployment_groups: enable_placement: false max_node_count: 20 - - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - network1 - homefs @@ -88,9 +88,9 @@ deployment_groups: settings: login_node_count: 1 - - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - network1 - homefs diff --git a/tools/validate_configs/test_configs/hpc-cluster-service-acct.yaml b/tools/validate_configs/test_configs/hpc-cluster-service-acct.yaml index 2e14808d4d..4e1068c02b 100644 --- a/tools/validate_configs/test_configs/hpc-cluster-service-acct.yaml +++ b/tools/validate_configs/test_configs/hpc-cluster-service-acct.yaml @@ -25,22 +25,22 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 - - source: modules/file-system/pre-existing-network-storage + - id: homefs + source: modules/file-system/pre-existing-network-storage kind: terraform - id: homefs settings: server_ip: '$controller' remote_mount: /home local_mount: /home fs_type: nfs - - source: ./community/modules/project/service-account + - id: service_acct + source: ./community/modules/project/service-account kind: terraform - id: service_acct settings: project_id: $(vars.project_id) names: @@ -49,18 +49,18 @@ deployment_groups: - "compute.instanceAdmin.v1" - - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute-partition + source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute-partition use: [network1] settings: partition_name: compute network_storage: - $(homefs.network_storage) - - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm + source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm use: [network1] settings: network_storage: diff --git a/tools/validate_configs/test_configs/hpc-cluster-simple-nfs-sql.yaml b/tools/validate_configs/test_configs/hpc-cluster-simple-nfs-sql.yaml index fdaaa90655..1e9470e3ec 100644 --- a/tools/validate_configs/test_configs/hpc-cluster-simple-nfs-sql.yaml +++ b/tools/validate_configs/test_configs/hpc-cluster-simple-nfs-sql.yaml @@ -25,29 +25,29 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 - - source: ./community/modules/file-system/nfs-server + - id: homefs + source: ./community/modules/file-system/nfs-server kind: terraform - id: homefs use: [network1] settings: labels: ghpc_role: storage-home - - source: 
./community/modules/database/slurm-cloudsql-federation + - id: slurm-sql + source: ./community/modules/database/slurm-cloudsql-federation kind: terraform - id: slurm-sql use: [network1] settings: sql_instance_name: slurm-sql8 tier: "db-f1-micro" - - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute-partition + source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute-partition use: - homefs - network1 @@ -56,9 +56,9 @@ deployment_groups: max_node_count: 20 machine_type: c2-standard-4 - - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm-controller + source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm-controller use: - homefs - compute-partition @@ -69,9 +69,9 @@ deployment_groups: disable_compute_public_ips: true disable_controller_public_ips: true - - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm-login + source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm-login use: - slurm-controller - network1 diff --git a/tools/validate_configs/test_configs/hpc-cluster-simple.yaml b/tools/validate_configs/test_configs/hpc-cluster-simple.yaml index d92d61dfb8..bcf2b053b7 100644 --- a/tools/validate_configs/test_configs/hpc-cluster-simple.yaml +++ b/tools/validate_configs/test_configs/hpc-cluster-simple.yaml @@ -25,22 +25,22 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home labels: ghpc_role: storage-home - - source: modules/scripts/startup-script + - id: startup + source: modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell @@ -53,9 +53,9 @@ deployment_groups: source: "modules/startup-script/examples/mount.yaml" destination: mount.yaml - - source: modules/compute/vm-instance + - id: workstation + source: modules/compute/vm-instance kind: terraform - id: workstation use: - network1 - homefs diff --git a/tools/validate_configs/test_configs/hpc-cluster-slurm-with-startup.yaml b/tools/validate_configs/test_configs/hpc-cluster-slurm-with-startup.yaml index 0150a5a61b..ae022dbb7a 100644 --- a/tools/validate_configs/test_configs/hpc-cluster-slurm-with-startup.yaml +++ b/tools/validate_configs/test_configs/hpc-cluster-slurm-with-startup.yaml @@ -28,29 +28,29 @@ deployment_groups: # Source is an embedded module, denoted by "modules/*" without ./, ../, / # as a prefix. 
To refer to a local or community module, prefix with ./, ../ or / # Example - ./modules/network/vpc - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home - - source: modules/scripts/startup-script + - id: startup + source: modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell source: "modules/startup-script/examples/install_ansible.sh" destination: install_ansible.sh - - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: - network1 - homefs @@ -61,9 +61,9 @@ deployment_groups: partition_name: compute max_node_count: 20 - - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - network1 - homefs @@ -73,9 +73,9 @@ deployment_groups: controller_startup_script: $(startup.startup_script) compute_startup_script: $(startup.startup_script) - - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - network1 - homefs diff --git a/tools/validate_configs/test_configs/hpc-cluster-small-slurm-v5.yaml b/tools/validate_configs/test_configs/hpc-cluster-small-slurm-v5.yaml index 8e94221836..aba6e5b910 100644 --- a/tools/validate_configs/test_configs/hpc-cluster-small-slurm-v5.yaml +++ b/tools/validate_configs/test_configs/hpc-cluster-small-slurm-v5.yaml @@ -29,20 +29,20 @@ deployment_groups: # Source is an embedded resource, denoted by "resources/*" without ./, ../, / # as a prefix. 
To refer to a local resource, prefix with ./, ../ or / # Example - ./resources/network/vpc - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home - - source: community/modules/compute/schedmd-slurm-gcp-v5-partition + - id: debug_partition + source: community/modules/compute/schedmd-slurm-gcp-v5-partition kind: terraform - id: debug_partition use: - network1 - homefs @@ -53,9 +53,9 @@ deployment_groups: machine_type: n2-standard-2 is_default: true - - source: community/modules/compute/schedmd-slurm-gcp-v5-partition + - id: compute_partition + source: community/modules/compute/schedmd-slurm-gcp-v5-partition kind: terraform - id: compute_partition use: - network1 - homefs @@ -63,18 +63,18 @@ deployment_groups: partition_name: compute node_count_dynamic_max: 20 - - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller + - id: slurm_controller + source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller kind: terraform - id: slurm_controller use: - network1 - debug_partition - compute_partition - homefs - - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login + - id: slurm_login + source: community/modules/scheduler/schedmd-slurm-gcp-v5-login kind: terraform - id: slurm_login use: - network1 - slurm_controller diff --git a/tools/validate_configs/test_configs/htcondor-pool.yaml b/tools/validate_configs/test_configs/htcondor-pool.yaml index 229dd178cb..f61e2c56de 100644 --- a/tools/validate_configs/test_configs/htcondor-pool.yaml +++ b/tools/validate_configs/test_configs/htcondor-pool.yaml @@ -27,25 +27,25 @@ vars: deployment_groups: - group: htcondor-env modules: - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 outputs: - network_name - - source: community/modules/scripts/htcondor-install + - id: htcondor_install + source: community/modules/scripts/htcondor-install kind: terraform - id: htcondor_install - - source: community/modules/project/service-enablement + - id: htcondor_services + source: community/modules/project/service-enablement kind: terraform - id: htcondor_services use: - htcondor_install - - source: modules/scripts/startup-script + - id: htcondor_install_scripts + source: modules/scripts/startup-script kind: terraform - id: htcondor_install_scripts settings: runners: - type: shell @@ -57,39 +57,39 @@ deployment_groups: - group: packer modules: - - source: modules/packer/custom-image + - id: custom-image + source: modules/packer/custom-image kind: packer - id: custom-image settings: image_family: $(vars.htcondor_image_family) - group: pool modules: - - source: modules/network/pre-existing-vpc + - id: cluster_network + source: modules/network/pre-existing-vpc kind: terraform - id: cluster_network - - source: community/modules/scheduler/htcondor-configure + - id: htcondor_configure + source: community/modules/scheduler/htcondor-configure kind: terraform - id: htcondor_configure - - source: modules/scripts/startup-script + - id: htcondor_configure_central_manager + source: modules/scripts/startup-script kind: terraform - id: htcondor_configure_central_manager settings: runners: - $(htcondor_configure.central_manager_runner) - - source: modules/scripts/startup-script + - id: htcondor_configure_access_point + source: modules/scripts/startup-script kind: terraform - id: 
htcondor_configure_access_point settings: runners: - $(htcondor_configure.access_point_runner) - - source: modules/compute/vm-instance + - id: htcondor_cm + source: modules/compute/vm-instance kind: terraform - id: htcondor_cm use: - cluster_network - htcondor_configure_central_manager @@ -107,9 +107,9 @@ deployment_groups: outputs: - internal_ip - - source: modules/compute/vm-instance + - id: htcondor_access + source: modules/compute/vm-instance kind: terraform - id: htcondor_access use: - cluster_network - htcondor_configure_access_point diff --git a/tools/validate_configs/test_configs/instance-with-startup.yaml b/tools/validate_configs/test_configs/instance-with-startup.yaml index 8e0867b9c3..3b13ca6e4a 100644 --- a/tools/validate_configs/test_configs/instance-with-startup.yaml +++ b/tools/validate_configs/test_configs/instance-with-startup.yaml @@ -25,20 +25,20 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home - - source: modules/scripts/startup-script + - id: startup + source: modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell @@ -51,9 +51,9 @@ deployment_groups: source: "modules/startup-script/examples/mount.yaml" destination: "tmp.sh" - - source: modules/compute/vm-instance + - id: workstation + source: modules/compute/vm-instance kind: terraform - id: workstation use: - network1 - homefs @@ -62,8 +62,8 @@ deployment_groups: metadata: startup-script: $(startup.startup_script) - - source: ./community/modules/scripts/wait-for-startup + - id: wait + source: ./community/modules/scripts/wait-for-startup kind: terraform - id: wait settings: instance_name: ((module.workstation.name[0])) diff --git a/tools/validate_configs/test_configs/label_test.yaml b/tools/validate_configs/test_configs/label_test.yaml index a8bbd72892..b9777b2bbc 100644 --- a/tools/validate_configs/test_configs/label_test.yaml +++ b/tools/validate_configs/test_configs/label_test.yaml @@ -28,13 +28,13 @@ vars: deployment_groups: - group: infrastructure modules: - - source: modules/network/vpc + - id: network + source: modules/network/vpc kind: terraform - id: network - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network] settings: name: homefs @@ -44,9 +44,9 @@ deployment_groups: local_custom: "some_value" ghpc_deployment: "deployment_override" - - source: modules/file-system/filestore + - id: homefs1 + source: modules/file-system/filestore kind: terraform - id: homefs1 use: [network] settings: name: homefs diff --git a/tools/validate_configs/test_configs/new_project.yaml b/tools/validate_configs/test_configs/new_project.yaml index 3cc90da254..06563e5b8a 100644 --- a/tools/validate_configs/test_configs/new_project.yaml +++ b/tools/validate_configs/test_configs/new_project.yaml @@ -22,9 +22,9 @@ vars: deployment_groups: - group: primary modules: - - source: ./community/modules/project/new-project + - id: project + source: ./community/modules/project/new-project kind: terraform - id: project settings: project_id: test_project folder_id: 334688113020 # random number diff --git a/tools/validate_configs/test_configs/overwrite_labels.yaml 
b/tools/validate_configs/test_configs/overwrite_labels.yaml index d41472eda9..3d4b724bc4 100644 --- a/tools/validate_configs/test_configs/overwrite_labels.yaml +++ b/tools/validate_configs/test_configs/overwrite_labels.yaml @@ -29,13 +29,13 @@ vars: deployment_groups: - group: infrastructure modules: - - source: modules/network/vpc + - id: network + source: modules/network/vpc kind: terraform - id: network - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network] settings: name: homefs @@ -44,9 +44,9 @@ deployment_groups: labels: custom_label: some_value - - source: modules/file-system/filestore + - id: homefs1 + source: modules/file-system/filestore kind: terraform - id: homefs1 use: [network] settings: name: homefs @@ -55,9 +55,9 @@ deployment_groups: labels: ghpc_role: storage-home - - source: modules/file-system/filestore + - id: homefs2 + source: modules/file-system/filestore kind: terraform - id: homefs2 use: [network] settings: name: homefs @@ -66,9 +66,9 @@ deployment_groups: labels: ghpc_deployment: storage_deployment - - source: modules/file-system/filestore + - id: homefs3 + source: modules/file-system/filestore kind: terraform - id: homefs3 use: [network] settings: name: homefs diff --git a/tools/validate_configs/test_configs/packer.yaml b/tools/validate_configs/test_configs/packer.yaml index e93780de16..24af11c96d 100644 --- a/tools/validate_configs/test_configs/packer.yaml +++ b/tools/validate_configs/test_configs/packer.yaml @@ -27,14 +27,14 @@ vars: deployment_groups: - group: network modules: - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 - group: packer modules: - - source: modules/packer/custom-image + - id: my-custom-image + source: modules/packer/custom-image kind: packer - id: my-custom-image settings: use_iap: true omit_external_ip: true diff --git a/tools/validate_configs/test_configs/pre-existing-fs.yaml b/tools/validate_configs/test_configs/pre-existing-fs.yaml index df885129fa..6a89e74840 100644 --- a/tools/validate_configs/test_configs/pre-existing-fs.yaml +++ b/tools/validate_configs/test_configs/pre-existing-fs.yaml @@ -29,37 +29,37 @@ deployment_groups: modules: # the pre-existing-vpc is not needed here, since filestore will use the # network-name from deployment vars - - source: modules/file-system/filestore + - id: homefs-filestore + source: modules/file-system/filestore kind: terraform - id: homefs-filestore - group: compute modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/file-system/pre-existing-network-storage + - id: homefs + source: modules/file-system/pre-existing-network-storage kind: terraform - id: homefs settings: server_ip: "" # for now, must be completed manually in compute/main.tf remote_mount: nfsshare local_mount: $(vars.local_mount) # automatic, added here for clarity fs_type: nfs - - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute-partition + source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute-partition use: - homefs - network1 settings: partition_name: compute - - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm + source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm use: - homefs - compute-partition diff --git 
a/tools/validate_configs/test_configs/rocky-linux.yaml b/tools/validate_configs/test_configs/rocky-ss.yaml similarity index 77% rename from tools/validate_configs/test_configs/rocky-linux.yaml rename to tools/validate_configs/test_configs/rocky-ss.yaml index 4eabb31282..b06679749a 100644 --- a/tools/validate_configs/test_configs/rocky-linux.yaml +++ b/tools/validate_configs/test_configs/rocky-ss.yaml @@ -25,49 +25,42 @@ vars: deployment_groups: - group: primary modules: - - source: ./modules/network/pre-existing-vpc + - id: network1 + source: ./modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: appsfs + source: modules/file-system/filestore kind: terraform - id: appsfs use: [network1] settings: name: appsfs local_mount: /apps - - source: community/modules/file-system/nfs-server + - id: nfs + source: community/modules/file-system/nfs-server kind: terraform - id: nfs use: [network1] settings: image: rocky-linux-cloud/rocky-linux-8 auto_delete_disk: true - - source: ./community//modules/scripts/spack-install + - id: spack + source: ./community//modules/scripts/spack-install kind: terraform - id: spack settings: install_dir: /apps/spack spack_url: https://github.com/spack/spack - spack_ref: v0.17.0 + spack_ref: v0.18.0 spack_cache_url: - - mirror_name: 'gcs_cache' - mirror_url: gs://example-buildcache/linux-centos7 compilers: - gcc@10.3.0 target=x86_64 packages: - cmake%gcc@10.3.0 target=x86_64 - - intel-mkl%gcc@10.3.0 target=skylake - - intel-mpi@2018.4.274%gcc@10.3.0 target=skylake - - >- - fftw%intel@18.0.5 target=skylake ^intel-mpi@2018.4.274%intel@18.0.5 - target=x86_64 - - source: ./modules/scripts/startup-script + - id: startup + source: ./modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell @@ -94,13 +87,14 @@ deployment_groups: destination: "install-nfs-client.sh" - $(appsfs.install_nfs_client_runner) - $(nfs.mount_runner) + - $(spack.install_spack_deps_runner) - type: shell content: $(spack.startup_script) destination: "/apps/spack-install.sh" - - source: ./modules/compute/vm-instance + - id: instance + source: ./modules/compute/vm-instance kind: terraform - id: instance use: [network1, startup, nfs, appsfs] settings: machine_type: e2-standard-4 diff --git a/tools/validate_configs/test_configs/simple-startup.yaml b/tools/validate_configs/test_configs/simple-startup.yaml index 7c75e6e643..3940714717 100644 --- a/tools/validate_configs/test_configs/simple-startup.yaml +++ b/tools/validate_configs/test_configs/simple-startup.yaml @@ -25,13 +25,13 @@ vars: deployment_groups: - group: primary modules: - - source: ./modules/network/pre-existing-vpc + - id: network1 + source: ./modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: ./modules/scripts/startup-script + - id: startup + source: ./modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell @@ -48,15 +48,15 @@ deployment_groups: tar zxvf /tmp/$1 -C / args: "foo.tgz 'Expanding the file'" - - source: ./modules/compute/vm-instance + - id: instance + source: ./modules/compute/vm-instance kind: terraform - id: instance use: [network1, startup] settings: machine_type: e2-standard-4 - - source: ./community/modules/scripts/wait-for-startup + - id: waiter + source: ./community/modules/scripts/wait-for-startup kind: terraform - id: waiter settings: instance_name: ((module.instance.name[0])) diff --git a/tools/validate_configs/test_configs/slurm-two-partitions-workstation.yaml 
b/tools/validate_configs/test_configs/slurm-two-partitions-workstation.yaml index 7959bb93f5..4f36bb0dde 100644 --- a/tools/validate_configs/test_configs/slurm-two-partitions-workstation.yaml +++ b/tools/validate_configs/test_configs/slurm-two-partitions-workstation.yaml @@ -25,20 +25,20 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/vpc + - id: network1 + source: modules/network/vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home - - source: modules/scripts/startup-script + - id: startup + source: modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell @@ -51,9 +51,9 @@ deployment_groups: source: "modules/startup-script/examples/mount.yaml" destination: "tmp.sh" - - source: modules/compute/vm-instance + - id: workstation + source: modules/compute/vm-instance kind: terraform - id: workstation use: - network1 - homefs @@ -63,27 +63,27 @@ deployment_groups: metadata: startup-script: $(startup.startup_script) - - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute-partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute-partition use: - homefs - network1 settings: partition_name: compute - - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: debug-partition + source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: debug-partition use: - homefs - network1 settings: partition_name: debug - - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm + source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm use: - homefs - compute-partition diff --git a/tools/validate_configs/test_configs/spack-buildcache.yaml b/tools/validate_configs/test_configs/spack-buildcache.yaml index 1f2803d1f8..b8322d2c94 100644 --- a/tools/validate_configs/test_configs/spack-buildcache.yaml +++ b/tools/validate_configs/test_configs/spack-buildcache.yaml @@ -25,13 +25,13 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: ./community/modules/scripts/spack-install + - id: spack + source: ./community/modules/scripts/spack-install kind: terraform - id: spack settings: install_dir: /apps/spack spack_url: https://github.com/spack/spack @@ -51,15 +51,15 @@ deployment_groups: path: '/tmp/spack_key.gpg' caches_to_populate: - type: 'mirror' - path: ## Add GCS bucket to populate here ## + path: ## Add GCS bucket to populate here ## - - source: modules/scripts/startup-script + - id: spack-startup + source: modules/scripts/startup-script kind: terraform - id: spack-startup settings: runners: - type: data - source: ## Add path to GPG key here ## + source: ## Add path to GPG key here ## destination: /tmp/spack_key.gpg - type: shell content: | @@ -80,9 +80,9 @@ deployment_groups: destination: shutdown.sh content: shutdown -h - - source: modules/compute/vm-instance + - id: spack-build + source: modules/compute/vm-instance kind: terraform - id: spack-build use: - network1 - spack-startup diff --git a/tools/validate_configs/test_configs/spack-environments.yaml b/tools/validate_configs/test_configs/spack-environments.yaml index d8d13f8c8a..ffe5ece48b 100644 --- 
a/tools/validate_configs/test_configs/spack-environments.yaml +++ b/tools/validate_configs/test_configs/spack-environments.yaml @@ -25,13 +25,13 @@ vars: deployment_groups: - group: primary modules: - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: ./community/modules/scripts/spack-install + - id: spack + source: ./community/modules/scripts/spack-install kind: terraform - id: spack settings: install_dir: /apps/spack spack_url: https://github.com/spack/spack @@ -79,15 +79,15 @@ deployment_groups: path: '/tmp/spack_key.gpg' caches_to_populate: - type: 'mirror' - path: ## Add GCS bucket to populate here ## + path: ## Add GCS bucket to populate here ## - - source: modules/scripts/startup-script + - id: spack-startup + source: modules/scripts/startup-script kind: terraform - id: spack-startup settings: runners: - type: data - source: ## Add path to GPG key here ## + source: ## Add path to GPG key here ## destination: /tmp/spack_key.gpg - type: shell content: | @@ -108,9 +108,9 @@ deployment_groups: destination: shutdown.sh content: shutdown -h - - source: modules/compute/vm-instance + - id: spack-build + source: modules/compute/vm-instance kind: terraform - id: spack-build use: - network1 - spack-startup diff --git a/tools/validate_configs/test_configs/startup-options.yaml b/tools/validate_configs/test_configs/startup-options.yaml index 0579f6180d..cbfe2764e3 100644 --- a/tools/validate_configs/test_configs/startup-options.yaml +++ b/tools/validate_configs/test_configs/startup-options.yaml @@ -25,13 +25,13 @@ vars: deployment_groups: - group: primary modules: - - source: ./modules/network/pre-existing-vpc + - id: network1 + source: ./modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: ./modules/scripts/startup-script + - id: startup + source: ./modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell @@ -48,35 +48,35 @@ deployment_groups: tar zxvf /tmp/$1 -C / args: "foo.tgz 'Expanding the file'" - - source: ./modules/compute/vm-instance + - id: instance-explicit-startup + source: ./modules/compute/vm-instance kind: terraform - id: instance-explicit-startup use: [network1] settings: name_prefix: explicit machine_type: e2-standard-4 startup_script: $(startup.startup_script) - - source: ./modules/compute/vm-instance + - id: instance-no-startup + source: ./modules/compute/vm-instance kind: terraform - id: instance-no-startup use: [network1] settings: name_prefix: no-startup machine_type: e2-standard-4 - - source: ./modules/compute/vm-instance + - id: instance-use-startup + source: ./modules/compute/vm-instance kind: terraform - id: instance-use-startup use: [network1, startup] settings: name_prefix: use-startup machine_type: e2-standard-4 startup_script: $(startup.startup_script) - - source: ./modules/compute/vm-instance + - id: instance-metadata-startup + source: ./modules/compute/vm-instance kind: terraform - id: instance-metadata-startup use: [network1] settings: name_prefix: metadata-startup diff --git a/tools/validate_configs/test_configs/test_outputs.yaml b/tools/validate_configs/test_configs/test_outputs.yaml index 1a9ed048df..103f038974 100644 --- a/tools/validate_configs/test_configs/test_outputs.yaml +++ b/tools/validate_configs/test_configs/test_outputs.yaml @@ -25,15 +25,15 @@ vars: deployment_groups: - group: primary modules: - - source: modules/compute/vm-instance + - id: instance + source: modules/compute/vm-instance kind: 
terraform - id: instance outputs: - name - - source: community/modules/database/slurm-cloudsql-federation + - id: sql + source: community/modules/database/slurm-cloudsql-federation kind: terraform - id: sql outputs: - cloudsql settings: @@ -42,24 +42,24 @@ deployment_groups: nat_ips: - ip - - source: modules/file-system/filestore + - id: filestore + source: modules/file-system/filestore kind: terraform - id: filestore use: [vpc] outputs: - network_storage - install_nfs_client - - source: ./community/modules/file-system/nfs-server + - id: nfs + source: ./community/modules/file-system/nfs-server kind: terraform - id: nfs outputs: - network_storage - install_nfs_client - - source: modules/file-system/pre-existing-network-storage + - id: pre-existing-storage + source: modules/file-system/pre-existing-network-storage kind: terraform - id: pre-existing-storage outputs: - network_storage settings: @@ -68,9 +68,9 @@ deployment_groups: local_mount: /home fs_type: nfs - - source: modules/network/pre-existing-vpc + - id: pre-existing-vpc + source: modules/network/pre-existing-vpc kind: terraform - id: pre-existing-vpc outputs: - network_name - network_self_link @@ -79,9 +79,9 @@ deployment_groups: - subnetwork_self_link - subnetwork_address - - source: modules/network/vpc + - id: vpc + source: modules/network/vpc kind: terraform - id: vpc outputs: - network_name - network_self_link @@ -91,9 +91,9 @@ deployment_groups: - subnetwork_address - nat_ips - - source: community/modules/project/new-project + - id: new-project + source: community/modules/project/new-project kind: terraform - id: new-project outputs: - project_name - project_id @@ -117,9 +117,9 @@ deployment_groups: billing_account: "111110-M2N704-854685" # random billing number org_id: 123456789 # random org id - - source: community/modules/project/service-account + - id: sa + source: community/modules/project/service-account kind: terraform - id: sa outputs: - email - emails @@ -138,31 +138,31 @@ deployment_groups: project_roles: - "compute.instanceAdmin.v1" - - source: community/modules/scripts/spack-install + - id: spack + source: community/modules/scripts/spack-install kind: terraform - id: spack outputs: - startup_script - controller_startup_script - - source: modules/scripts/startup-script + - id: startup + source: modules/scripts/startup-script kind: terraform - id: startup outputs: - startup_script - - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: partition + source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: partition use: [vpc] outputs: - partition settings: partition_name: compute - - source: ./community/modules/file-system/DDN-EXAScaler + - id: lustre + source: ./community/modules/file-system/DDN-EXAScaler kind: terraform - id: lustre outputs: - private_addresses - ssh_console @@ -170,9 +170,9 @@ deployment_groups: - http_console - network_storage - - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: controller + source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: controller use: - partition - vpc diff --git a/tools/validate_configs/test_configs/threads_per_core.yaml b/tools/validate_configs/test_configs/threads_per_core.yaml index 441f809226..de06cab879 100644 --- a/tools/validate_configs/test_configs/threads_per_core.yaml +++ b/tools/validate_configs/test_configs/threads_per_core.yaml @@ -28,13 +28,13 @@ deployment_groups: # Source is an embedded module, denoted by "modules/*" without ./, ../, / # 
as a prefix. To refer to a local or community module, prefix with ./, ../ or / # Example - ./modules/network/vpc - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: ./modules/compute/vm-instance + - id: n1-2-threads + source: ./modules/compute/vm-instance kind: terraform - id: n1-2-threads use: - network1 settings: @@ -42,9 +42,9 @@ deployment_groups: machine_type: n1-standard-32 threads_per_core: 2 - - source: ./modules/compute/vm-instance + - id: n1-1-thread + source: ./modules/compute/vm-instance kind: terraform - id: n1-1-thread use: - network1 settings: @@ -52,9 +52,9 @@ deployment_groups: machine_type: n1-standard-32 threads_per_core: 1 - - source: ./modules/compute/vm-instance + - id: n1-0-threads + source: ./modules/compute/vm-instance kind: terraform - id: n1-0-threads use: - network1 settings: @@ -62,9 +62,9 @@ deployment_groups: machine_type: n1-standard-32 threads_per_core: 0 - - source: ./modules/compute/vm-instance + - id: n1-null-threads + source: ./modules/compute/vm-instance kind: terraform - id: n1-null-threads use: - network1 settings: @@ -72,9 +72,9 @@ deployment_groups: machine_type: n1-standard-32 threads_per_core: null - - source: ./modules/compute/vm-instance + - id: n2-2-threads + source: ./modules/compute/vm-instance kind: terraform - id: n2-2-threads use: - network1 settings: @@ -82,9 +82,9 @@ deployment_groups: machine_type: n2-standard-32 threads_per_core: 2 - - source: ./modules/compute/vm-instance + - id: n2-1-thread + source: ./modules/compute/vm-instance kind: terraform - id: n2-1-thread use: - network1 settings: @@ -92,9 +92,9 @@ deployment_groups: machine_type: n2-standard-32 threads_per_core: 1 - - source: ./modules/compute/vm-instance + - id: c2-2-threads + source: ./modules/compute/vm-instance kind: terraform - id: c2-2-threads use: - network1 settings: @@ -102,9 +102,9 @@ deployment_groups: machine_type: c2-standard-30 threads_per_core: 2 - - source: ./modules/compute/vm-instance + - id: c2-1-thread + source: ./modules/compute/vm-instance kind: terraform - id: c2-1-thread use: - network1 settings: @@ -112,9 +112,9 @@ deployment_groups: machine_type: c2-standard-30 threads_per_core: 1 - - source: ./modules/compute/vm-instance + - id: e2-medium-0-thread + source: ./modules/compute/vm-instance kind: terraform - id: e2-medium-0-thread use: - network1 settings: @@ -122,9 +122,9 @@ deployment_groups: machine_type: e2-medium threads_per_core: 0 - - source: ./modules/compute/vm-instance + - id: e2-medium-null-thread + source: ./modules/compute/vm-instance kind: terraform - id: e2-medium-null-thread use: - network1 settings: diff --git a/tools/validate_configs/test_configs/ubuntu-ss.yaml b/tools/validate_configs/test_configs/ubuntu-ss.yaml index 2fb18d80b6..b2cf676059 100644 --- a/tools/validate_configs/test_configs/ubuntu-ss.yaml +++ b/tools/validate_configs/test_configs/ubuntu-ss.yaml @@ -25,36 +25,33 @@ vars: deployment_groups: - group: primary modules: - - source: ./modules/network/pre-existing-vpc + - id: network1 + source: ./modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: appsfs + source: modules/file-system/filestore kind: terraform - id: appsfs use: [network1] settings: name: appsfs local_mount: /apps - - source: community/modules/file-system/nfs-server + - id: nfs + source: community/modules/file-system/nfs-server kind: terraform - id: nfs use: [network1] settings: - image: 
ubuntu-os-cloud/ubuntu-1804-lts auto_delete_disk: true - - source: ./community//modules/scripts/spack-install + - id: spack + source: ./community//modules/scripts/spack-install kind: terraform - id: spack settings: install_dir: /apps/spack spack_url: https://github.com/spack/spack - spack_ref: v0.17.0 + spack_ref: v0.18.0 spack_cache_url: - - mirror_name: 'gcs_cache' - mirror_url: gs://example-buildcache/linux-centos7 compilers: - gcc@10.3.0 target=x86_64 packages: @@ -65,9 +62,9 @@ deployment_groups: fftw%intel@18.0.5 target=skylake ^intel-mpi@2018.4.274%intel@18.0.5 target=x86_64 - - source: ./modules/scripts/startup-script + - id: startup + source: ./modules/scripts/startup-script kind: terraform - id: startup settings: runners: - type: shell @@ -94,13 +91,14 @@ deployment_groups: destination: "install-nfs-client.sh" - $(appsfs.install_nfs_client_runner) - $(nfs.mount_runner) + - $(spack.install_spack_deps_runner) - type: shell content: $(spack.startup_script) destination: "/apps/spack-install.sh" - - source: ./modules/compute/vm-instance + - id: instance + source: ./modules/compute/vm-instance kind: terraform - id: instance use: [network1, startup, nfs, appsfs] settings: machine_type: e2-standard-4 diff --git a/tools/validate_configs/test_configs/use-resources.yaml b/tools/validate_configs/test_configs/use-resources.yaml index 4897534fd1..5ef30961e9 100644 --- a/tools/validate_configs/test_configs/use-resources.yaml +++ b/tools/validate_configs/test_configs/use-resources.yaml @@ -28,36 +28,36 @@ deployment_groups: # Source is an embedded module, denoted by "modules/*" without ./, ../, # / as a prefix. To refer to a local module, prefix with ./, ../ or / # Example - ./modules/network/pre-existing-vpc - - source: modules/network/pre-existing-vpc + - id: network1 + source: modules/network/pre-existing-vpc kind: terraform - id: network1 - - source: modules/file-system/filestore + - id: homefs + source: modules/file-system/filestore kind: terraform - id: homefs use: [network1] settings: local_mount: /home network_name: $(network1.network_name) - - source: community/modules/file-system/nfs-server + - id: projectsfs + source: community/modules/file-system/nfs-server kind: terraform - id: projectsfs use: [network1] - - source: community/modules/file-system/DDN-EXAScaler + - id: scratchfs + source: community/modules/file-system/DDN-EXAScaler kind: terraform - id: scratchfs settings: local_mount: /scratch network_self_link: $(network1.network_self_link) subnetwork_self_link: $(network1.subnetwork_self_link) subnetwork_address: $(network1.subnetwork_address) - - source: community/modules/compute/SchedMD-slurm-on-gcp-partition + - id: compute_partition + source: community/modules/compute/SchedMD-slurm-on-gcp-partition kind: terraform - id: compute_partition use: - homefs - scratchfs @@ -66,17 +66,17 @@ deployment_groups: max_node_count: 200 partition_name: compute - - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller + - id: slurm_controller + source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller kind: terraform - id: slurm_controller use: - projectsfs - compute_partition - network1 - - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node + - id: slurm_login + source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node kind: terraform - id: slurm_login use: - homefs - scratchfs diff --git a/tools/validate_configs/validate_configs.sh b/tools/validate_configs/validate_configs.sh index 5a75cf951d..0e746caf83 100755 --- 
a/tools/validate_configs/validate_configs.sh +++ b/tools/validate_configs/validate_configs.sh @@ -19,7 +19,7 @@ run_test() { example=$1 tmpdir="$(mktemp -d)" exampleFile=$(basename "$example") - DEPLOYMENT="${exampleFile%.yaml}-$(basename "${tmpdir##*.}")" + DEPLOYMENT=$(echo "${exampleFile%.yaml}-$(basename "${tmpdir##*.}")" | sed -e 's/\(.*\)/\L\1/') PROJECT="invalid-project" echo "testing ${example} in ${tmpdir}"
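The final hunk changes how run_test() derives the deployment name: the name is now piped through sed, whose GNU \L directive lower-cases the entire match. A minimal sketch of the effect, using a hypothetical example file name (the variable names come from the hunk; the sample values are illustrative only):

  # Hypothetical walk-through of the new DEPLOYMENT derivation.
  exampleFile="Cloud-Batch.yaml"   # illustrative file name containing capitals
  tmpdir="$(mktemp -d)"            # e.g. /tmp/tmp.AbC123xy; the suffix may contain capitals
  DEPLOYMENT=$(echo "${exampleFile%.yaml}-$(basename "${tmpdir##*.}")" | sed -e 's/\(.*\)/\L\1/')
  echo "${DEPLOYMENT}"             # e.g. cloud-batch-abc123xy (all lower-case)

The presumable motivation is that both the example file name and the mktemp suffix can contain upper-case characters, which the old assignment passed through unchanged; lower-casing keeps the derived deployment name uniform. On systems without GNU sed, piping through tr '[:upper:]' '[:lower:]' would be an equivalent filter.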
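Further up, the rocky-ss.yaml and ubuntu-ss.yaml hunks both bump spack_ref from v0.17.0 to v0.18.0 and add $(spack.install_spack_deps_runner) to the startup-script runners, immediately before the runner that executes the generated Spack install script. A sketch of the resulting tail of that runners list (values are taken from the hunks above; indentation is illustrative, not copied from the files):

    settings:
      runners:
      # (earlier runners elided)
      - $(appsfs.install_nfs_client_runner)
      - $(nfs.mount_runner)
      - $(spack.install_spack_deps_runner)   # runner added by this change
      - type: shell
        content: $(spack.startup_script)
        destination: "/apps/spack-install.sh"

The added runner is an output of the spack-install module used earlier in each blueprint; judging by its name, it installs Spack's prerequisites before the generated install script runs.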