diff --git a/cmd/root.go b/cmd/root.go
index 67073d912e..a533033863 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -34,7 +34,7 @@ HPC deployments on the Google Cloud Platform.`,
log.Fatalf("cmd.Help function failed: %s", err)
}
},
- Version: "v1.3.0",
+ Version: "v1.4.0",
}
)
diff --git a/community/examples/cloud-batch.yaml b/community/examples/cloud-batch.yaml
index 80ba4cfb51..acfede4f20 100644
--- a/community/examples/cloud-batch.yaml
+++ b/community/examples/cloud-batch.yaml
@@ -27,19 +27,19 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: appfs
+ source: modules/file-system/filestore
kind: terraform
- id: appfs
use: [network1]
settings: {local_mount: /sw}
- - source: modules/scripts/startup-script
+ - id: hello-startup-script
+ source: modules/scripts/startup-script
kind: terraform
- id: hello-startup-script
settings:
runners:
- type: shell
@@ -53,9 +53,9 @@ deployment_groups:
#!/bin/sh
echo "Hello World" > /sw/hello.txt
- - source: community/modules/scheduler/cloud-batch-job
+ - id: batch-job
+ source: community/modules/scheduler/cloud-batch-job
kind: terraform
- id: batch-job
use: [network1, appfs, hello-startup-script]
settings:
runnable: "cat /sw/hello.txt"
@@ -64,8 +64,8 @@ deployment_groups:
family: centos-7
project: centos-cloud
- - source: community/modules/scheduler/cloud-batch-login-node
+ - id: batch-login
+ source: community/modules/scheduler/cloud-batch-login-node
kind: terraform
- id: batch-login
use: [batch-job]
outputs: [instructions]
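
Every blueprint touched in this change applies the same reordering: the module `id` now leads each entry, followed by `source` and `kind`. A minimal sketch of the resulting shape (module name and source simply reuse values from the example above):

```yaml
- id: network1
  source: modules/network/pre-existing-vpc
  kind: terraform
```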
diff --git a/community/examples/hpc-cluster-small-sharedvpc.yaml b/community/examples/hpc-cluster-small-sharedvpc.yaml
index 4b87fbffd1..e70e22ba80 100644
--- a/community/examples/hpc-cluster-small-sharedvpc.yaml
+++ b/community/examples/hpc-cluster-small-sharedvpc.yaml
@@ -41,17 +41,17 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
settings:
project_id: $(vars.host_project_id)
network_name: your-shared-network
subnetwork_name: your-shared-subnetwork
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
@@ -59,9 +59,9 @@ deployment_groups:
network_name: $(network1.network_id)
# This debug_partition will work out of the box without requesting additional GCP quota.
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: debug_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: debug_partition
use:
- network1
- homefs
@@ -73,9 +73,9 @@ deployment_groups:
machine_type: n2-standard-2
# This compute_partition is far more performant than debug_partition but may require requesting GCP quotas first.
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- network1
- homefs
@@ -83,9 +83,9 @@ deployment_groups:
partition_name: compute
max_node_count: 20
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- network1
- homefs
@@ -95,9 +95,9 @@ deployment_groups:
login_node_count: 1
shared_vpc_host_project: $(vars.host_project_id)
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- network1
- homefs
diff --git a/community/examples/htcondor-pool.yaml b/community/examples/htcondor-pool.yaml
index cb798fac3e..f4d82168cd 100644
--- a/community/examples/htcondor-pool.yaml
+++ b/community/examples/htcondor-pool.yaml
@@ -27,32 +27,32 @@ vars:
deployment_groups:
- group: htcondor
modules:
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
settings:
network_name: htcondor-pool
subnetwork_name: htcondor-pool-usc1
outputs:
- network_name
- - source: community/modules/scripts/htcondor-install
+ - id: htcondor_install
+ source: community/modules/scripts/htcondor-install
kind: terraform
- id: htcondor_install
- - source: community/modules/project/service-enablement
+ - id: htcondor_services
+ source: community/modules/project/service-enablement
kind: terraform
- id: htcondor_services
use:
- htcondor_install
- - source: community/modules/scheduler/htcondor-configure
+ - id: htcondor_configure
+ source: community/modules/scheduler/htcondor-configure
kind: terraform
- id: htcondor_configure
- - source: modules/scripts/startup-script
+ - id: htcondor_configure_central_manager
+ source: modules/scripts/startup-script
kind: terraform
- id: htcondor_configure_central_manager
settings:
runners:
- type: shell
@@ -61,9 +61,9 @@ deployment_groups:
- $(htcondor_install.install_htcondor_runner)
- $(htcondor_configure.central_manager_runner)
- - source: modules/compute/vm-instance
+ - id: htcondor_cm
+ source: modules/compute/vm-instance
kind: terraform
- id: htcondor_cm
use:
- network1
- htcondor_configure_central_manager
@@ -78,9 +78,9 @@ deployment_groups:
outputs:
- internal_ip
- - source: modules/scripts/startup-script
+ - id: htcondor_configure_execute_point
+ source: modules/scripts/startup-script
kind: terraform
- id: htcondor_configure_execute_point
settings:
runners:
- type: shell
@@ -89,9 +89,9 @@ deployment_groups:
- $(htcondor_install.install_htcondor_runner)
- $(htcondor_configure.execute_point_runner)
- - source: community/modules/compute/htcondor-execute-point
+ - id: htcondor_execute_point
+ source: community/modules/compute/htcondor-execute-point
kind: terraform
- id: htcondor_execute_point
use:
- network1
- htcondor_configure_execute_point
@@ -104,9 +104,9 @@ deployment_groups:
scopes:
- cloud-platform
- - source: modules/scripts/startup-script
+ - id: htcondor_configure_access_point
+ source: modules/scripts/startup-script
kind: terraform
- id: htcondor_configure_access_point
settings:
runners:
- type: shell
@@ -128,9 +128,9 @@ deployment_groups:
request_cpus = 1
request_memory = 100MB
queue
- - source: modules/compute/vm-instance
+ - id: htcondor_access
+ source: modules/compute/vm-instance
kind: terraform
- id: htcondor_access
use:
- network1
- htcondor_configure_access_point
diff --git a/community/examples/intel/daos-cluster.yaml b/community/examples/intel/daos-cluster.yaml
index ec7864d4a8..0fab4f4431 100644
--- a/community/examples/intel/daos-cluster.yaml
+++ b/community/examples/intel/daos-cluster.yaml
@@ -28,16 +28,16 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
# This module creates a DAOS server. Server images MUST be created before running this.
# https://github.com/daos-stack/google-cloud-daos/tree/main/images
# more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server
- - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1
+ - id: daos-server
+ source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1
kind: terraform
- id: daos-server
use: [network1]
settings:
number_of_instances: 2
@@ -46,9 +46,9 @@ deployment_groups:
# This module creates a MIG with DAOS clients. Client images MUST be created before running this.
# https://github.com/daos-stack/google-cloud-daos/tree/main/images
# more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_client
- - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.2.1
+ - id: daos-client
+ source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.2.1
kind: terraform
- id: daos-client
use: [network1, daos-server]
settings:
number_of_instances: 2
diff --git a/community/examples/intel/daos-slurm.yaml b/community/examples/intel/daos-slurm.yaml
index 10d2378a98..beb5598b3b 100644
--- a/community/examples/intel/daos-slurm.yaml
+++ b/community/examples/intel/daos-slurm.yaml
@@ -28,13 +28,13 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: "/home"
@@ -42,9 +42,9 @@ deployment_groups:
# This module creates a DAOS server. Server images MUST be created before running this.
# https://github.com/daos-stack/google-cloud-daos/tree/main/images
# more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server
- - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1
+ - id: daos
+ source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1
kind: terraform
- id: daos
use: [network1]
settings:
labels: {ghpc_role: file-system}
@@ -68,9 +68,9 @@ deployment_groups:
reclaim: "lazy"
containers: []
- - source: modules/scripts/startup-script
+ - id: daos-client-script
+ source: modules/scripts/startup-script
kind: terraform
- id: daos-client-script
settings:
runners:
- type: shell
@@ -87,9 +87,9 @@ deployment_groups:
destination: /var/daos/daos_client_config.sh
## This debug_partition will work out of the box without requesting additional GCP quota.
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: debug_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: debug_partition
use:
- network1
- homefs
@@ -100,9 +100,9 @@ deployment_groups:
machine_type: n2-standard-2
# This compute_partition is far more performant than debug_partition but may require requesting GCP quotas first.
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- network1
- homefs
@@ -110,9 +110,9 @@ deployment_groups:
partition_name: compute
max_node_count: 20
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- network1
- homefs
@@ -127,9 +127,9 @@ deployment_groups:
- "https://www.googleapis.com/auth/devstorage.read_only"
- "https://www.googleapis.com/auth/cloud-platform"
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- network1
- homefs
diff --git a/community/examples/intel/hpc-cluster-intel-select.yaml b/community/examples/intel/hpc-cluster-intel-select.yaml
index ecfb42a703..6e6372a855 100644
--- a/community/examples/intel/hpc-cluster-intel-select.yaml
+++ b/community/examples/intel/hpc-cluster-intel-select.yaml
@@ -31,12 +31,12 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
- - source: modules/scripts/startup-script
+ - id: startup_controller
+ source: modules/scripts/startup-script
kind: terraform
- id: startup_controller
settings:
runners:
- type: shell
@@ -47,9 +47,9 @@ deployment_groups:
google_install_mpi --prefix /apps --intel_compliance
outputs:
- startup_script
- - source: modules/scripts/startup-script
+ - id: startup_compute
+ source: modules/scripts/startup-script
kind: terraform
- id: startup_compute
settings:
runners:
- type: shell
@@ -78,17 +78,17 @@ deployment_groups:
- startup_script
- group: packer
modules:
- - source: modules/packer/custom-image
+ - id: controller-image
+ source: modules/packer/custom-image
kind: packer
- id: controller-image
settings:
disk_size: 20
source_image_project_id: [schedmd-slurm-public]
source_image_family: schedmd-slurm-21-08-8-hpc-centos-7
image_family: $(vars.controller_image_family)
- - source: modules/packer/custom-image
+ - id: compute-image
+ source: modules/packer/custom-image
kind: packer
- id: compute-image
settings:
disk_size: 20
source_image_project_id: [schedmd-slurm-public]
@@ -96,20 +96,20 @@ deployment_groups:
image_family: $(vars.compute_image_family)
- group: cluster
modules:
- - source: modules/network/pre-existing-vpc
+ - id: cluster-network
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: cluster-network
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use:
- cluster-network
settings:
local_mount: /home
# This debug_partition will work out of the box without requesting additional GCP quota.
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: debug_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: debug_partition
use:
- cluster-network
- homefs
@@ -122,9 +122,9 @@ deployment_groups:
instance_image:
family: $(vars.compute_image_family)
project: $(vars.project_id)
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- cluster-network
- homefs
@@ -135,9 +135,9 @@ deployment_groups:
project: $(vars.project_id)
max_node_count: 100
machine_type: c2-standard-60
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- cluster-network
- compute_partition
@@ -148,9 +148,9 @@ deployment_groups:
family: $(vars.controller_image_family)
project: $(vars.project_id)
controller_machine_type: c2-standard-4
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- cluster-network
- slurm_controller
diff --git a/community/examples/omnia-cluster.yaml b/community/examples/omnia-cluster.yaml
index 83496d96e4..63871743d5 100644
--- a/community/examples/omnia-cluster.yaml
+++ b/community/examples/omnia-cluster.yaml
@@ -21,6 +21,9 @@ vars:
deployment_name: omnia-cluster
zone: us-central1-c
region: us-central1
+ instance_image:
+ family: rocky-linux-8
+ project: rocky-linux-cloud
# Documentation for each of the modules used below can be found at
# https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md
@@ -30,30 +33,30 @@ deployment_groups:
modules:
## Network
- - source: modules/network/vpc
+ - id: network
+ source: modules/network/vpc
kind: terraform
- id: network
## File Systems
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network]
settings:
local_mount: "/home"
## Installation Scripts
- - source: community/modules/scripts/omnia-install
+ - id: omnia
+ source: community/modules/scripts/omnia-install
kind: terraform
- id: omnia
outputs: [inventory_file, omnia_user_warning]
settings:
manager_ips: [localhost]
compute_ips: $(compute.internal_ip)
- - source: modules/scripts/startup-script
+ - id: startup-manager
+ source: modules/scripts/startup-script
kind: terraform
- id: startup-manager
settings:
runners:
- type: shell
@@ -65,9 +68,9 @@ deployment_groups:
- $(omnia.copy_inventory_runner)
- $(omnia.install_omnia_runner)
- - source: modules/scripts/startup-script
+ - id: startup-compute
+ source: modules/scripts/startup-script
kind: terraform
- id: startup-compute
settings:
runners:
- type: shell
@@ -78,9 +81,9 @@ deployment_groups:
- $(omnia.setup_omnia_node_runner)
## Compute
- - source: modules/compute/vm-instance
+ - id: manager
+ source: modules/compute/vm-instance
kind: terraform
- id: manager
use:
- network
- homefs
@@ -89,9 +92,9 @@ deployment_groups:
name_prefix: omnia-manager
machine_type: n2-standard-4
- - source: modules/compute/vm-instance
+ - id: compute
+ source: modules/compute/vm-instance
kind: terraform
- id: compute
use:
- network
- homefs
@@ -101,9 +104,9 @@ deployment_groups:
instance_count: 2
# This module simply makes terraform wait until the startup script is complete
- - source: community/modules/scripts/wait-for-startup
+ - id: wait
+ source: community/modules/scripts/wait-for-startup
kind: terraform
- id: wait
use:
- network
settings:
diff --git a/community/examples/slurm-gcp-v5-hpc-centos7.yaml b/community/examples/slurm-gcp-v5-hpc-centos7.yaml
index 44b847419d..e913af43f7 100644
--- a/community/examples/slurm-gcp-v5-hpc-centos7.yaml
+++ b/community/examples/slurm-gcp-v5-hpc-centos7.yaml
@@ -31,20 +31,20 @@ deployment_groups:
# Source is an embedded resource, denoted by "resources/*" without ./, ../, /
# as a prefix. To refer to a local resource, prefix with ./, ../ or /
# Example - ./resources/network/vpc
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
- - source: community/modules/compute/schedmd-slurm-gcp-v5-partition
+ - id: debug_partition
+ source: community/modules/compute/schedmd-slurm-gcp-v5-partition
kind: terraform
- id: debug_partition
use:
- network1
- homefs
@@ -55,9 +55,9 @@ deployment_groups:
machine_type: n2-standard-2
is_default: true
- - source: community/modules/compute/schedmd-slurm-gcp-v5-partition
+ - id: compute_partition
+ source: community/modules/compute/schedmd-slurm-gcp-v5-partition
kind: terraform
- id: compute_partition
use:
- network1
- homefs
@@ -65,18 +65,18 @@ deployment_groups:
partition_name: compute
node_count_dynamic_max: 20
- - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller
kind: terraform
- id: slurm_controller
use:
- network1
- debug_partition
- compute_partition
- homefs
- - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login
+ - id: slurm_login
+ source: community/modules/scheduler/schedmd-slurm-gcp-v5-login
kind: terraform
- id: slurm_login
use:
- network1
- slurm_controller
diff --git a/community/examples/slurm-gcp-v5-ubuntu2004.yaml b/community/examples/slurm-gcp-v5-ubuntu2004.yaml
index c5af848861..f42e707147 100644
--- a/community/examples/slurm-gcp-v5-ubuntu2004.yaml
+++ b/community/examples/slurm-gcp-v5-ubuntu2004.yaml
@@ -31,20 +31,20 @@ deployment_groups:
# Source is an embedded resource, denoted by "resources/*" without ./, ../, /
# as a prefix. To refer to a local resource, prefix with ./, ../ or /
# Example - ./resources/network/vpc
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
- - source: community/modules/compute/schedmd-slurm-gcp-v5-partition
+ - id: debug_partition
+ source: community/modules/compute/schedmd-slurm-gcp-v5-partition
kind: terraform
- id: debug_partition
use:
- network1
- homefs
@@ -55,9 +55,9 @@ deployment_groups:
machine_type: n2-standard-2
is_default: true
- - source: community/modules/compute/schedmd-slurm-gcp-v5-partition
+ - id: compute_partition
+ source: community/modules/compute/schedmd-slurm-gcp-v5-partition
kind: terraform
- id: compute_partition
use:
- network1
- homefs
@@ -65,18 +65,18 @@ deployment_groups:
partition_name: compute
node_count_dynamic_max: 20
- - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller
kind: terraform
- id: slurm_controller
use:
- network1
- debug_partition
- compute_partition
- homefs
- - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login
+ - id: slurm_login
+ source: community/modules/scheduler/schedmd-slurm-gcp-v5-login
kind: terraform
- id: slurm_login
use:
- network1
- slurm_controller
diff --git a/community/examples/spack-gromacs.yaml b/community/examples/spack-gromacs.yaml
index 2ab1e88049..ef499f81e9 100644
--- a/community/examples/spack-gromacs.yaml
+++ b/community/examples/spack-gromacs.yaml
@@ -28,29 +28,29 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
## Filesystems
- - source: modules/file-system/filestore
+ - id: appsfs
+ source: modules/file-system/filestore
kind: terraform
- id: appsfs
use: [network1]
settings:
local_mount: /sw
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
## Install Scripts
- - source: community/modules/scripts/spack-install
+ - id: spack
+ source: community/modules/scripts/spack-install
kind: terraform
- id: spack
settings:
install_dir: /sw/spack
spack_url: https://github.com/spack/spack
@@ -83,9 +83,9 @@ deployment_groups:
# - mirror_name: gcs_cache
# mirror_url: gs://bucket-name/...
- - source: modules/scripts/startup-script
+ - id: spack-startup
+ source: modules/scripts/startup-script
kind: terraform
- id: spack-startup
settings:
runners:
- type: shell
@@ -94,9 +94,9 @@ deployment_groups:
- $(spack.install_spack_deps_runner)
- $(spack.install_spack_runner)
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- network1
- homefs
@@ -105,9 +105,9 @@ deployment_groups:
partition_name: compute
max_node_count: 20
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- network1
- homefs
@@ -116,9 +116,9 @@ deployment_groups:
settings:
login_node_count: 1
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- network1
- homefs
diff --git a/community/modules/compute/SchedMD-slurm-on-gcp-partition/README.md b/community/modules/compute/SchedMD-slurm-on-gcp-partition/README.md
index c9863995e5..6b9d45500e 100644
--- a/community/modules/compute/SchedMD-slurm-on-gcp-partition/README.md
+++ b/community/modules/compute/SchedMD-slurm-on-gcp-partition/README.md
@@ -18,9 +18,9 @@ The following code snippet creates a partition module with:
* Mounted to homefs via `use`
```yaml
-- source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+- id: compute_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use: [network1, homefs]
settings:
max_node_count: 200
diff --git a/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf b/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf
index ed0c14c6cb..2aec7e09c7 100644
--- a/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf
+++ b/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf
@@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
- module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.4.0"
}
required_version = ">= 0.14.0"
diff --git a/community/modules/compute/htcondor-execute-point/README.md b/community/modules/compute/htcondor-execute-point/README.md
index f335c41d8c..a54a4e3234 100644
--- a/community/modules/compute/htcondor-execute-point/README.md
+++ b/community/modules/compute/htcondor-execute-point/README.md
@@ -22,9 +22,9 @@ a startup script and network created in previous steps.
> OS Login on all HTCondor nodes, including execute points.
```yaml
-- source: community/modules/compute/htcondor-execute-point
+- id: htcondor_execute_point
+ source: community/modules/compute/htcondor-execute-point
kind: terraform
- id: htcondor_execute_point
use:
- network1
- htcondor_configure_execute_point
diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md b/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md
index c344404a00..d977d584fd 100644
--- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md
+++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md
@@ -71,7 +71,7 @@ No providers.
| Name | Source | Version |
|------|--------|---------|
-| [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | v5.0.3 |
+| [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | v5.1.0 |
## Resources
@@ -82,6 +82,7 @@ No resources.
| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| [additional\_disks](#input\_additional\_disks) | Configurations of additional disks to be included on the partition nodes. |
list(object({
disk_name = string
device_name = string
disk_size_gb = number
disk_type = string
disk_labels = map(string)
auto_delete = bool
boot = bool
}))
| `[]` | no |
+| [bandwidth\_tier](#input\_bandwidth\_tier) | Configures the network interface card and the maximum egress bandwidth for VMs.
- Setting `platform_default` respects the Google Cloud Platform API default values for networking.
- Setting `virtio_enabled` explicitly selects the VirtioNet network adapter.
- Setting `gvnic_enabled` selects the gVNIC network adapter (without Tier 1 high bandwidth).
- Setting `tier_1_enabled` selects both the gVNIC adapter and Tier 1 high bandwidth networking.
- Note: both gVNIC and Tier 1 networking require a VM image with gVNIC support as well as specific VM families and shapes.
- See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"platform_default"` | no |
| [can\_ip\_forward](#input\_can\_ip\_forward) | Enable IP forwarding, for NAT instances for example. | `bool` | `false` | no |
| [deployment\_name](#input\_deployment\_name) | Name of the deployment. | `string` | n/a | yes |
| [disable\_smt](#input\_disable\_smt) | Disables Simultaneous Multi-Threading (SMT) on instance. | `bool` | `false` | no |
diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf
index 1f42304480..e44081e99a 100644
--- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf
+++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf
@@ -29,6 +29,7 @@ locals {
# Template By Definition
additional_disks = var.additional_disks
+ bandwidth_tier = var.bandwidth_tier
can_ip_forward = var.can_ip_forward
disable_smt = var.disable_smt
disk_auto_delete = var.disk_auto_delete
@@ -69,7 +70,7 @@ locals {
module "slurm_partition" {
- source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=v5.0.3"
+ source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=v5.1.0"
slurm_cluster_name = local.slurm_cluster_name
partition_nodes = local.partition_nodes
diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf
index 0dd61d1a95..03230e5c1f 100644
--- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf
+++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf
@@ -15,7 +15,7 @@
*/
# Most variables have been sourced and modified from the SchedMD/slurm-gcp
-# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.0.3
+# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.1.0
variable "deployment_name" {
description = "Name of the deployment."
@@ -359,3 +359,22 @@ variable "spot_instance_config" {
})
default = null
}
+
+variable "bandwidth_tier" {
+  description = <<EOT
+  Configures the network interface card and the maximum egress bandwidth for VMs.
+  - Setting `platform_default` respects the Google Cloud Platform API default values for networking.
+  - Setting `virtio_enabled` explicitly selects the VirtioNet network adapter.
+  - Setting `gvnic_enabled` selects the gVNIC network adapter (without Tier 1 high bandwidth).
+  - Setting `tier_1_enabled` selects both the gVNIC adapter and Tier 1 high bandwidth networking.
+  - Note: both gVNIC and Tier 1 networking require a VM image with gVNIC support as well as specific VM families and shapes.
+  - See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details.
+  EOT
+  type        = string
+  default     = "platform_default"
+}
diff --git a/community/modules/file-system/nfs-server/README.md b/community/modules/file-system/nfs-server/README.md
--- a/community/modules/file-system/nfs-server/README.md
+++ b/community/modules/file-system/nfs-server/README.md
+> **_WARNING:_** This module has only been tested against the HPC centos7 OS
+> disk image (the default). Using other images may work, but have not been
+> verified.
+
[disk]: https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_disk
### Example
```yaml
-- source: community/modules/file-system/nfs-server
+- id: homefs
+ source: community/modules/file-system/nfs-server
kind: terraform
- id: homefs
use: [network1]
settings:
auto_delete_disk: true
diff --git a/community/modules/file-system/nfs-server/scripts/install-nfs-client.sh b/community/modules/file-system/nfs-server/scripts/install-nfs-client.sh
index 8ad49f4780..6c49163eb2 100644
--- a/community/modules/file-system/nfs-server/scripts/install-nfs-client.sh
+++ b/community/modules/file-system/nfs-server/scripts/install-nfs-client.sh
@@ -24,7 +24,7 @@ if [ ! "$(which mount.nfs)" ]; then
enable_repo="baseos"
else
echo "Unsupported version of centos/RHEL/Rocky"
- exit 1
+ return 1
fi
yum install --disablerepo="*" --enablerepo=${enable_repo} -y nfs-utils
elif [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release || grep -qi ubuntu /etc/os-release; then
@@ -32,6 +32,6 @@ if [ ! "$(which mount.nfs)" ]; then
apt-get -y install nfs-common
else
echo 'Unsuported distribution'
- exit 1
+ return 1
fi
fi
diff --git a/community/modules/file-system/nfs-server/versions.tf b/community/modules/file-system/nfs-server/versions.tf
index 36b6144abc..129594c1f4 100644
--- a/community/modules/file-system/nfs-server/versions.tf
+++ b/community/modules/file-system/nfs-server/versions.tf
@@ -26,7 +26,7 @@ terraform {
}
}
provider_meta "google" {
- module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.4.0"
}
required_version = ">= 0.14.0"
diff --git a/community/modules/project/new-project/README.md b/community/modules/project/new-project/README.md
index 166f2c3eaa..6301b04d3f 100644
--- a/community/modules/project/new-project/README.md
+++ b/community/modules/project/new-project/README.md
@@ -9,9 +9,9 @@ This module is meant for use with Terraform 0.13.
### Example
```yaml
-- source: community/modules/project/new-project
+- id: project
+ source: community/modules/project/new-project
kind: terraform
- id: project
settings:
project_id: test_project
folder_id: 334688113020 # random number
diff --git a/community/modules/project/service-account/README.md b/community/modules/project/service-account/README.md
index e0831db438..cf9331f2d2 100644
--- a/community/modules/project/service-account/README.md
+++ b/community/modules/project/service-account/README.md
@@ -5,9 +5,9 @@ Allows creation of service accounts for a Google Cloud Platform project.
### Example
```yaml
-- source: community/modules/project/service-account
+- id: service_acct
+ source: community/modules/project/service-account
kind: terraform
- id: service_acct
settings:
- project_id: $(vars.project_id)
- names: [ "instance_acct" ]
diff --git a/community/modules/project/service-enablement/README.md b/community/modules/project/service-enablement/README.md
index 8184996eb8..f03091a28b 100644
--- a/community/modules/project/service-enablement/README.md
+++ b/community/modules/project/service-enablement/README.md
@@ -5,9 +5,9 @@ Allows management of multiple API services for a Google Cloud Platform project.
### Example
```yaml
-- source: community/modules/project/service-enablement
+- id: services-api
+ source: community/modules/project/service-enablement
kind: terraform
- id: services-api
settings:
gcp_service_list: [
"file.googleapis.com",
diff --git a/community/modules/project/service-enablement/versions.tf b/community/modules/project/service-enablement/versions.tf
index 16d9699e8f..7f5fbc7e64 100644
--- a/community/modules/project/service-enablement/versions.tf
+++ b/community/modules/project/service-enablement/versions.tf
@@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
- module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.4.0"
}
required_version = ">= 0.14.0"
diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/README.md b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/README.md
index eb195a0f20..ff3787ad52 100644
--- a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/README.md
+++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/README.md
@@ -17,9 +17,9 @@ controller for optimal performance at different scales.
### Example
```yaml
-- source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+- id: slurm_controller
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- network1
- homefs
diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf
index 36750c362e..5e4b748824 100644
--- a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf
+++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf
@@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
- module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.4.0"
}
required_version = ">= 0.14.0"
diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/README.md b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/README.md
index ed19144dec..ba18a7adf4 100644
--- a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/README.md
+++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/README.md
@@ -14,9 +14,9 @@ node is used in conjunction with the
### Example
```yaml
-- source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+- id: slurm_login
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- network1
- homefs
diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf
index 1eacbe1e96..819c7fbecc 100644
--- a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf
+++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf
@@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
- module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.4.0"
}
required_version = ">= 0.14.0"
diff --git a/community/modules/scheduler/cloud-batch-job/README.md b/community/modules/scheduler/cloud-batch-job/README.md
index 2f7151b5df..5b5328b16a 100644
--- a/community/modules/scheduler/cloud-batch-job/README.md
+++ b/community/modules/scheduler/cloud-batch-job/README.md
@@ -15,9 +15,9 @@ job unless one is provided. See the
## Example
```yaml
-- source: community/modules/scheduler/cloud-batch-job
+- id: batch-job
+ source: community/modules/scheduler/cloud-batch-job
kind: terraform
- id: batch-job
use: [network1]
settings:
runnable: "echo 'hello world'"
@@ -51,24 +51,24 @@ trying to set a property not natively supported in the `cloud-batch-job` module.
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: appfs
+ source: modules/file-system/filestore
kind: terraform
- id: appfs
use: [network1]
- - source: modules/scripts/startup-script
+ - id: batch-startup-script
+ source: modules/scripts/startup-script
kind: terraform
- id: batch-startup-script
settings:
runners: ...
- - source: github.com/terraform-google-modules/terraform-google-vm//modules/instance_template?ref=v7.8.0
+ - id: batch-compute-template
+ source: github.com/terraform-google-modules/terraform-google-vm//modules/instance_template?ref=v7.8.0
kind: terraform
- id: batch-compute-template
use: [batch-startup-script]
settings:
# Boiler plate to work with Cloud Foundation Toolkit
@@ -82,9 +82,9 @@ deployment_groups:
source_image_family: hpc-centos-7
source_image_project: cloud-hpc-image-public
- - source: ./community/modules/scheduler/cloud-batch-job
+ - id: batch-job
+ source: ./community/modules/scheduler/cloud-batch-job
kind: terraform
- id: batch-job
settings:
instance_template: $(batch-compute-template.self_link)
outputs: [instructions]
diff --git a/community/modules/scheduler/cloud-batch-job/outputs.tf b/community/modules/scheduler/cloud-batch-job/outputs.tf
index fcabec3d5d..4822c45bdb 100644
--- a/community/modules/scheduler/cloud-batch-job/outputs.tf
+++ b/community/modules/scheduler/cloud-batch-job/outputs.tf
@@ -29,7 +29,7 @@ output "instructions" {
gcloud ${var.gcloud_version} batch jobs delete ${local.job_id} --location=${var.region} --project=${var.project_id}
List all jobs in region:
- gcloud ${var.gcloud_version} batch jobs list ${var.region} --project=${var.project_id}
+ gcloud ${var.gcloud_version} batch jobs list --project=${var.project_id}
EOT
}
diff --git a/community/modules/scheduler/cloud-batch-login-node/README.md b/community/modules/scheduler/cloud-batch-login-node/README.md
index 89cdb27349..a208288162 100644
--- a/community/modules/scheduler/cloud-batch-login-node/README.md
+++ b/community/modules/scheduler/cloud-batch-login-node/README.md
@@ -18,14 +18,14 @@ systems and test installed software before submitting a Google Cloud Batch job.
## Example
```yaml
-- source: community/modules/scheduler/cloud-batch-job
+- id: batch-job
+ source: community/modules/scheduler/cloud-batch-job
kind: terraform
- id: batch-job
...
-- source: community/modules/scheduler/cloud-batch-login-node
+- id: batch-login
+ source: community/modules/scheduler/cloud-batch-login-node
kind: terraform
- id: batch-login
use: [batch-job]
outputs: [instructions]
```
diff --git a/community/modules/scheduler/cloud-batch-login-node/outputs.tf b/community/modules/scheduler/cloud-batch-login-node/outputs.tf
index 2c5ec8b613..a7e34f0223 100644
--- a/community/modules/scheduler/cloud-batch-login-node/outputs.tf
+++ b/community/modules/scheduler/cloud-batch-login-node/outputs.tf
@@ -37,6 +37,6 @@ output "instructions" {
gcloud ${var.gcloud_version} batch jobs delete ${var.job_id} --location=${var.region} --project=${var.project_id}
List all jobs in region:
- gcloud ${var.gcloud_version} batch jobs list ${var.region} --project=${var.project_id}
+ gcloud ${var.gcloud_version} batch jobs list --project=${var.project_id}
EOT
}
diff --git a/community/modules/scheduler/cloud-batch-login-node/versions.tf b/community/modules/scheduler/cloud-batch-login-node/versions.tf
index ee89adf9c7..47ce3c04ce 100644
--- a/community/modules/scheduler/cloud-batch-login-node/versions.tf
+++ b/community/modules/scheduler/cloud-batch-login-node/versions.tf
@@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
- module_name = "blueprints/terraform/hpc-toolkit:cloud-batch-login-node/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:cloud-batch-login-node/v1.4.0"
}
required_version = ">= 0.14.0"
diff --git a/community/modules/scheduler/htcondor-configure/README.md b/community/modules/scheduler/htcondor-configure/README.md
index b7f7e98d5d..5e36ea81e7 100644
--- a/community/modules/scheduler/htcondor-configure/README.md
+++ b/community/modules/scheduler/htcondor-configure/README.md
@@ -24,13 +24,13 @@ install the HTCondor software and adds custom configurations using
[htcondor-configure] and [htcondor-execute-point].
```yaml
-- source: community/modules/scripts/htcondor-install
+- id: htcondor_install
+ source: community/modules/scripts/htcondor-install
kind: terraform
- id: htcondor_install
-- source: modules/scripts/startup-script
+- id: htcondor_configure_central_manager
+ source: modules/scripts/startup-script
kind: terraform
- id: htcondor_configure_central_manager
settings:
runners:
- type: shell
@@ -39,9 +39,9 @@ install the HTCondor software and adds custom configurations using
- $(htcondor_install.install_htcondor_runner)
- $(htcondor_configure.central_manager_runner)
-- source: modules/scripts/startup-script
+- id: htcondor_configure_access_point
+ source: modules/scripts/startup-script
kind: terraform
- id: htcondor_configure_access_point
settings:
runners:
- type: shell
diff --git a/community/modules/scheduler/htcondor-configure/versions.tf b/community/modules/scheduler/htcondor-configure/versions.tf
index 8f044df3e8..f9221d2919 100644
--- a/community/modules/scheduler/htcondor-configure/versions.tf
+++ b/community/modules/scheduler/htcondor-configure/versions.tf
@@ -26,7 +26,7 @@ terraform {
}
}
provider_meta "google" {
- module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.4.0"
}
required_version = ">= 0.13.0"
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md
index a678b1b973..56a4830e3d 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md
@@ -28,9 +28,9 @@ controller for optimal performance at different scales.
### Example
```yaml
-- source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller
+- id: slurm_controller
+ source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller
kind: terraform
- id: slurm_controller
use:
- network1
- homefs
@@ -90,8 +90,8 @@ No providers.
| Name | Source | Version |
|------|--------|---------|
-| [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance | v5.0.3 |
-| [slurm\_controller\_template](#module\_slurm\_controller\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | v5.0.3 |
+| [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance | v5.1.0 |
+| [slurm\_controller\_template](#module\_slurm\_controller\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | v5.1.0 |
## Resources
@@ -133,7 +133,7 @@ No resources.
| [network\_self\_link](#input\_network\_self\_link) | Network to deploy to. Either network\_self\_link or subnetwork\_self\_link must be specified. | `string` | `null` | no |
| [network\_storage](#input\_network\_storage) | Storage to mounted on all instances.
server\_ip : Address of the storage server.
remote\_mount : The location in the remote instance filesystem to mount from.
local\_mount : The location on the instance filesystem to mount to.
fs\_type : Filesystem type (e.g. "nfs").
mount\_options : Options to mount with. | list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
}))
| `[]` | no |
| [on\_host\_maintenance](#input\_on\_host\_maintenance) | Instance availability Policy. | `string` | `"MIGRATE"` | no |
-| [partition](#input\_partition) | Cluster partitions as a list. | list(object({
compute_list = list(string)
partition = object({
enable_job_exclusive = bool
enable_placement_groups = bool
network_storage = list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
}))
partition_conf = map(string)
partition_name = string
partition_nodes = map(object({
node_count_dynamic_max = number
node_count_static = number
enable_spot_vm = bool
group_name = string
instance_template = string
node_conf = map(string)
spot_instance_config = object({
termination_action = string
})
}))
subnetwork = string
zone_policy_allow = list(string)
zone_policy_deny = list(string)
})
}))
| `[]` | no |
+| [partition](#input\_partition) | Cluster partitions as a list. | list(object({
compute_list = list(string)
partition = object({
enable_job_exclusive = bool
enable_placement_groups = bool
network_storage = list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
}))
partition_conf = map(string)
partition_name = string
partition_nodes = map(object({
bandwidth_tier = string
node_count_dynamic_max = number
node_count_static = number
enable_spot_vm = bool
group_name = string
instance_template = string
node_conf = map(string)
spot_instance_config = object({
termination_action = string
})
}))
subnetwork = string
zone_policy_allow = list(string)
zone_policy_deny = list(string)
})
}))
| `[]` | no |
| [preemptible](#input\_preemptible) | Allow the instance to be preempted. | `bool` | `false` | no |
| [project\_id](#input\_project\_id) | Project ID to create resources in. | `string` | n/a | yes |
| [prolog\_scripts](#input\_prolog\_scripts) | List of scripts to be used for Prolog. Programs for the slurmd to execute
whenever it is asked to run a job step from a new job allocation.
See https://slurm.schedmd.com/slurm.conf.html#OPT_Prolog. | list(object({
filename = string
content = string
}))
| `[]` | no |
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf
index b6a38ed12f..52b14457ed 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf
@@ -25,7 +25,7 @@ locals {
}]
# Since deployment name may be used to create a cluster name, we remove any invalid character from the beginning
# Also, slurm imposed a lot of restrictions to this name, so we format it to an acceptable string
- tmp_cluster_name = substr(replace(lower(var.deployment_name), "/^[^a-z]*|[^a-z0-9]/", ""), 0, 8)
+ tmp_cluster_name = substr(replace(lower(var.deployment_name), "/^[^a-z]*|[^a-z0-9]/", ""), 0, 10)
slurm_cluster_name = var.slurm_cluster_name != null ? var.slurm_cluster_name : local.tmp_cluster_name
enable_public_ip_access_config = var.disable_controller_public_ips ? [] : [{ nat_ip = null, network_tier = null }]
@@ -34,7 +34,7 @@ locals {
}
module "slurm_controller_instance" {
- source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance?ref=v5.0.3"
+ source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance?ref=v5.1.0"
access_config = local.access_config
slurm_cluster_name = local.slurm_cluster_name
@@ -66,7 +66,7 @@ module "slurm_controller_instance" {
}
module "slurm_controller_template" {
- source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=v5.0.3"
+ source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=v5.1.0"
additional_disks = var.additional_disks
can_ip_forward = var.can_ip_forward
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf
index cbde26cffd..98e806c46b 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf
@@ -15,7 +15,7 @@
*/
# Most variables have been sourced and modified from the SchedMD/slurm-gcp
-# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.0.3
+# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.1.0
variable "access_config" {
description = "Access configurations, i.e. IPs via which the VM instance can be accessed via the Internet."
@@ -318,6 +318,7 @@ variable "partition" {
partition_conf = map(string)
partition_name = string
partition_nodes = map(object({
+ bandwidth_tier = string
node_count_dynamic_max = number
node_count_static = number
enable_spot_vm = bool
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md
index 422710eba4..e5d75fb538 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md
@@ -12,9 +12,9 @@ terraform modules. The login node is used in conjunction with the
### Example
```yaml
-- source: community/modules/scheduler/schedmd-slurm-gcp-v5-login
+- id: slurm_login
+ source: community/modules/scheduler/schedmd-slurm-gcp-v5-login
kind: terraform
- id: slurm_login
use:
- network1
- slurm_controller
@@ -72,8 +72,8 @@ No providers.
| Name | Source | Version |
|------|--------|---------|
-| [slurm\_login\_instance](#module\_slurm\_login\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance | v5.0.3 |
-| [slurm\_login\_template](#module\_slurm\_login\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | v5.0.3 |
+| [slurm\_login\_instance](#module\_slurm\_login\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance | v5.1.0 |
+| [slurm\_login\_template](#module\_slurm\_login\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | v5.1.0 |
## Resources
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf
index 380af82af7..7e69227157 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf
@@ -21,7 +21,7 @@ locals {
}]
# Since deployment name may be used to create a cluster name, we remove any invalid character from the beginning
# Also, slurm imposed a lot of restrictions to this name, so we format it to an acceptable string
- tmp_cluster_name = substr(replace(lower(var.deployment_name), "/^[^a-z]*|[^a-z0-9]/", ""), 0, 8)
+ tmp_cluster_name = substr(replace(lower(var.deployment_name), "/^[^a-z]*|[^a-z0-9]/", ""), 0, 10)
slurm_cluster_name = var.slurm_cluster_name != null ? var.slurm_cluster_name : local.tmp_cluster_name
enable_public_ip_access_config = var.disable_login_public_ips ? [] : [{ nat_ip = null, network_tier = null }]
@@ -29,9 +29,10 @@ locals {
}
module "slurm_login_template" {
- source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=v5.0.3"
+ source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=v5.1.0"
additional_disks = var.additional_disks
+ bandwidth_tier = "platform_default"
can_ip_forward = var.can_ip_forward
slurm_cluster_name = local.slurm_cluster_name
disable_smt = var.disable_smt
@@ -65,7 +66,7 @@ module "slurm_login_template" {
}
module "slurm_login_instance" {
- source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance?ref=v5.0.3"
+ source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance?ref=v5.1.0"
access_config = local.access_config
slurm_cluster_name = local.slurm_cluster_name
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf
index 0e465449c8..0f565de63b 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf
@@ -15,7 +15,7 @@
*/
# Most variables have been sourced and modified from the SchedMD/slurm-gcp
-# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.0.3
+# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.1.0
variable "project_id" {
type = string
diff --git a/community/modules/scripts/htcondor-install/README.md b/community/modules/scripts/htcondor-install/README.md
index 69c1fe6965..80c085d6ff 100644
--- a/community/modules/scripts/htcondor-install/README.md
+++ b/community/modules/scripts/htcondor-install/README.md
@@ -21,13 +21,13 @@ install the HTCondor software and adds custom configurations using
[htcondor-configure] and [htcondor-execute-point].
```yaml
-- source: community/modules/scripts/htcondor-install
+- id: htcondor_install
+ source: community/modules/scripts/htcondor-install
kind: terraform
- id: htcondor_install
-- source: modules/scripts/startup-script
+- id: htcondor_configure_central_manager
+ source: modules/scripts/startup-script
kind: terraform
- id: htcondor_configure_central_manager
settings:
runners:
- type: shell
@@ -36,9 +36,9 @@ install the HTCondor software and adds custom configurations using
- $(htcondor_install.install_htcondor_runner)
- $(htcondor_configure.central_manager_runner)
-- source: modules/scripts/startup-script
+- id: htcondor_configure_access_point
+ source: modules/scripts/startup-script
kind: terraform
- id: htcondor_configure_access_point
settings:
runners:
- type: shell
diff --git a/community/modules/scripts/htcondor-install/files/install-htcondor-autoscaler-deps.yml b/community/modules/scripts/htcondor-install/files/install-htcondor-autoscaler-deps.yml
index c3109bbb94..0351dfd75f 100644
--- a/community/modules/scripts/htcondor-install/files/install-htcondor-autoscaler-deps.yml
+++ b/community/modules/scripts/htcondor-install/files/install-htcondor-autoscaler-deps.yml
@@ -24,13 +24,13 @@
- name: Create virtual environment for HTCondor autoscaler
ansible.builtin.pip:
name: pip
- version: 21.3.1 # last Python 2.7-compatible release
+ version: 21.3.1 # last Python 2.7-compatible release
virtualenv: /usr/local/htcondor
virtualenv_command: /usr/bin/python3 -m venv
- name: Install latest setuptools
ansible.builtin.pip:
name: setuptools
- state: 44.1.1 # last Python 2.7-compatible release
+ state: 44.1.1 # last Python 2.7-compatible release
virtualenv: /usr/local/htcondor
virtualenv_command: /usr/bin/python3 -m venv
- name: Install HTCondor autoscaler dependencies
@@ -41,6 +41,6 @@
- htcondor
ansible.builtin.pip:
name: "{{ item }}"
- state: present # rely on pip resolver to pick latest compatible releases
+ state: present # rely on pip resolver to pick latest compatible releases
virtualenv: /usr/local/htcondor
virtualenv_command: /usr/bin/python3 -m venv
diff --git a/community/modules/scripts/omnia-install/README.md b/community/modules/scripts/omnia-install/README.md
index 7556b4f59b..e65514df21 100644
--- a/community/modules/scripts/omnia-install/README.md
+++ b/community/modules/scripts/omnia-install/README.md
@@ -1,14 +1,16 @@
## Description
-This module will install [DellHPC Omnia](https://github.com/dellhpc/omnia)
-onto a cluster supporting a slurm controller and compute nodes. To see a full
-example using omnia-install, see the
-[omnia-cluster example](../../../community/examples/omnia-cluster.yaml).
+This module will create a set of startup-script runners that will install and
+run [DellHPC Omnia](https://github.com/dellhpc/omnia) version 1.3 onto a set of
+VMs representing a slurm controller and compute nodes. For a full example using
+omnia-install, see the [omnia-cluster example].
**Warning**: This module will create a user named "omnia" by default which has
sudo permissions. You may want to remove this user and/or it's permissions from
each node.
+[omnia-cluster example]: ../../../community/examples/omnia-cluster.yaml
+
## License
@@ -53,6 +55,7 @@ No resources.
| [manager\_ips](#input\_manager\_ips) | IPs of the Omnia manager nodes | `list(string)` | n/a | yes |
| [omnia\_username](#input\_omnia\_username) | Name of the user that installs omnia | `string` | `"omnia"` | no |
| [slurm\_uid](#input\_slurm\_uid) | User ID of the slurm user | `number` | `981` | no |
+| [virtualenv](#input\_virtualenv) | Path to a virtual environment on the Omnia manager and compute VMs that should be used for installing packages with pip. Defaults to the virtual environment created by the startup-scripts module, /usr/local/ghpc-venv. If the virtual environment cannot be found, the system environment will be used instead. | `string` | `"/usr/local/ghpc-venv"` | no |
## Outputs
diff --git a/community/modules/scripts/omnia-install/main.tf b/community/modules/scripts/omnia-install/main.tf
index ee8edb2a18..f9de94b5a3 100644
--- a/community/modules/scripts/omnia-install/main.tf
+++ b/community/modules/scripts/omnia-install/main.tf
@@ -27,18 +27,20 @@ locals {
setup_omnia_node_file = templatefile(
"${path.module}/templates/setup_omnia_node.tpl",
{
- username = var.omnia_username
- install_dir = local.install_dir
+ username = var.omnia_username
+ install_dir = local.install_dir
+ virtualenv_path = var.virtualenv
}
)
install_file = templatefile(
"${path.module}/templates/install_omnia.tpl",
{
- username = var.omnia_username
- install_dir = local.install_dir
- omnia_compute = var.compute_ips
- nodecount = local.nodecount
- slurm_uid = var.slurm_uid
+ username = var.omnia_username
+ install_dir = local.install_dir
+ omnia_compute = var.compute_ips
+ nodecount = local.nodecount
+ slurm_uid = var.slurm_uid
+ virtualenv_path = var.virtualenv
}
)
inventory_path = "${local.install_dir}/inventory"
diff --git a/community/modules/scripts/omnia-install/templates/install_omnia.tpl b/community/modules/scripts/omnia-install/templates/install_omnia.tpl
index c812aac070..5989e8f9b1 100644
--- a/community/modules/scripts/omnia-install/templates/install_omnia.tpl
+++ b/community/modules/scripts/omnia-install/templates/install_omnia.tpl
@@ -19,34 +19,33 @@
vars:
username: ${username}
pub_key_path: "/home/{{ username }}/.ssh"
- pub_key_file: "{{pub_key_path}}/id_rsa"
- auth_key_file: "{{pub_key_path}}/authorized_keys"
+ pub_key_file: "{{ pub_key_path }}/id_rsa"
+ auth_key_file: "{{ pub_key_path }}/authorized_keys"
tasks:
- - name: "Create {{pub_key_path}} folder"
- file:
- path: "{{pub_key_path}}"
+ - name: "Create {{ pub_key_path }} folder"
+ ansible.builtin.file:
+ path: "{{ pub_key_path }}"
state: directory
mode: 0700
- owner: "{{username}}"
+ owner: "{{ username }}"
- name: Create keys
- openssh_keypair:
- path: "{{pub_key_file}}"
- owner: "{{username}}"
+ ansible.builtin.openssh_keypair:
+ path: "{{ pub_key_file }}"
+ owner: "{{ username }}"
- name: Copy public key to authorized keys
- copy:
- src: "{{pub_key_file}}.pub"
- dest: "{{auth_key_file}}"
- owner: "{{username}}"
+ ansible.builtin.copy:
+ src: "{{ pub_key_file }}.pub"
+ dest: "{{ auth_key_file }}"
+ owner: "{{ username }}"
mode: 0644
- name: Install necessary dependencies
hosts: localhost
tasks:
- - name: Install git and epel-release
- package:
+ - name: Install git
+ ansible.builtin.package:
name:
- git
- - epel-release
state: latest
- name: Prepare the system for Omnia installation
@@ -56,40 +55,29 @@
omnia_dir: "{{ install_dir }}/omnia"
slurm_uid: ${slurm_uid}
tasks:
- - name: Unmask and restart firewalld
- become: true
- command: systemctl unmask firewalld && systemctl restart firewalld
- name: Git checkout
- git:
+ ansible.builtin.git:
repo: 'https://github.com/dellhpc/omnia.git'
dest: "{{ omnia_dir }}"
- version: release-1.0
+ version: v1.3
update: false
- name: Copy inventory file with owner and permissions
- copy:
+ ansible.builtin.copy:
src: "{{ install_dir }}/inventory"
dest: "{{ omnia_dir }}/inventory"
mode: 0644
- - name: Update omnia.yml setting become to yes
- replace:
- path: "{{ omnia_dir }}/omnia.yml"
- regexp: '- name(.*)'
- replace: '- name\1\n become: yes'
- - name: Patch Slurm source URL
- replace:
- path: "{{ omnia_dir }}/roles/slurm_manager/vars/main.yml"
- regexp: '(.*)slurm-20.11.7.tar.bz2(.*)'
- replace: '\1slurm-20.11.9.tar.bz2\2'
- - name: Patch Slurm source checksum
- replace:
- path: "{{ omnia_dir }}/roles/slurm_manager/vars/main.yml"
- regexp: '^slurm_md5: .*'
- replace: 'slurm_md5: "md5:79b39943768ef21b83585e2f5087d9af"'
- - name: Add slurm user ID to the omnia vars
- replace:
+ - name: Force update the ansible.utils collection
+ command: ansible-galaxy collection install ansible.utils --force
+ - name: Update omnia config to not use a login node
+ ansible.builtin.lineinfile:
+ path: "{{ omnia_dir }}/omnia_config.yml"
+ regexp: '^login_node_required: .*'
+ line: 'login_node_required: false'
+ - name: Update omnia config to set the slurm UID
+ ansible.builtin.lineinfile:
path: "{{ omnia_dir }}/roles/slurm_common/vars/main.yml"
regexp: '^slurm_uid: ".*"'
- replace: 'slurm_uid: "{{ slurm_uid }}"'
+ line: 'slurm_uid: "{{ slurm_uid }}"'
- name: Run the Omnia installation once all nodes are ready
hosts: localhost
@@ -97,28 +85,46 @@
nodecount: ${nodecount}
install_dir: ${install_dir}
username: ${username}
+ venv: ${virtualenv_path}
omnia_dir: "{{ install_dir }}/omnia"
state_dir: "{{ install_dir }}/state"
become_user: "{{ username }}"
remote_user: "{{ username }}"
tasks:
- name: Wait for nodes to setup
- shell: |
+ ansible.builtin.shell: |
files=$(ls {{ state_dir }} | wc -l)
if [ $files -eq ${nodecount} ]; then exit 0; fi
echo "Waiting for ${nodecount} nodes to be ready, found $${files} nodes ready"
exit 1
delay: 2
retries: 300
+ - name: Checking if the provided virtualenv exists
+ stat:
+ path: "{{ venv }}"
+ register: venv_dir
+ - name: Run omnia using provided virtualenv for the python provider
+ ansible.builtin.shell: |
+ ansible-playbook omnia.yml \
+ --private-key /home/{{ username }}/.ssh/id_rsa \
+ --inventory inventory \
+ --user "{{ username }}" --become \
+ --e "ansible_python_interpreter={{ venv }}/bin/python3" \
+ --skip-tags "kubernetes,nfs_client"
+ args:
+ chdir: "{{ omnia_dir }}"
+ environment:
+ ANSIBLE_HOST_KEY_CHECKING: False
+ when: venv_dir.stat.exists
- name: Run omnia
- shell: |
+ ansible.builtin.shell: |
ansible-playbook omnia.yml \
--private-key /home/{{ username }}/.ssh/id_rsa \
--inventory inventory \
- --user "{{ username }}" \
- --e "ansible_python_interpreter=/usr/bin/python2" \
- --skip-tags "kubernetes"
+ --user "{{ username }}" --become \
+ --skip-tags "kubernetes,nfs_client"
args:
chdir: "{{ omnia_dir }}"
environment:
ANSIBLE_HOST_KEY_CHECKING: False
+ when: not venv_dir.stat.exists
diff --git a/community/modules/scripts/omnia-install/templates/inventory.tpl b/community/modules/scripts/omnia-install/templates/inventory.tpl
index f2f7f92b9e..2e8a275a16 100644
--- a/community/modules/scripts/omnia-install/templates/inventory.tpl
+++ b/community/modules/scripts/omnia-install/templates/inventory.tpl
@@ -6,3 +6,7 @@ ${vm}
%{for vm in omnia_compute ~}
${vm}
%{endfor}
+
+[nfs_node]
+
+[login_node]
diff --git a/community/modules/scripts/omnia-install/templates/setup_omnia_node.tpl b/community/modules/scripts/omnia-install/templates/setup_omnia_node.tpl
index bd0fbc5b9d..071390f9e8 100644
--- a/community/modules/scripts/omnia-install/templates/setup_omnia_node.tpl
+++ b/community/modules/scripts/omnia-install/templates/setup_omnia_node.tpl
@@ -13,21 +13,42 @@
# limitations under the License.
---
-- name: Create Omnia User
+- name: Create user for installing Omnia
hosts: localhost
vars:
username: ${username}
tasks:
- - name: Create user omnia
- user:
+ - name: Create a new user
+ ansible.builtin.user:
name: "{{ username }}"
- - name: Allow '{{ username }}' user to have passwordless sudo
- lineinfile:
+ - name: Allow '{{ username }}' user to have passwordless sudo access
+ ansible.builtin.lineinfile:
dest: /etc/sudoers
state: present
regexp: '^%%{{ username }}'
line: '%%{{ username }} ALL=(ALL) NOPASSWD: ALL'
+- name: Setup selinux
+ hosts: localhost
+ vars:
+ venv: ${virtualenv_path}
+ tasks:
+ - name: Checking if the provided virtualenv exists
+ stat:
+ path: "{{ venv }}"
+ register: venv_dir
+ - name: Install selinux using system pip
+ ansible.builtin.pip:
+ name: selinux
+ when: not venv_dir.stat.exists
+ - name: Install selinux into provided virtualenv
+ ansible.builtin.pip:
+ name: selinux
+ virtualenv: "{{ venv }}"
+ when: venv_dir.stat.exists
+ - name: Allow SSH on NFS-based home directory
+ ansible.builtin.command: setsebool -P use_nfs_home_dirs 1
+
- name: Set Status file
hosts: localhost
vars:
@@ -35,13 +56,13 @@
state_dir: "{{ install_dir }}/state"
tasks:
- name: Get hostname
- command: hostname
+ ansible.builtin.command: hostname
register: machine_hostname
- name: Create state dir if not already created
- file:
+ ansible.builtin.file:
path: "{{ state_dir }}"
state: directory
- name: Create file
- file:
+ ansible.builtin.file:
path: "{{ state_dir }}/{{ machine_hostname.stdout }}"
state: touch
diff --git a/community/modules/scripts/omnia-install/variables.tf b/community/modules/scripts/omnia-install/variables.tf
index 65172c02cc..93011303e6 100644
--- a/community/modules/scripts/omnia-install/variables.tf
+++ b/community/modules/scripts/omnia-install/variables.tf
@@ -44,3 +44,15 @@ variable "slurm_uid" {
default = 981
type = number
}
+
+variable "virtualenv" {
+ description = <<-EOT
+ Path to a virtual environment on the Omnia manager and compute VMs that
+ should be used for installing packages with pip. Defaults to the virtual
+ environment created by the startup-scripts module, /usr/local/ghpc-venv.
+ If the virtual environment cannot be found, the system environment will be
+ used instead.
+ EOT
+ default = "/usr/local/ghpc-venv"
+ type = string
+}
diff --git a/community/modules/scripts/spack-install/README.md b/community/modules/scripts/spack-install/README.md
index 8069ab4330..8c6c1c539b 100644
--- a/community/modules/scripts/spack-install/README.md
+++ b/community/modules/scripts/spack-install/README.md
@@ -30,9 +30,9 @@ As an example, the below is a possible definition of a spack installation. To
see this module used in a full blueprint, see the [spack-gromacs.yaml] example.
```yaml
- - source: community/modules/scripts/spack-install
+ - id: spack
+ source: community/modules/scripts/spack-install
kind: terraform
- id: spack
settings:
install_dir: /sw/spack
spack_url: https://github.com/spack/spack
@@ -95,23 +95,23 @@ Following the above description of this module, it can be added to a Slurm
deployment via the following:
```yaml
-- source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
- kind: terraform
- id: slurm_controller
- use: [spack]
- settings:
- subnetwork_name: ((module.network1.primary_subnetwork.name))
- login_node_count: 1
- partitions:
- - $(compute_partition.partition)
+- id: slurm_controller
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ kind: terraform
+ use: [spack]
+ settings:
+ subnetwork_name: ((module.network1.primary_subnetwork.name))
+ login_node_count: 1
+ partitions:
+ - $(compute_partition.partition)
```
Alternatively, it can be added as a startup script via:
```yaml
- - source: modules/scripts/startup-script
+ - id: startup
+ source: modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- $(spack.install_spack_deps_runner)
diff --git a/community/modules/scripts/spack-install/templates/install_spack.tpl b/community/modules/scripts/spack-install/templates/install_spack.tpl
index d48ccc89b1..620ca3e7ff 100755
--- a/community/modules/scripts/spack-install/templates/install_spack.tpl
+++ b/community/modules/scripts/spack-install/templates/install_spack.tpl
@@ -9,6 +9,11 @@ if [[ $EUID -ne 0 ]]; then
exit 1
fi
+# Activate ghpc-venv virtual environment if it exists
+if [ -d /usr/local/ghpc-venv ]; then
+ source /usr/local/ghpc-venv/bin/activate
+fi
+
# Only install and configure spack if ${INSTALL_DIR} doesn't exist
if [ ! -d ${INSTALL_DIR} ]; then
@@ -151,4 +156,3 @@ echo "source ${INSTALL_DIR}/share/spack/setup-env.sh" >> /etc/profile.d/spack.sh
chmod a+rx /etc/profile.d/spack.sh
echo "$PREFIX Setup complete..."
-exit 0
diff --git a/community/modules/scripts/wait-for-startup/README.md b/community/modules/scripts/wait-for-startup/README.md
index 5973290416..705699f8b7 100644
--- a/community/modules/scripts/wait-for-startup/README.md
+++ b/community/modules/scripts/wait-for-startup/README.md
@@ -15,9 +15,9 @@ up a node.
### Example
```yaml
-- source: community/modules/scripts/wait-for-startup
+- id: wait
+ source: community/modules/scripts/wait-for-startup
kind: terraform
- id: wait
settings:
instance_name: ((module.workstation.name[0]))
```
diff --git a/community/modules/scripts/wait-for-startup/versions.tf b/community/modules/scripts/wait-for-startup/versions.tf
index 8364495d6a..372387666c 100644
--- a/community/modules/scripts/wait-for-startup/versions.tf
+++ b/community/modules/scripts/wait-for-startup/versions.tf
@@ -26,7 +26,7 @@ terraform {
}
}
provider_meta "google" {
- module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.4.0"
}
required_version = ">= 0.14.0"
diff --git a/docs/tutorials/README.md b/docs/tutorials/README.md
index 66c5e75d06..27b43c78a0 100644
--- a/docs/tutorials/README.md
+++ b/docs/tutorials/README.md
@@ -53,15 +53,15 @@ minutes.
### Gromacs
-[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://shell.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fhpc-toolkit&cloudshell_git_branch=develop&cloudshell_open_in_editor=docs%2Ftutorials%2Fgromacs%2Fspack-gromacs.yaml&cloudshell_tutorial=docs%2Ftutorials%2Fgromacs%2Fspack-gromacs.md)
+[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://shell.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fhpc-toolkit&cloudshell_git_branch=main&cloudshell_open_in_editor=docs%2Ftutorials%2Fgromacs%2Fspack-gromacs.yaml&cloudshell_tutorial=docs%2Ftutorials%2Fgromacs%2Fspack-gromacs.md)
### Openfoam
-[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://shell.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fhpc-toolkit&cloudshell_git_branch=develop&cloudshell_open_in_editor=docs%2Ftutorials%2Fopenfoam%2Fspack-openfoam.yaml&cloudshell_tutorial=docs%2Ftutorials%2Fopenfoam%2Fspack-openfoam.md)
+[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://shell.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fhpc-toolkit&cloudshell_git_branch=main&cloudshell_open_in_editor=docs%2Ftutorials%2Fopenfoam%2Fspack-openfoam.yaml&cloudshell_tutorial=docs%2Ftutorials%2Fopenfoam%2Fspack-openfoam.md)
### Weather Research and Forecasting (WRF) Model
-[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://shell.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fhpc-toolkit&cloudshell_git_branch=develop&cloudshell_open_in_editor=docs%2Ftutorials%2Fwrfv3%2Fspack-wrfv3.yaml&cloudshell_tutorial=docs%2Ftutorials%2Fwrfv3%2Fspack-wrfv3.md)
+[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://shell.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2Fhpc-toolkit&cloudshell_git_branch=main&cloudshell_open_in_editor=docs%2Ftutorials%2Fwrfv3%2Fspack-wrfv3.yaml&cloudshell_tutorial=docs%2Ftutorials%2Fwrfv3%2Fspack-wrfv3.md)
### Blueprint Diagram for Application Tutorials
diff --git a/docs/tutorials/gromacs/spack-gromacs.md b/docs/tutorials/gromacs/spack-gromacs.md
index 16caa06811..09d7b4ab53 100644
--- a/docs/tutorials/gromacs/spack-gromacs.md
+++ b/docs/tutorials/gromacs/spack-gromacs.md
@@ -46,9 +46,9 @@ PROJECT_NUMBER=$(gcloud projects list --filter= --forma
echo "granting roles/editor to $PROJECT_NUMBER-compute@developer.gserviceaccount.com"
-gcloud iam service-accounts enable --project "$PROJECT_NUMBER"-compute@developer.gserviceaccount.com
+gcloud iam service-accounts enable --project $PROJECT_NUMBER-compute@developer.gserviceaccount.com
-gcloud projects add-iam-policy-binding --member=serviceAccount:"$PROJECT_NUMBER"-compute@developer.gserviceaccount.com --role=roles/editor
+gcloud projects add-iam-policy-binding --member=serviceAccount:$PROJECT_NUMBER-compute@developer.gserviceaccount.com --role=roles/editor
```
## Build the Toolkit Binary
diff --git a/docs/tutorials/gromacs/spack-gromacs.yaml b/docs/tutorials/gromacs/spack-gromacs.yaml
index fd23f9fac6..e7d350a386 100644
--- a/docs/tutorials/gromacs/spack-gromacs.yaml
+++ b/docs/tutorials/gromacs/spack-gromacs.yaml
@@ -26,18 +26,18 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/monitoring/dashboard
+ - id: hpc_dash
+ source: modules/monitoring/dashboard
kind: terraform
- id: hpc_dash
## Install Scripts
- - source: community/modules/scripts/spack-install
+ - id: spack
+ source: community/modules/scripts/spack-install
kind: terraform
- id: spack
settings:
install_dir: /apps/spack
spack_url: https://github.com/spack/spack
@@ -97,9 +97,9 @@ deployment_groups:
- mirror_name: gcs_cache
mirror_url: $(vars.spack_cache_mirror_url)
- - source: modules/scripts/startup-script
+ - id: controller-setup
+ source: modules/scripts/startup-script
kind: terraform
- id: controller-setup
settings:
runners:
- type: shell
@@ -142,18 +142,18 @@ deployment_groups:
gmx_mpi grompp -f pme.mdp -c conf.gro -p topol.top -o input.tpr
mpirun -n 60 -hostfile hostfile -ppn 30 gmx_mpi mdrun -notunepme -dlb yes -v -resethway -noconfout -nsteps 4000 -s input.tpr
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- network1
settings:
partition_name: compute
max_node_count: 20
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- network1
- compute_partition
@@ -161,9 +161,9 @@ deployment_groups:
controller_startup_script: $(controller-setup.startup_script)
login_node_count: 1
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- network1
- slurm_controller
diff --git a/docs/tutorials/intel-select/hpc-cluster-intel-select.yaml b/docs/tutorials/intel-select/hpc-cluster-intel-select.yaml
index d90b938090..c1ea22ae51 100644
--- a/docs/tutorials/intel-select/hpc-cluster-intel-select.yaml
+++ b/docs/tutorials/intel-select/hpc-cluster-intel-select.yaml
@@ -25,20 +25,20 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
- - source: modules/scripts/startup-script
+ - id: startup-controller
+ source: modules/scripts/startup-script
kind: terraform
- id: startup-controller
settings:
runners:
- type: shell
@@ -48,9 +48,9 @@ deployment_groups:
google_install_mpi --prefix /apps --intel_compliance
destination: /var/tmp/install_intel_controller.sh
- - source: modules/scripts/startup-script
+ - id: startup-compute
+ source: modules/scripts/startup-script
kind: terraform
- id: startup-compute
settings:
runners:
- type: shell
@@ -61,9 +61,9 @@ deployment_groups:
destination: /var/tmp/install_intel_compute.sh
# This debug_partition will work out of the box without requesting additional GCP quota.
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: debug_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: debug_partition
use:
- network1
- homefs
@@ -75,9 +75,9 @@ deployment_groups:
machine_type: n2-standard-2
# This compute_partition is far more performant than debug_partition but may require requesting GCP quotas first.
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- network1
- homefs
@@ -85,9 +85,9 @@ deployment_groups:
partition_name: compute
max_node_count: 20
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- network1
- homefs
@@ -99,9 +99,9 @@ deployment_groups:
compute_startup_script: $(startup-compute.startup_script)
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- network1
- homefs
diff --git a/docs/tutorials/intel-select/intel-select.md b/docs/tutorials/intel-select/intel-select.md
index be04c71621..32da17f8b1 100644
--- a/docs/tutorials/intel-select/intel-select.md
+++ b/docs/tutorials/intel-select/intel-select.md
@@ -37,9 +37,9 @@ PROJECT_NUMBER=$(gcloud projects list --filter= --forma
echo "granting roles/editor to $PROJECT_NUMBER-compute@developer.gserviceaccount.com"
-gcloud iam service-accounts enable --project "$PROJECT_NUMBER"-compute@developer.gserviceaccount.com
+gcloud iam service-accounts enable --project $PROJECT_NUMBER-compute@developer.gserviceaccount.com
-gcloud projects add-iam-policy-binding --member=serviceAccount:"$PROJECT_NUMBER"-compute@developer.gserviceaccount.com --role=roles/editor
+gcloud projects add-iam-policy-binding --member=serviceAccount:$PROJECT_NUMBER-compute@developer.gserviceaccount.com --role=roles/editor
```
## Build the Toolkit Binary
diff --git a/docs/tutorials/openfoam/spack-openfoam.md b/docs/tutorials/openfoam/spack-openfoam.md
index fd573bce49..f31f510cc0 100644
--- a/docs/tutorials/openfoam/spack-openfoam.md
+++ b/docs/tutorials/openfoam/spack-openfoam.md
@@ -46,9 +46,9 @@ PROJECT_NUMBER=$(gcloud projects list --filter= --forma
echo "granting roles/editor to $PROJECT_NUMBER-compute@developer.gserviceaccount.com"
-gcloud iam service-accounts enable --project "$PROJECT_NUMBER"-compute@developer.gserviceaccount.com
+gcloud iam service-accounts enable --project $PROJECT_NUMBER-compute@developer.gserviceaccount.com
-gcloud projects add-iam-policy-binding --member=serviceAccount:"$PROJECT_NUMBER"-compute@developer.gserviceaccount.com --role=roles/editor
+gcloud projects add-iam-policy-binding --member=serviceAccount:$PROJECT_NUMBER-compute@developer.gserviceaccount.com --role=roles/editor
```
## Build the Toolkit Binary
diff --git a/docs/tutorials/openfoam/spack-openfoam.yaml b/docs/tutorials/openfoam/spack-openfoam.yaml
index ceb43699d0..5f2fae7f6f 100644
--- a/docs/tutorials/openfoam/spack-openfoam.yaml
+++ b/docs/tutorials/openfoam/spack-openfoam.yaml
@@ -26,18 +26,18 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/monitoring/dashboard
+ - id: hpc_dash
+ source: modules/monitoring/dashboard
kind: terraform
- id: hpc_dash
## Install Scripts
- - source: community/modules/scripts/spack-install
+ - id: spack
+ source: community/modules/scripts/spack-install
kind: terraform
- id: spack
settings:
install_dir: /apps/spack
spack_url: https://github.com/spack/spack
@@ -104,9 +104,9 @@ deployment_groups:
- mirror_name: gcs_cache
mirror_url: $(vars.spack_cache_mirror_url)
- - source: modules/scripts/startup-script
+ - id: controller-setup
+ source: modules/scripts/startup-script
kind: terraform
- id: controller-setup
settings:
runners:
- type: shell
@@ -153,18 +153,18 @@ deployment_groups:
mpirun -n 60 -npernode 30 -hostfile hostfile snappyHexMesh -overwrite -parallel
mpirun -n 60 -npernode 30 -hostfile hostfile potentialFoam -parallel
mpirun -n 60 -npernode 30 -hostfile hostfile simpleFoam -parallel
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- network1
settings:
partition_name: compute
max_node_count: 20
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- network1
- compute_partition
@@ -172,9 +172,9 @@ deployment_groups:
controller_startup_script: $(controller-setup.startup_script)
login_node_count: 1
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- network1
- slurm_controller
diff --git a/docs/tutorials/wrfv3/spack-wrfv3.md b/docs/tutorials/wrfv3/spack-wrfv3.md
index 886b7c8da7..039739250d 100644
--- a/docs/tutorials/wrfv3/spack-wrfv3.md
+++ b/docs/tutorials/wrfv3/spack-wrfv3.md
@@ -46,9 +46,9 @@ PROJECT_NUMBER=$(gcloud projects list --filter= --forma
echo "granting roles/editor to $PROJECT_NUMBER-compute@developer.gserviceaccount.com"
-gcloud iam service-accounts enable --project "$PROJECT_NUMBER"-compute@developer.gserviceaccount.com
+gcloud iam service-accounts enable --project $PROJECT_NUMBER-compute@developer.gserviceaccount.com
-gcloud projects add-iam-policy-binding --member=serviceAccount:"$PROJECT_NUMBER"-compute@developer.gserviceaccount.com --role=roles/editor
+gcloud projects add-iam-policy-binding --member=serviceAccount:$PROJECT_NUMBER-compute@developer.gserviceaccount.com --role=roles/editor
```
## Build the Toolkit Binary
diff --git a/docs/tutorials/wrfv3/spack-wrfv3.yaml b/docs/tutorials/wrfv3/spack-wrfv3.yaml
index 6ff08c933e..e5503b0132 100644
--- a/docs/tutorials/wrfv3/spack-wrfv3.yaml
+++ b/docs/tutorials/wrfv3/spack-wrfv3.yaml
@@ -26,18 +26,18 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/monitoring/dashboard
+ - id: hpc_dash
+ source: modules/monitoring/dashboard
kind: terraform
- id: hpc_dash
## Install Scripts
- - source: community/modules/scripts/spack-install
+ - id: spack
+ source: community/modules/scripts/spack-install
kind: terraform
- id: spack
settings:
install_dir: /apps/spack
spack_url: https://github.com/spack/spack
@@ -97,9 +97,9 @@ deployment_groups:
- mirror_name: gcs_cache
mirror_url: $(vars.spack_cache_mirror_url)
- - source: modules/scripts/startup-script
+ - id: controller-setup
+ source: modules/scripts/startup-script
kind: terraform
- id: controller-setup
settings:
runners:
- type: shell
@@ -140,18 +140,18 @@ deployment_groups:
mpirun -n 60 -hostfile hostfile -ppn ${SLURM_NTASKS_PER_NODE} wrf.exe
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- network1
settings:
partition_name: compute
max_node_count: 20
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- network1
- compute_partition
@@ -159,9 +159,9 @@ deployment_groups:
controller_startup_script: $(controller-setup.startup_script)
login_node_count: 1
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- network1
- slurm_controller
diff --git a/examples/README.md b/examples/README.md
index d50038d336..3814532779 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -484,9 +484,9 @@ spack load gromacs
### [omnia-cluster.yaml] ![community-badge] ![experimental-badge]
Creates a simple [Dell Omnia][omnia-github] provisioned cluster with an
-omnia-manager node and 2 omnia-compute nodes on the pre-existing default
-network. Omnia will be automatically installed after the nodes are provisioned.
-All nodes mount a filestore instance on `/home`.
+omnia-manager node that acts as the slurm manager and 2 omnia-compute nodes on
+the pre-existing default network. Omnia will be automatically installed after
+the nodes are provisioned. All nodes mount a filestore instance on `/home`.
> **_NOTE:_** The omnia-cluster.yaml example uses `vm-instance` modules to
> create the cluster. For these instances, Simultaneous Multithreading (SMT) is
@@ -526,7 +526,7 @@ A user defined blueprint should follow the following schema:
```yaml
# Required: Name your blueprint.
-blueprint_name: MyBlueprintName
+blueprint_name: my-blueprint-name
# Top-level variables, these will be pulled from if a required variable is not
# provided as part of a module. Any variables can be set here by the user,
@@ -551,9 +551,9 @@ deployment_groups:
modules:
# Local source, prefixed with ./ (/ and ../ also accepted)
- - source: ./modules/role/module-name # Required: Points to the module directory.
+ - id: # Required: Name of this module used to uniquely identify it.
+ source: ./modules/role/module-name # Required: Points to the module directory.
kind: < terraform | packer > # Required: Type of module, currently choose from terraform or packer.
- id: # Required: Name of this module used to uniquely identify it.
# Optional: All configured settings for the module. For terraform, each
# variable listed in variables.tf can be set here, and are mandatory if no
# default was provided and are not defined elsewhere (like the top-level vars)
@@ -586,6 +586,10 @@ below.
* **blueprint_name** (required): This name can be used to track resources and
usage across multiple deployments that come from the same blueprint.
+ `blueprint_name` is used as a value for the `ghpc_blueprint` label key, and
+ must abide by label value naming constraints: `blueprint_name` must be at most
+ 63 characters long, and can only contain lowercase letters, numeric
+ characters, underscores and dashes.
### Deployment Variables
@@ -711,10 +715,11 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: path/to/module/1
- id: resource1
+ - id: resource1
+ source: path/to/module/1
...
- - source: path/to/module/2
+ - id: resource2
+ source: path/to/module/2
...
settings:
key1: $(vars.zone)
diff --git a/examples/hpc-cluster-high-io.yaml b/examples/hpc-cluster-high-io.yaml
index 719130556d..fd7b82a552 100644
--- a/examples/hpc-cluster-high-io.yaml
+++ b/examples/hpc-cluster-high-io.yaml
@@ -31,36 +31,36 @@ deployment_groups:
# Source is an embedded module, denoted by "modules/*" without ./, ../, /
# as a prefix. To refer to a local or community module, prefix with ./, ../ or /
# Example - ./modules/network/pre-existing-vpc
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
- - source: modules/file-system/filestore
+ - id: projectsfs
+ source: modules/file-system/filestore
kind: terraform
- id: projectsfs
use: [network1]
settings:
filestore_tier: HIGH_SCALE_SSD
size_gb: 10240
local_mount: /projects
- - source: community/modules/file-system/DDN-EXAScaler
+ - id: scratchfs
+ source: community/modules/file-system/DDN-EXAScaler
kind: terraform
- id: scratchfs
use: [network1]
settings:
local_mount: /scratch
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: low_cost_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: low_cost_partition
use:
- network1
- homefs
@@ -74,9 +74,9 @@ deployment_groups:
machine_type: n2-standard-4
# This compute_partition is far more performant than low_cost_partition.
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- network1
- homefs
@@ -86,9 +86,9 @@ deployment_groups:
max_node_count: 200
partition_name: compute
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- network1
- homefs
@@ -100,9 +100,9 @@ deployment_groups:
controller_machine_type: c2-standard-8
suspend_time: 60
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- network1
- homefs
@@ -112,7 +112,7 @@ deployment_groups:
settings:
login_machine_type: n2-standard-4
- - source: modules/monitoring/dashboard
+ - id: hpc_dashboard
+ source: modules/monitoring/dashboard
kind: terraform
- id: hpc_dashboard
outputs: [instructions]
diff --git a/examples/hpc-cluster-small.yaml b/examples/hpc-cluster-small.yaml
index 9bc3a1f750..ff922090e1 100644
--- a/examples/hpc-cluster-small.yaml
+++ b/examples/hpc-cluster-small.yaml
@@ -31,21 +31,21 @@ deployment_groups:
# Source is an embedded module, denoted by "modules/*" without ./, ../, /
# as a prefix. To refer to a local or community module, prefix with ./, ../ or /
# Example - ./modules/network/vpc
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
# This debug_partition will work out of the box without requesting additional GCP quota.
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: debug_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: debug_partition
use:
- network1
- homefs
@@ -57,9 +57,9 @@ deployment_groups:
machine_type: n2-standard-2
# This compute_partition is far more performant than debug_partition but may require requesting GCP quotas first.
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- network1
- homefs
@@ -67,9 +67,9 @@ deployment_groups:
partition_name: compute
max_node_count: 20
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- network1
- homefs
@@ -79,9 +79,9 @@ deployment_groups:
login_node_count: 1
suspend_time: 60
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- network1
- homefs
diff --git a/examples/image-builder.yaml b/examples/image-builder.yaml
index fd74a13c4b..6893c612cf 100644
--- a/examples/image-builder.yaml
+++ b/examples/image-builder.yaml
@@ -30,12 +30,12 @@ vars:
deployment_groups:
- group: builder-env
modules:
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
- - source: modules/scripts/startup-script
+ - id: scripts_for_image
+ source: modules/scripts/startup-script
kind: terraform
- id: scripts_for_image
settings:
runners:
- type: shell
@@ -47,9 +47,9 @@ deployment_groups:
- group: packer
modules:
- - source: modules/packer/custom-image
+ - id: custom-image
+ source: modules/packer/custom-image
kind: packer
- id: custom-image
settings:
disk_size: 20
source_image_project_id: [schedmd-slurm-public]
@@ -58,12 +58,12 @@ deployment_groups:
- group: cluster
modules:
- - source: modules/network/pre-existing-vpc
+ - id: cluster-network
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: cluster-network
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use: [cluster-network]
settings:
partition_name: compute
@@ -71,18 +71,18 @@ deployment_groups:
instance_image:
family: $(vars.new_image_family)
project: $(vars.project_id)
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use: [cluster-network, compute_partition]
settings:
login_node_count: 1
instance_image:
family: $(vars.new_image_family)
project: $(vars.project_id)
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use: [cluster-network, slurm_controller]
settings:
instance_image:
diff --git a/go.mod b/go.mod
index 98f95ebbe9..3f87f0fd9a 100644
--- a/go.mod
+++ b/go.mod
@@ -3,7 +3,7 @@ module hpc-toolkit
go 1.16
require (
- cloud.google.com/go/compute v1.8.0
+ cloud.google.com/go/compute v1.9.0
github.com/hashicorp/go-getter v1.6.2
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/hashicorp/hcl/v2 v2.13.0
diff --git a/go.sum b/go.sum
index cf85e02c4a..380138b951 100644
--- a/go.sum
+++ b/go.sum
@@ -44,8 +44,8 @@ cloud.google.com/go/compute v1.5.0/go.mod h1:9SMHyhJlzhlkJqrPAc839t2BZFTSk6Jdj6m
cloud.google.com/go/compute v1.6.0/go.mod h1:T29tfhtVbq1wvAPo0E3+7vhgmkOYeXjhFvz/FMzPu0s=
cloud.google.com/go/compute v1.6.1/go.mod h1:g85FgpzFvNULZ+S8AYq87axRKuf2Kh7deLqV/jJ3thU=
cloud.google.com/go/compute v1.7.0/go.mod h1:435lt8av5oL9P3fv1OEzSbSUe+ybHXGMPQHHZWZxy9U=
-cloud.google.com/go/compute v1.8.0 h1:NLtR56/eKx9K1s2Tw/4hec2vsU1S3WeKRMj8HXbBo6E=
-cloud.google.com/go/compute v1.8.0/go.mod h1:boQ44qJsMqZjKzzsEkoJWQGj4h8ygmyk17UArClWzmg=
+cloud.google.com/go/compute v1.9.0 h1:ED/FP4xv8GJw63v556/ASNc1CeeLUO2Bs8nzaHchkHg=
+cloud.google.com/go/compute v1.9.0/go.mod h1:lWv1h/zUWTm/LozzfTJhBSkd6ShQq8la8VeeuOEGxfY=
cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE=
cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk=
cloud.google.com/go/iam v0.3.0 h1:exkAomrVUuzx9kWFI1wm3KI0uoDeUFPB4kKGzx6x+Gc=
@@ -764,8 +764,9 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
+google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w=
+google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
diff --git a/modules/README.md b/modules/README.md
index bbb900c227..e5947d1242 100644
--- a/modules/README.md
+++ b/modules/README.md
@@ -146,8 +146,8 @@ Modules that are still in development and less stable are labeled with the
* **[htcondor-install]** ![community-badge] ![experimental-badge] : Creates
a startup script to install HTCondor and exports a list of required APIs
* **[omnia-install]** ![community-badge] ![experimental-badge] : Installs Slurm
- via [Dell Omnia](https://github.com/dellhpc/omnia) onto a cluster of compute
- VMs.
+ via [Dell Omnia](https://github.com/dellhpc/omnia) onto a cluster of VM
+ instances.
* **[spack-install]** ![community-badge] ![experimental-badge] : Creates a
startup script to install [Spack](https://github.com/spack/spack) on an
instance or a slurm login or controller.
@@ -184,9 +184,9 @@ review the directory structure of [the core modules](./) and
example, the following code is using the embedded pre-existing-vpc module:
```yaml
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
```
#### Local Modules
@@ -197,9 +197,9 @@ the source to a path starting with `/`, `./`, or `../`. For instance, the
following module definition refers the local pre-existing-vpc modules.
```yaml
- - source: ./modules/network/pre-existing-vpc
+ - id: network1
+ source: ./modules/network/pre-existing-vpc
kind: terraform
- id: network1
```
> **_NOTE:_** This example would have to be run from the HPC Toolkit repository
@@ -216,17 +216,17 @@ Toolkit GitHub repository:
Get module from GitHub over SSH:
```yaml
- - source: git@github.com:GoogleCloudPlatform/hpc-toolkit.git//modules/network/vpc
+ - id: network1
+ source: git@github.com:GoogleCloudPlatform/hpc-toolkit.git//modules/network/vpc
kind: terraform
- id: network1
```
Get module from GitHub over HTTPS:
```yaml
- - source: github.com/GoogleCloudPlatform/hpc-toolkit//modules/network/vpc
+ - id: network1
+ source: github.com/GoogleCloudPlatform/hpc-toolkit//modules/network/vpc
kind: terraform
- id: network1
```
Both examples above use the [double-slash notation][tfsubdir] (`//`) to indicate
@@ -240,9 +240,9 @@ of this feature. For example, to temporarily point to a development copy of the
Toolkit vpc module, use:
```yaml
- - source: github.com/GoogleCloudPlatform/hpc-toolkit//modules/network/vpc?ref=develop
+ - id: network1
+ source: github.com/GoogleCloudPlatform/hpc-toolkit//modules/network/vpc?ref=develop
kind: terraform
- id: network1
```
[tfrev]: https://www.terraform.io/language/modules/sources#selecting-a-revision
@@ -289,16 +289,16 @@ the used module's output. For example, see the following blueprint snippet:
```yaml
modules:
-- source: modules/network/vpc
+- id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
-- resource: modules/compute/vm-instance
+- id: workstation
+ source: modules/compute/vm-instance
kind: terraform
- id: workstation
use: [network1]
settings:
- ...
+ ...
```
In this snippet, the VM instance `workstation` uses the outputs of vpc
diff --git a/modules/compute/vm-instance/README.md b/modules/compute/vm-instance/README.md
index 129e746f99..03813e2fa1 100644
--- a/modules/compute/vm-instance/README.md
+++ b/modules/compute/vm-instance/README.md
@@ -6,9 +6,9 @@ This module creates one or more
### Example
```yaml
-- source: modules/compute/vm-instance
+- id: compute
+ source: modules/compute/vm-instance
kind: terraform
- id: compute
use: [network1]
settings:
instance_count: 8
diff --git a/modules/compute/vm-instance/versions.tf b/modules/compute/vm-instance/versions.tf
index 503e7e908b..b77d6ded49 100644
--- a/modules/compute/vm-instance/versions.tf
+++ b/modules/compute/vm-instance/versions.tf
@@ -27,10 +27,10 @@ terraform {
}
}
provider_meta "google" {
- module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.4.0"
}
provider_meta "google-beta" {
- module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.4.0"
}
required_version = ">= 0.14.0"
diff --git a/modules/file-system/filestore/README.md b/modules/file-system/filestore/README.md
index 9bfcc6eec2..5eddd6d6d2 100644
--- a/modules/file-system/filestore/README.md
+++ b/modules/file-system/filestore/README.md
@@ -46,9 +46,9 @@ The Filestore instance defined below will have the following attributes:
- connected to the network defined in the `network1` module
```yaml
-- source: modules/file-system/filestore
+- id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
@@ -65,9 +65,9 @@ The Filestore instance defined below will have the following attributes:
- connected to the VPC network defined in the `network1` module
```yaml
-- source: modules/file-system/filestore
+- id: highscale
+ source: modules/file-system/filestore
kind: terraform
- id: highscale
use: [network1]
settings:
filestore_tier: HIGH_SCALE_SSD
diff --git a/modules/file-system/filestore/scripts/install-nfs-client.sh b/modules/file-system/filestore/scripts/install-nfs-client.sh
index 8ad49f4780..6c49163eb2 100644
--- a/modules/file-system/filestore/scripts/install-nfs-client.sh
+++ b/modules/file-system/filestore/scripts/install-nfs-client.sh
@@ -24,7 +24,7 @@ if [ ! "$(which mount.nfs)" ]; then
enable_repo="baseos"
else
echo "Unsupported version of centos/RHEL/Rocky"
- exit 1
+ return 1
fi
yum install --disablerepo="*" --enablerepo=${enable_repo} -y nfs-utils
elif [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release || grep -qi ubuntu /etc/os-release; then
@@ -32,6 +32,6 @@ if [ ! "$(which mount.nfs)" ]; then
apt-get -y install nfs-common
else
echo 'Unsupported distribution'
- exit 1
+ return 1
fi
fi
diff --git a/modules/file-system/filestore/versions.tf b/modules/file-system/filestore/versions.tf
index 5e3b0feefb..c0cf4c6d07 100644
--- a/modules/file-system/filestore/versions.tf
+++ b/modules/file-system/filestore/versions.tf
@@ -26,10 +26,10 @@ terraform {
}
}
provider_meta "google" {
- module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.4.0"
}
provider_meta "google-beta" {
- module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.4.0"
}
required_version = ">= 0.14.0"
diff --git a/modules/file-system/pre-existing-network-storage/README.md b/modules/file-system/pre-existing-network-storage/README.md
index 978ffa9070..338af4caad 100644
--- a/modules/file-system/pre-existing-network-storage/README.md
+++ b/modules/file-system/pre-existing-network-storage/README.md
@@ -11,9 +11,9 @@ Toolkit supported file-system such as [filestore](../filestore/README.md).
### Example
```yaml
-- source: modules/file-system/pre-existing-network-storage
+- id: homefs
+ source: modules/file-system/pre-existing-network-storage
kind: terraform
- id: homefs
settings:
server_ip: ## Set server IP here ##
remote_mount: nfsshare
diff --git a/modules/monitoring/dashboard/README.md b/modules/monitoring/dashboard/README.md
index 4f5b4d6953..920ef99e0c 100644
--- a/modules/monitoring/dashboard/README.md
+++ b/modules/monitoring/dashboard/README.md
@@ -10,9 +10,9 @@ needed.
## Example
```yaml
-- source: modules/monitoring/dashboard
+- id: hpc_dash
+ source: modules/monitoring/dashboard
kind: terraform
- id: hpc_dash
settings:
widgets:
- |
diff --git a/modules/monitoring/dashboard/versions.tf b/modules/monitoring/dashboard/versions.tf
index 49d8cae75c..b039758539 100644
--- a/modules/monitoring/dashboard/versions.tf
+++ b/modules/monitoring/dashboard/versions.tf
@@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
- module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.4.0"
}
required_version = ">= 0.14.0"
diff --git a/modules/network/pre-existing-vpc/README.md b/modules/network/pre-existing-vpc/README.md
index a9eaa2a90a..c6e64a7e19 100644
--- a/modules/network/pre-existing-vpc/README.md
+++ b/modules/network/pre-existing-vpc/README.md
@@ -12,9 +12,9 @@ sharing a single network module between deployment groups.
### Example
```yaml
-- source: modules/network/pre-existing-vpc
+- id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
settings:
- project_id: $(vars.project_id)
```
diff --git a/modules/network/pre-existing-vpc/versions.tf b/modules/network/pre-existing-vpc/versions.tf
index 0310cf4f00..9e4ad633cf 100644
--- a/modules/network/pre-existing-vpc/versions.tf
+++ b/modules/network/pre-existing-vpc/versions.tf
@@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
- module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.4.0"
}
required_version = ">= 0.14.0"
diff --git a/modules/network/vpc/README.md b/modules/network/vpc/README.md
index 24e84de1ab..9133a531da 100644
--- a/modules/network/vpc/README.md
+++ b/modules/network/vpc/README.md
@@ -106,9 +106,9 @@ compact set of subnetworks possible.
### Example
```yaml
-- source: modules/network/vpc
+- id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
settings:
- deployment_name: $(vars.deployment_name)
```
diff --git a/modules/scripts/startup-script/README.md b/modules/scripts/startup-script/README.md
index c6937eba6c..f20b98739c 100644
--- a/modules/scripts/startup-script/README.md
+++ b/modules/scripts/startup-script/README.md
@@ -53,6 +53,49 @@ Each runner receives the following attributes:
Therefore `args` should not include any arguments that alter this behavior,
such as `--connection`, `--inventory`, or `--limit`.
+### Runner dependencies
+
+The `ansible-local` runner requires ansible to be installed in the VM before
+running. To support the playbook runners used in the HPC Toolkit, we require
+ansible-core version 2.11 or higher. Note that this is distinct from the
+version of the ansible pip package used to install ansible, which must be at
+least 4.10.0.
+
+To install ansible, a runner supplied by this module can be added as a prior
+runner. An example of this can be found in the [Example](#example) section below
+as the first runner in the list of runners. This script will do the following in
+your VM instance:
+
+- Install python3 system-wide, if it is not already installed, using system
+ package managers (yum, apt-get, etc.)
+- Install `python3-distutils` system-wide in debian and ubuntu based
+ environments. This can be a missing dependency on system installations of
+ python3 for installing and upgrading pip.
+- Install system-wide pip3 if not already installed and upgrade pip3 if the
+ version is not at least 18.0.
+- Install `virtualenv` and create a virtual environment located at `/usr/local/ghpc-venv`.
+- Install ansible into this virtual environment if the current version of
+ ansible is not version 2.11 or higher.
+
+To use the virtual environment created by this script, you can activate it by
+running the following command on the VM:
+
+```shell
+source /usr/local/ghpc-venv/bin/activate
+```
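+
+Once activated, a quick sanity check (shown here for illustration only, not
+required by the module) is:
+
+```shell
+# python3 should now resolve to the virtual environment's interpreter, and
+# ansible-playbook should report a version based on ansible-core 2.11 or higher.
+which python3
+ansible-playbook --version
+```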
+
+You may also need to point ansible at the python3 binary in the virtual
+environment as its python interpreter. This can be done by adding the following
+flag when calling `ansible-playbook`:
+
+```shell
+-e ansible_python_interpreter=/usr/local/ghpc-venv/bin/python3
+```
+
+> **_NOTE:_** ansible-playbook and other ansible command line tools will only be
+> accessible from the command line (and in your PATH variable) after activating
+> this environment.
+
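+For reference, a manual run that mirrors what the `ansible-local` runner does
+(a sketch only; `my-playbook.yml` is a placeholder for your own playbook) would
+look like:
+
+```shell
+# Activate the Toolkit virtual environment, run the playbook against localhost
+# with the virtual environment's python3, then deactivate.
+source /usr/local/ghpc-venv/bin/activate
+ansible-playbook -e ansible_python_interpreter=/usr/local/ghpc-venv/bin/python3 \
+  --connection=local --inventory=localhost, --limit localhost my-playbook.yml
+deactivate
+```
+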
### Staging the runners
Runners will be uploaded to a
@@ -96,9 +139,9 @@ sudo journalctl -u google-startup-scripts.service
### Example
```yaml
-- source: ./modules/scripts/startup-script
+- id: startup
+ source: ./modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
@@ -124,9 +167,9 @@ sudo journalctl -u google-startup-scripts.service
tar zxvf /tmp/$1 -C /
args: "bar.tgz 'Expanding file'"
-- source: ./modules/compute/vm-instance
+- id: compute-cluster
+ source: ./modules/compute/vm-instance
kind: terraform
- id: compute-cluster
use: [homefs, startup]
```
diff --git a/modules/scripts/startup-script/examples/install_ansible.sh b/modules/scripts/startup-script/examples/install_ansible.sh
index c9b5a4326a..5f4fd236f2 100644
--- a/modules/scripts/startup-script/examples/install_ansible.sh
+++ b/modules/scripts/startup-script/examples/install_ansible.sh
@@ -13,6 +13,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+REQ_ANSIBLE_VERSION=2.11
+REQ_ANSIBLE_PIP_VERSION=4.10.0
+REQ_PIP_MINOR_VERSION=18
+REQ_PYTHON3_VERSION=6
+
apt_wait() {
while fuser /var/lib/dpkg/lock >/dev/null 2>&1; do
echo "Sleeping for dpkg lock"
@@ -30,24 +35,158 @@ apt_wait() {
fi
}
-if [ ! -h /usr/bin/ansible-playbook ] || [ ! -f /usr/bin/ansible-playbook ]; then
- if [ -f /etc/centos-release ] || [ -f /etc/redhat-release ] || [ -f /etc/oracle-release ] || [ -f /etc/system-release ]; then
- if [ ! -f /bin/pip ]; then
- curl -Os https://bootstrap.pypa.io/pip/2.7/get-pip.py
- /usr/bin/python get-pip.py
+# Installs any dependencies needed for python based on the OS
+install_python_deps() {
+ if [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release 2>/dev/null ||
+ grep -qi ubuntu /etc/os-release 2>/dev/null; then
+ apt-get install -y python3-distutils
+ fi
+}
+
+# Gets the path of the python executable, preferring python3 over python. Sets
+# python_path to an empty string if neither is found.
+get_python_path() {
+ python_path=""
+ if which python3 2>/dev/null; then
+ python_path=$(which python3 2>/dev/null)
+ elif which python 2>/dev/null; then
+ python_path=$(which python 2>/dev/null)
+ fi
+}
+
+# Sets python_major_version to the python major version. If provided, it will
+# use the first argument as the python executable, otherwise it defaults to
+# "python".
+get_python_major_version() {
+ python_path=${1:-python}
+ python_major_version=$(${python_path} -c "import sys; print(sys.version_info.major)")
+}
+
+# Sets python_minor_version to the python minor version. If provided, it will
+# use the first argument as the python executable, otherwise it defaults to
+# "python".
+get_python_minor_version() {
+ python_path=${1:-python}
+ python_minor_version=$(${python_path} -c "import sys; print(sys.version_info.minor)")
+}
+
+# Install python3 with the yum package manager. Updates python_path to the
+# newly installed package.
+install_python3_yum() {
+ major_version=$(rpm -E "%{rhel}")
+ enable_repo=""
+ if [ "${major_version}" -eq "7" ]; then
+ enable_repo="base,epel"
+ elif [ "${major_version}" -eq "8" ]; then
+ enable_repo="baseos"
+ else
+ echo "Unsupported version of centos/RHEL/Rocky"
+ return 1
+ fi
+ yum install --disablerepo="*" --enablerepo=${enable_repo} -y python3 python3-pip
+ python_path=$(rpm -ql python3 | grep 'bin/python3$')
+}
+
+# Install python3 with the apt package manager. Updates python_path to the
+# newly installed package.
+install_python3_apt() {
+ apt_wait
+ apt-get install -y python3 python3-distutils python3-pip
+ python_path=$(which python3)
+}
+
+install_python3() {
+ if [ -f /etc/centos-release ] || [ -f /etc/redhat-release ] ||
+ [ -f /etc/oracle-release ] || [ -f /etc/system-release ]; then
+ install_python3_yum
+ elif [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release 2>/dev/null ||
+ grep -qi ubuntu /etc/os-release 2>/dev/null; then
+ install_python3_apt
+ else
+ echo "Error: Unsupported Distribution"
+ return 1
+ fi
+}
+
+# Install pip3 with the yum package manager.
+install_pip3_yum() {
+ major_version=$(rpm -E "%{rhel}")
+ enable_repo=""
+ if [ "${major_version}" -eq "7" ]; then
+ enable_repo="base,epel"
+ elif [ "${major_version}" -eq "8" ]; then
+ enable_repo="baseos"
+ else
+ echo "Unsupported version of centos/RHEL/Rocky"
+ return 1
+ fi
+ yum install --disablerepo="*" --enablerepo=${enable_repo} -y python3-pip
+}
+
+# Install pip3 with the apt package manager.
+install_pip3_apt() {
+ apt-get update
+ apt-get install -y python3-pip
+}
+
+install_pip3() {
+ if [ -f /etc/centos-release ] || [ -f /etc/redhat-release ] ||
+ [ -f /etc/oracle-release ] || [ -f /etc/system-release ]; then
+ install_pip3_yum
+ elif [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release 2>/dev/null ||
+ grep -qi ubuntu /etc/os-release 2>/dev/null; then
+ install_pip3_apt
+ else
+ echo "Error: Unsupported Distribution"
+ return 1
+ fi
+}
+
+main() {
+ # Get the python3 executable, or install it if not found
+ get_python_path
+ get_python_major_version "${python_path}"
+ get_python_minor_version "${python_path}"
+ if [ "${python_path}" = "" ] || [ "${python_major_version}" = "2" ] || [ "${python_minor_version}" -lt "${REQ_PYTHON3_VERSION}" ]; then
+ if ! install_python3; then
+ return 1
fi
- /usr/bin/python -m pip install virtualenv
- /usr/bin/python -m virtualenv /usr/local/toolkit
- /usr/local/toolkit/bin/python -m pip install wheel
- /usr/local/toolkit/bin/python -m pip install ansible==2.9.27
- ln -s /usr/local/toolkit/bin/ansible-playbook /usr/bin/ansible-playbook
- elif [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release || grep -qi ubuntu /etc/os-release; then
- echo 'WARNING: unsupported installation of ansible in debian / ubuntu'
- apt_wait
- apt-get update
- DEBIAN_FRONTEND=noninteractive apt-get install -y ansible
+ get_python_major_version "${python_path}"
+ get_python_minor_version "${python_path}"
else
- echo 'Unsupported distribution'
- exit 1
+ install_python_deps
fi
-fi
+
+ # Install and/or upgrade pip
+ if ! ${python_path} -m pip --version 2>/dev/null; then
+ if ! install_pip3; then
+ return 1
+ fi
+ fi
+ pip_version=$(${python_path} -m pip --version | sed -nr 's/^pip ([0-9]+\.[0-9]+).*$/\1/p')
+ pip_major_version=$(echo "${pip_version}" | cut -d '.' -f 1)
+ if [ "${pip_major_version}" -lt "${REQ_PIP_MINOR_VERSION}" ]; then
+ ${python_path} -m pip install --upgrade pip
+ fi
+
+ # Create pip virtual environment for HPC Toolkit
+ ${python_path} -m pip install virtualenv
+ ${python_path} -m virtualenv /usr/local/ghpc-venv
+ python_path=/usr/local/ghpc-venv/bin/python3
+
+ # Install ansible
+ ansible_version=""
+ if which ansible-playbook 2>/dev/null; then
+ ansible_version=$(ansible-playbook --version 2>/dev/null | sed -nr 's/^ansible-playbook.*([0-9]+\.[0-9]+\.[0-9]+).*/\1/p')
+ ansible_major_vers=$(echo "${ansible_version}" | cut -d '.' -f 1)
+ ansible_minor_vers=$(echo "${ansible_version}" | cut -d '.' -f 2)
+ ansible_req_major_vers=$(echo "${REQ_ANSIBLE_VERSION}" | cut -d '.' -f 1)
+ ansible_req_minor_vers=$(echo "${REQ_ANSIBLE_VERSION}" | cut -d '.' -f 2)
+ fi
+ if [ -z "${ansible_version}" ] || [ "${ansible_major_vers}" -ne "${ansible_req_major_vers}" ] ||
+ [ "${ansible_minor_vers}" -lt "${ansible_req_minor_vers}" ]; then
+ ${python_path} -m pip install ansible==${REQ_ANSIBLE_PIP_VERSION}
+ fi
+}
+
+main
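
For reference, a minimal Go sketch (not part of this patch) of the version gate that main() applies before reinstalling ansible into the virtual environment: the install is redone when no ansible is detected, when the installed major version differs from the required one, or when the minor version is below the required minimum. The required version string mirrors REQ_ANSIBLE_VERSION from the script above; the function and helper names below are illustrative assumptions.

```go
// Sketch of the ansible version gate; names are illustrative, not Toolkit code.
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// needsAnsibleReinstall reports whether ansible should be (re)installed:
// no version detected, a different major version, or a minor version below
// the required minimum.
func needsAnsibleReinstall(installed, required string) bool {
	if installed == "" {
		return true
	}
	iMaj, iMin := splitVersion(installed)
	rMaj, rMin := splitVersion(required)
	return iMaj != rMaj || iMin < rMin
}

// splitVersion extracts the major and minor components of a dotted version.
func splitVersion(v string) (int, int) {
	parts := strings.Split(v, ".")
	major, _ := strconv.Atoi(parts[0])
	minor := 0
	if len(parts) > 1 {
		minor, _ = strconv.Atoi(parts[1])
	}
	return major, minor
}

func main() {
	fmt.Println(needsAnsibleReinstall("2.9.27", "2.11")) // true: minor version too low
	fmt.Println(needsAnsibleReinstall("2.12.1", "2.11")) // false: same major, newer minor
}
```

The major-must-match / minor-at-least check keeps an already-suitable ansible install from being reinstalled on every boot.
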
diff --git a/modules/scripts/startup-script/templates/startup-script-custom.tpl b/modules/scripts/startup-script/templates/startup-script-custom.tpl
index 2c1ca2e124..8dad11cc3c 100644
--- a/modules/scripts/startup-script/templates/startup-script-custom.tpl
+++ b/modules/scripts/startup-script/templates/startup-script-custom.tpl
@@ -1,13 +1,22 @@
stdlib::run_playbook() {
+ python_interpreter_flag=""
+ if [ -d /usr/local/ghpc-venv ]; then
+ . /usr/local/ghpc-venv/bin/activate
+ python_interpreter_flag="-e ansible_python_interpreter=/usr/local/ghpc-venv/bin/python3"
+ fi
if [ ! "$(which ansible-playbook)" ]; then
stdlib::error "ansible-playbook not found"\
"Please install ansible before running ansible-local runners."
exit 1
fi
- /usr/bin/ansible-playbook --connection=local --inventory=localhost, --limit localhost $1 $2
- return $?
+ ansible-playbook $${python_interpreter_flag} --connection=local --inventory=localhost, --limit localhost $1 $2
+ ret_code=$?
+ if [ -d /usr/local/ghpc-venv ]; then
+ deactivate
+ fi
+ return $${ret_code}
}
stdlib::runner() {
@@ -27,6 +36,7 @@ stdlib::runner() {
stdlib::get_from_bucket -u "gs://${bucket}/$object" -d "$destpath" -f "$filename"
+ stdlib::info "=== start executing runner: $object ==="
case "$1" in
ansible-local) stdlib::run_playbook "$destpath/$filename" "$args";;
shell) . $destpath/$filename $args;;
diff --git a/modules/scripts/startup-script/versions.tf b/modules/scripts/startup-script/versions.tf
index cea3ab22b0..7a67e58b5e 100644
--- a/modules/scripts/startup-script/versions.tf
+++ b/modules/scripts/startup-script/versions.tf
@@ -30,7 +30,7 @@ terraform {
}
}
provider_meta "google" {
- module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.3.0"
+ module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.4.0"
}
required_version = ">= 0.14.0"
diff --git a/pkg/config/config.go b/pkg/config/config.go
index 89248ab4f1..e311d0f94b 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -35,7 +35,10 @@ import (
"hpc-toolkit/pkg/sourcereader"
)
-const expectedVarFormat = "$(vars.var_name) or $(module_id.var_name)"
+const (
+ expectedVarFormat string = "$(vars.var_name) or $(module_id.var_name)"
+ matchLabelExp string = `^[\p{Ll}\p{Lo}\p{N}_-]{1,63}$`
+)
var errorMessages = map[string]string{
// general
@@ -66,6 +69,10 @@ var errorMessages = map[string]string{
"emptyGroupName": "group name must be set for each deployment group",
"illegalChars": "invalid character(s) found in group name",
"invalidOutput": "requested output was not found in the module",
+ "varNotDefined": "variable not defined",
+ "valueNotString": "value was not of type string",
+ "valueEmptyString": "value is an empty string",
+ "labelReqs": "value can only contain lowercase letters, numeric characters, underscores and dashes, and must be between 1 and 63 characters long.",
}
// DeploymentGroup defines a group of Modules that are all executed together
@@ -402,6 +409,10 @@ func (dc *DeploymentConfig) validateConfig() {
if err != nil {
log.Fatal(err)
}
+ err = dc.Config.checkBlueprintName()
+ if err != nil {
+ log.Fatal(err)
+ }
moduleToGroup, err := checkModuleAndGroupNames(dc.Config.DeploymentGroups)
if err != nil {
log.Fatal(err)
@@ -559,13 +570,14 @@ func ResolveVariables(
return nil
}
-// DeploymentNameError signifies a problem with the blueprint deployment name.
-type DeploymentNameError struct {
- cause string
+// InputValueError signifies a problem with the blueprint name.
+type InputValueError struct {
+ inputKey string
+ cause string
}
-func (err *DeploymentNameError) Error() string {
- return fmt.Sprintf("deployment_name must be a string and cannot be empty, cause: %v", err.cause)
+func (err *InputValueError) Error() string {
+ return fmt.Sprintf("%v input error, cause: %v", err.inputKey, err.cause)
}
// ResolveGlobalVariables will resolve literal variables "((var.*))" in the
@@ -579,20 +591,66 @@ func (b Blueprint) ResolveGlobalVariables(ctyVars map[string]cty.Value) error {
return ResolveVariables(ctyVars, origin)
}
+// isValidLabelValue checks if a string is a valid value for a GCP label.
+// For more information on valid label values, see the docs at:
+// https://cloud.google.com/resource-manager/docs/creating-managing-labels#requirements
+func isValidLabelValue(value string) bool {
+ return regexp.MustCompile(matchLabelExp).MatchString(value)
+}
+
// DeploymentName returns the deployment_name from the config and does appropriate checks.
func (b *Blueprint) DeploymentName() (string, error) {
nameInterface, found := b.Vars["deployment_name"]
if !found {
- return "", &DeploymentNameError{"deployment_name variable not defined."}
+ return "", &InputValueError{
+ inputKey: "deployment_name",
+ cause: errorMessages["varNotFound"],
+ }
}
deploymentName, ok := nameInterface.(string)
if !ok {
- return "", &DeploymentNameError{"deployment_name was not of type string."}
+ return "", &InputValueError{
+ inputKey: "deployment_name",
+ cause: errorMessages["valueNotString"],
+ }
}
if len(deploymentName) == 0 {
- return "", &DeploymentNameError{"deployment_name was an empty string."}
+ return "", &InputValueError{
+ inputKey: "deployment_name",
+ cause: errorMessages["valueEmptyString"],
+ }
+ }
+
+ // Check that deployment_name is a valid label
+ if !isValidLabelValue(deploymentName) {
+ return "", &InputValueError{
+ inputKey: "deployment_name",
+ cause: errorMessages["labelReqs"],
+ }
}
+
return deploymentName, nil
}
+
+// checkBlueprintName returns an error if blueprint_name does not comply with
+// requirements for correct GCP label values.
+func (b *Blueprint) checkBlueprintName() error {
+
+ if len(b.BlueprintName) == 0 {
+ return &InputValueError{
+ inputKey: "blueprint_name",
+ cause: errorMessages["valueEmptyString"],
+ }
+ }
+
+ if !isValidLabelValue(b.BlueprintName) {
+ return &InputValueError{
+ inputKey: "blueprint_name",
+ cause: errorMessages["labelReqs"],
+ }
+ }
+
+ return nil
+}
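
Both DeploymentName() and checkBlueprintName() rely on the new matchLabelExp pattern. A self-contained Go sketch (illustrative only; the regular expression is copied from the constant added above) shows which values satisfy the GCP label-value rules:

```go
// Standalone sketch of the label-value check; only the regex is taken from the patch.
package main

import (
	"fmt"
	"regexp"
)

var labelRe = regexp.MustCompile(`^[\p{Ll}\p{Lo}\p{N}_-]{1,63}$`)

func main() {
	for _, v := range []string{
		"hpc-small",       // valid: lowercase letters and a dash
		"blue-print_name", // valid: dash and underscore
		"Deployment_name", // invalid: uppercase letter
		"deployment.name", // invalid: '.' is not allowed
		"",                // invalid: must be 1-63 characters
	} {
		fmt.Printf("%-20q %v\n", v, labelRe.MatchString(v))
	}
}
```
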
diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go
index 5fe2723885..7b8fd9eb03 100644
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -232,6 +232,7 @@ func getBasicDeploymentConfigWithTestModule() DeploymentConfig {
}
return DeploymentConfig{
Config: Blueprint{
+ BlueprintName: "simple",
Vars: map[string]interface{}{"deployment_name": "deployment_name"},
DeploymentGroups: []DeploymentGroup{testDeploymentGroup},
},
@@ -318,7 +319,7 @@ func (s *MySuite) TestCheckModuleAndGroupNames(c *C) {
func (s *MySuite) TestDeploymentName(c *C) {
dc := getDeploymentConfigForTest()
- var e *DeploymentNameError
+ var e *InputValueError
// Is deployment_name a valid string?
deploymentName, err := dc.Config.DeploymentName()
@@ -333,7 +334,25 @@ func (s *MySuite) TestDeploymentName(c *C) {
// Is deployment_name not a string?
dc.Config.Vars["deployment_name"] = 100
- _, err = dc.Config.DeploymentName()
+ deploymentName, err = dc.Config.DeploymentName()
+ c.Assert(deploymentName, Equals, "")
+ c.Check(errors.As(err, &e), Equals, true)
+
+ // Is deployment_name longer than 63 characters?
+ dc.Config.Vars["deployment_name"] = "deployment_name-deployment_name-deployment_name-deployment_name-0123"
+ deploymentName, err = dc.Config.DeploymentName()
+ c.Assert(deploymentName, Equals, "")
+ c.Check(errors.As(err, &e), Equals, true)
+
+ // Does deployment_name contain special characters other than dashes or underscores?
+ dc.Config.Vars["deployment_name"] = "deployment.name"
+ deploymentName, err = dc.Config.DeploymentName()
+ c.Assert(deploymentName, Equals, "")
+ c.Check(errors.As(err, &e), Equals, true)
+
+ // Does deployment_name contain capital letters?
+ dc.Config.Vars["deployment_name"] = "Deployment_name"
+ deploymentName, err = dc.Config.DeploymentName()
c.Assert(deploymentName, Equals, "")
c.Check(errors.As(err, &e), Equals, true)
@@ -344,6 +363,40 @@ func (s *MySuite) TestDeploymentName(c *C) {
c.Check(errors.As(err, &e), Equals, true)
}
+func (s *MySuite) TestCheckBlueprintName(c *C) {
+ dc := getDeploymentConfigForTest()
+ var e *InputValueError
+
+ // Is blueprint_name a valid string?
+ err := dc.Config.checkBlueprintName()
+ c.Assert(err, IsNil)
+
+ // Is blueprint_name a valid string with an underscore and dash?
+ dc.Config.BlueprintName = "blue-print_name"
+ err = dc.Config.checkBlueprintName()
+ c.Check(err, IsNil)
+
+ // Is blueprint_name an empty string?
+ dc.Config.BlueprintName = ""
+ err = dc.Config.checkBlueprintName()
+ c.Check(errors.As(err, &e), Equals, true)
+
+ // Is blueprint_name longer than 63 characters?
+ dc.Config.BlueprintName = "blueprint-name-blueprint-name-blueprint-name-blueprint-name-0123"
+ err = dc.Config.checkBlueprintName()
+ c.Check(errors.As(err, &e), Equals, true)
+
+ // Does blueprint_name contain special characters other than dashes or underscores?
+ dc.Config.BlueprintName = "blueprint.name"
+ err = dc.Config.checkBlueprintName()
+ c.Check(errors.As(err, &e), Equals, true)
+
+ // Does blueprint_name contain capital letters?
+ dc.Config.BlueprintName = "Blueprint_name"
+ err = dc.Config.checkBlueprintName()
+ c.Check(errors.As(err, &e), Equals, true)
+}
+
func (s *MySuite) TestNewBlueprint(c *C) {
dc := getDeploymentConfigForTest()
outFile := filepath.Join(tmpTestDir, "out_TestNewBlueprint.yaml")
diff --git a/pkg/modulewriter/modulewriter_test.go b/pkg/modulewriter/modulewriter_test.go
index d2da694b3e..d202b8fb70 100644
--- a/pkg/modulewriter/modulewriter_test.go
+++ b/pkg/modulewriter/modulewriter_test.go
@@ -227,7 +227,7 @@ func (s *MySuite) TestWriteDeployment(c *C) {
func (s *MySuite) TestWriteDeployment_BadDeploymentName(c *C) {
testBlueprint := getBlueprintForTest()
- var e *config.DeploymentNameError
+ var e *config.InputValueError
testBlueprint.Vars = map[string]interface{}{"deployment_name": 100}
err := WriteDeployment(&testBlueprint, testDir, false /* overwriteFlag */)
diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml
index 8a382dc40e..be963b4463 100644
--- a/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml
+++ b/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml
@@ -140,7 +140,7 @@
- name: Delete Firewall Rule
register: fw_deleted
changed_when: fw_deleted.rc == 0
- failed_when: false # keep cleaning up
+ failed_when: false # keep cleaning up
ansible.builtin.command:
argv:
- gcloud
@@ -149,8 +149,8 @@
- delete
- "{{ deployment_name }}"
- name: Tear Down Pool
- changed_when: true # assume something destroyed
- failed_when: false # keep cleaning up
+ changed_when: true # assume something destroyed
+ failed_when: false # keep cleaning up
run_once: true
delegate_to: localhost
environment:
@@ -177,7 +177,7 @@
ansible.builtin.command:
cmd: gcloud compute images delete {{ image_name.stdout }}
- name: Tear Down Network
- changed_when: true # assume something destroyed
+ changed_when: true # assume something destroyed
delegate_to: localhost
environment:
TF_IN_AUTOMATION: "TRUE"
@@ -190,8 +190,8 @@
- name: Run Integration Tests
hosts: remote_host
- gather_facts: no # must wait until host is reachable
- ignore_unreachable: true # ensure always block will run even if SSH fails
+ gather_facts: false # must wait until host is reachable
+ ignore_unreachable: true # ensure always block will run even if SSH fails
tasks:
- name: HTCondor Test Block
vars:
@@ -218,7 +218,7 @@
loop_var: test
always:
- name: Tear Down Pool
- changed_when: true # assume something destroyed
+ changed_when: true # assume something destroyed
delegate_to: localhost
run_once: true
environment:
@@ -243,7 +243,7 @@
- name: Delete custom image
register: image_deleted
changed_when: image_deleted.rc == 0
- failed_when: false # keep cleaning up
+ failed_when: false # keep cleaning up
run_once: true
delegate_to: localhost
ansible.builtin.command:
@@ -251,7 +251,7 @@
- name: Delete Firewall Rule
register: fw_deleted
changed_when: fw_deleted.rc == 0
- failed_when: false # keep cleaning up
+ failed_when: false # keep cleaning up
run_once: true
delegate_to: localhost
ansible.builtin.command:
@@ -262,7 +262,7 @@
- delete
- "{{ deployment_name }}"
- name: Tear Down Network
- changed_when: true # assume something destroyed
+ changed_when: true # assume something destroyed
run_once: true
delegate_to: localhost
environment:
diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml
index 144b5a4b85..f64847dad1 100644
--- a/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml
+++ b/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml
@@ -127,7 +127,7 @@
- name: Delete Firewall Rule
register: fw_deleted
changed_when: fw_deleted.rc == 0
- failed_when: false # keep cleaning up
+ failed_when: false # keep cleaning up
command:
argv:
- gcloud
@@ -136,7 +136,7 @@
- delete
- "{{ deployment_name }}"
- name: Tear Down Cluster
- changed_when: true # assume something destroyed
+ changed_when: true # assume something destroyed
run_once: true
delegate_to: localhost
environment:
@@ -151,8 +151,8 @@
- name: Run Integration Tests
hosts: remote_host
- gather_facts: no # must wait until host is reachable
- ignore_unreachable: true # ensure always block will run even if SSH fails
+ gather_facts: false # must wait until host is reachable
+ ignore_unreachable: true # ensure always block will run even if SSH fails
tasks:
- name: Slurm Test Block
vars:
@@ -201,7 +201,7 @@
- name: Delete Firewall Rule
register: fw_deleted
changed_when: fw_deleted.rc == 0
- failed_when: false # keep cleaning up
+ failed_when: false # keep cleaning up
run_once: true
delegate_to: localhost
command:
@@ -212,7 +212,7 @@
- delete
- "{{ deployment_name }}"
- name: Tear Down Cluster
- changed_when: true # assume something destroyed
+ changed_when: true # assume something destroyed
run_once: true
delegate_to: localhost
environment:
diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml
index ff7a2630c5..2e46579485 100644
--- a/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml
+++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml
@@ -31,6 +31,9 @@
until: result.stdout.find("SUCCEEDED") != -1
retries: 60
delay: 10
+ - name: Call batch list command printed in instructions
+ changed_when: false
+ ansible.builtin.command: gcloud alpha batch jobs list --project={{ custom_vars.project }}
always:
- name: delete job
diff --git a/tools/cloud-build/daily-tests/blueprints/lustre-with-new-vpc.yaml b/tools/cloud-build/daily-tests/blueprints/lustre-with-new-vpc.yaml
index 266f506351..f3d0bfb097 100644
--- a/tools/cloud-build/daily-tests/blueprints/lustre-with-new-vpc.yaml
+++ b/tools/cloud-build/daily-tests/blueprints/lustre-with-new-vpc.yaml
@@ -28,23 +28,23 @@ deployment_groups:
# Source is an embedded module, denoted by "modules/*" without ./, ../, /
# as a prefix. To refer to a local or community module, prefix with ./, ../ or /
# Example - ./modules/network/pre-existing-vpc
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
settings:
network_name: lustre-new-vpc
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
# Explicitly picking the local version of the module
- - source: community/modules/file-system/DDN-EXAScaler
+ - id: scratchfs
+ source: community/modules/file-system/DDN-EXAScaler
kind: terraform
- id: scratchfs
settings:
local_mount: /scratch
network_self_link: $(network1.network_self_link)
@@ -52,9 +52,9 @@ deployment_groups:
subnetwork_address: $(network1.subnetwork_address)
# Create a separate workstation to catch regressions in vm-instance
- - source: ./modules/compute/vm-instance
+ - id: workstation
+ source: ./modules/compute/vm-instance
kind: terraform
- id: workstation
use:
- network1
- homefs
@@ -63,9 +63,9 @@ deployment_groups:
name_prefix: test-workstation
machine_type: c2-standard-4
- - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- network1
- homefs
@@ -74,18 +74,18 @@ deployment_groups:
max_node_count: 2
partition_name: compute
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- network1
- homefs
- scratchfs
- compute_partition
- - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- network1
- homefs
diff --git a/tools/cloud-build/daily-tests/blueprints/monitoring.yaml b/tools/cloud-build/daily-tests/blueprints/monitoring.yaml
index 9b6731a7c5..5c47f9d102 100644
--- a/tools/cloud-build/daily-tests/blueprints/monitoring.yaml
+++ b/tools/cloud-build/daily-tests/blueprints/monitoring.yaml
@@ -25,23 +25,23 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/vpc
+ - id: network
+ source: modules/network/vpc
kind: terraform
- id: network
settings:
network_name: monitoring-net
- - source: community/modules/file-system/nfs-server
+ - id: homefs
+ source: community/modules/file-system/nfs-server
kind: terraform
- id: homefs
use: [network]
settings:
local_mounts: [/home]
auto_delete_disk: true
- - source: ./modules/scripts/startup-script
+ - id: startup
+ source: ./modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
@@ -53,9 +53,9 @@ deployment_groups:
- $(homefs.install_nfs_client_runner)
- $(homefs.mount_runner)
- - source: ./modules/compute/vm-instance
+ - id: workstation
+ source: ./modules/compute/vm-instance
kind: terraform
- id: workstation
use:
- network
- homefs
@@ -63,10 +63,10 @@ deployment_groups:
settings:
machine_type: c2-standard-4
metadata:
- enable-oslogin: TRUE
+ enable-oslogin: true
- - source: ./modules/monitoring/dashboard
+ - id: hpc-dash
+ source: ./modules/monitoring/dashboard
kind: terraform
- id: hpc-dash
settings:
title: $(vars.deployment_name)
diff --git a/tools/validate_configs/test_configs/2-nfs-servers.yaml b/tools/validate_configs/test_configs/2-nfs-servers.yaml
index f0fb6066ff..26ed5cba0c 100644
--- a/tools/validate_configs/test_configs/2-nfs-servers.yaml
+++ b/tools/validate_configs/test_configs/2-nfs-servers.yaml
@@ -25,22 +25,22 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: community/modules/file-system/nfs-server
+ - id: homefs
+ source: community/modules/file-system/nfs-server
kind: terraform
- id: homefs
use: [network1]
outputs: [network_storage]
settings:
local_mounts: ["/home"]
auto_delete_disk: true
- - source: ./community/modules/file-system/nfs-server
+ - id: appsfs
+ source: ./community/modules/file-system/nfs-server
kind: terraform
- id: appsfs
use: [network1]
outputs: [network_storage]
settings:
diff --git a/tools/validate_configs/test_configs/2filestore-4instances.yaml b/tools/validate_configs/test_configs/2filestore-4instances.yaml
index 1146388f60..a1fe2c1291 100644
--- a/tools/validate_configs/test_configs/2filestore-4instances.yaml
+++ b/tools/validate_configs/test_configs/2filestore-4instances.yaml
@@ -25,13 +25,13 @@ vars:
deployment_groups:
- group: infrastructure
modules:
- - source: ./modules/network/vpc
+ - id: network
+ source: ./modules/network/vpc
kind: terraform
- id: network
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network]
settings:
name: homefs
@@ -39,9 +39,9 @@ deployment_groups:
labels:
ghpc_role: storage-home
- - source: ./modules/file-system/filestore
+ - id: apps
+ source: ./modules/file-system/filestore
kind: terraform
- id: apps
use: [network]
settings:
name: apps
@@ -49,9 +49,9 @@ deployment_groups:
labels:
ghpc_role: storage-apps
- - source: ./modules/scripts/startup-script
+ - id: startup
+ source: ./modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
@@ -64,9 +64,9 @@ deployment_groups:
source: "modules/startup-script/examples/mount.yaml"
destination: "mount.yaml"
- - source: ./modules/compute/vm-instance
+ - id: license-server-1
+ source: ./modules/compute/vm-instance
kind: terraform
- id: license-server-1
use: [network]
settings:
name_prefix: ls1
@@ -74,9 +74,9 @@ deployment_groups:
labels:
ghpc_role: license
- - source: modules/compute/vm-instance
+ - id: license-server-2
+ source: modules/compute/vm-instance
kind: terraform
- id: license-server-2
use: [network]
settings:
name_prefix: ls2
@@ -84,9 +84,9 @@ deployment_groups:
labels:
ghpc_role: license
- - source: modules/compute/vm-instance
+ - id: head-node
+ source: modules/compute/vm-instance
kind: terraform
- id: head-node
use:
- network
- homefs
@@ -99,9 +99,9 @@ deployment_groups:
metadata:
startup-script: $(startup.startup_script)
- - source: modules/compute/vm-instance
+ - id: compute
+ source: modules/compute/vm-instance
kind: terraform
- id: compute
use:
- network
- homefs
diff --git a/tools/validate_configs/test_configs/centos8-ss.yaml b/tools/validate_configs/test_configs/centos8-ss.yaml
index 38630837c4..be1e56120e 100644
--- a/tools/validate_configs/test_configs/centos8-ss.yaml
+++ b/tools/validate_configs/test_configs/centos8-ss.yaml
@@ -25,49 +25,40 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: ./modules/network/pre-existing-vpc
+ - id: network1
+ source: ./modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: appsfs
+ source: modules/file-system/filestore
kind: terraform
- id: appsfs
use: [network1]
settings:
name: appsfs
local_mount: /apps
- - source: community/modules/file-system/nfs-server
+ - id: nfs
+ source: community/modules/file-system/nfs-server
kind: terraform
- id: nfs
use: [network1]
settings:
- image: centos-cloud/centos-stream-8
auto_delete_disk: true
- - source: ./community//modules/scripts/spack-install
+ - id: spack
+ source: ./community//modules/scripts/spack-install
kind: terraform
- id: spack
settings:
install_dir: /apps/spack
spack_url: https://github.com/spack/spack
- spack_ref: v0.17.0
- spack_cache_url:
- - mirror_name: 'gcs_cache'
- mirror_url: gs://example-buildcache/linux-centos7
+ spack_ref: v0.18.0
compilers:
- gcc@10.3.0 target=x86_64
packages:
- cmake%gcc@10.3.0 target=x86_64
- - intel-mkl%gcc@10.3.0 target=skylake
- - intel-mpi@2018.4.274%gcc@10.3.0 target=skylake
- - >-
- fftw%intel@18.0.5 target=skylake ^intel-mpi@2018.4.274%intel@18.0.5
- target=x86_64
- - source: ./modules/scripts/startup-script
+ - id: startup
+ source: ./modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
@@ -94,13 +85,14 @@ deployment_groups:
destination: "install-nfs-client.sh"
- $(appsfs.install_nfs_client_runner)
- $(nfs.mount_runner)
+ - $(spack.install_spack_deps_runner)
- type: shell
content: $(spack.startup_script)
destination: "/apps/spack-install.sh"
- - source: ./modules/compute/vm-instance
+ - id: instance
+ source: ./modules/compute/vm-instance
kind: terraform
- id: instance
use: [network1, startup, nfs, appsfs]
settings:
machine_type: e2-standard-4
diff --git a/tools/validate_configs/test_configs/cloud-batch-cft-instance-template.yaml b/tools/validate_configs/test_configs/cloud-batch-cft-instance-template.yaml
index 0c63bf930a..d3a0ee919f 100644
--- a/tools/validate_configs/test_configs/cloud-batch-cft-instance-template.yaml
+++ b/tools/validate_configs/test_configs/cloud-batch-cft-instance-template.yaml
@@ -23,19 +23,19 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: appfs
+ source: modules/file-system/filestore
kind: terraform
- id: appfs
use: [network1]
settings: {local_mount: /sw}
- - source: modules/scripts/startup-script
+ - id: batch-startup-script
+ source: modules/scripts/startup-script
kind: terraform
- id: batch-startup-script
settings:
runners:
- type: shell
@@ -49,9 +49,9 @@ deployment_groups:
#!/bin/sh
echo "Hello World" > /sw/hello.txt
- - source: github.com/terraform-google-modules/terraform-google-vm//modules/instance_template?ref=v7.8.0
+ - id: batch-compute-template
+ source: github.com/terraform-google-modules/terraform-google-vm//modules/instance_template?ref=v7.8.0
kind: terraform
- id: batch-compute-template
use: [batch-startup-script]
settings:
# Boiler plate to work with Cloud Foundation Toolkit
@@ -65,9 +65,9 @@ deployment_groups:
source_image_family: hpc-centos-7
source_image_project: cloud-hpc-image-public
- - source: ./community/modules/scheduler/cloud-batch-job
+ - id: batch-job
+ source: ./community/modules/scheduler/cloud-batch-job
kind: terraform
- id: batch-job
use: [network1, appfs, batch-startup-script]
settings:
runnable: "cat /sw/hello.txt"
diff --git a/tools/validate_configs/test_configs/complex-data.yaml b/tools/validate_configs/test_configs/complex-data.yaml
index f44268a467..2421496c7b 100644
--- a/tools/validate_configs/test_configs/complex-data.yaml
+++ b/tools/validate_configs/test_configs/complex-data.yaml
@@ -41,15 +41,15 @@ vars:
deployment_groups:
- group: infrastructure
modules:
- - source: modules/network/vpc
+ - id: network
+ source: modules/network/vpc
kind: terraform
- id: network
settings:
network_name: "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network]
settings:
name: homefs
@@ -59,9 +59,9 @@ deployment_groups:
ghpc_role: storage-home
number_label: 44
- - source: modules/scripts/startup-script
+ - id: startup
+ source: modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
@@ -74,9 +74,9 @@ deployment_groups:
source: "modules/startup-script/examples/mount.yaml"
destination: mount.yaml
- - source: modules/compute/vm-instance
+ - id: license-server-1
+ source: modules/compute/vm-instance
kind: terraform
- id: license-server-1
use: [network]
settings:
name_prefix: ls1
diff --git a/tools/validate_configs/test_configs/dashboards.yaml b/tools/validate_configs/test_configs/dashboards.yaml
index ca251e5778..42d587cbb7 100644
--- a/tools/validate_configs/test_configs/dashboards.yaml
+++ b/tools/validate_configs/test_configs/dashboards.yaml
@@ -25,9 +25,9 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/monitoring/dashboard
+ - id: hpc_dash
+ source: modules/monitoring/dashboard
kind: terraform
- id: hpc_dash
settings:
widgets:
- |
@@ -46,9 +46,9 @@ deployment_groups:
},
"title": "HPC Toolkit - TEST 2"
}
- - source: modules/monitoring/dashboard
+ - id: empty_dash
+ source: modules/monitoring/dashboard
kind: terraform
- id: empty_dash
settings:
base_dashboard: Empty
widgets:
diff --git a/tools/validate_configs/test_configs/debian-ss.yaml b/tools/validate_configs/test_configs/debian-ss.yaml
index 49c74ee521..8f3a622e0b 100644
--- a/tools/validate_configs/test_configs/debian-ss.yaml
+++ b/tools/validate_configs/test_configs/debian-ss.yaml
@@ -25,49 +25,40 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: ./modules/network/pre-existing-vpc
+ - id: network1
+ source: ./modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: appsfs
+ source: modules/file-system/filestore
kind: terraform
- id: appsfs
use: [network1]
settings:
name: appsfs
local_mount: /apps
- - source: community/modules/file-system/nfs-server
+ - id: nfs
+ source: community/modules/file-system/nfs-server
kind: terraform
- id: nfs
use: [network1]
settings:
- image: debian-cloud/debian-10
auto_delete_disk: true
- - source: ./community//modules/scripts/spack-install
+ - id: spack
+ source: ./community//modules/scripts/spack-install
kind: terraform
- id: spack
settings:
install_dir: /apps/spack
spack_url: https://github.com/spack/spack
- spack_ref: v0.17.0
- spack_cache_url:
- - mirror_name: 'gcs_cache'
- mirror_url: gs://example-buildcache/linux-centos7
+ spack_ref: v0.18.0
compilers:
- gcc@10.3.0 target=x86_64
packages:
- cmake%gcc@10.3.0 target=x86_64
- - intel-mkl%gcc@10.3.0 target=skylake
- - intel-mpi@2018.4.274%gcc@10.3.0 target=skylake
- - >-
- fftw%intel@18.0.5 target=skylake ^intel-mpi@2018.4.274%intel@18.0.5
- target=x86_64
- - source: ./modules/scripts/startup-script
+ - id: startup
+ source: ./modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
@@ -93,14 +84,15 @@ deployment_groups:
content: $(nfs.install_nfs_client)
destination: "install-nfs-client.sh"
- $(appsfs.install_nfs_client_runner)
- - $(nfs.mount_runner)
+ - $(appsfs.mount_runner)
+ - $(spack.install_spack_deps_runner)
- type: shell
content: $(spack.startup_script)
destination: "/apps/spack-install.sh"
- - source: ./modules/compute/vm-instance
+ - id: instance
+ source: ./modules/compute/vm-instance
kind: terraform
- id: instance
use: [network1, startup, nfs, appsfs]
settings:
machine_type: e2-standard-4
diff --git a/tools/validate_configs/test_configs/exascaler-existing-vpc.yaml b/tools/validate_configs/test_configs/exascaler-existing-vpc.yaml
index 8862f732e6..b242a60aec 100644
--- a/tools/validate_configs/test_configs/exascaler-existing-vpc.yaml
+++ b/tools/validate_configs/test_configs/exascaler-existing-vpc.yaml
@@ -25,13 +25,13 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: ./modules/network/pre-existing-vpc
+ - id: network1
+ source: ./modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: community/modules/file-system/DDN-EXAScaler
+ - id: scratchfs
+ source: community/modules/file-system/DDN-EXAScaler
kind: terraform
- id: scratchfs
use: [network1]
settings:
local_mount: /scratch
diff --git a/tools/validate_configs/test_configs/exascaler-new-vpc.yaml b/tools/validate_configs/test_configs/exascaler-new-vpc.yaml
index 5f29f6b761..86b46909fd 100644
--- a/tools/validate_configs/test_configs/exascaler-new-vpc.yaml
+++ b/tools/validate_configs/test_configs/exascaler-new-vpc.yaml
@@ -25,13 +25,13 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
- - source: ./community/modules/file-system/DDN-EXAScaler
+ - id: scratchfs
+ source: ./community/modules/file-system/DDN-EXAScaler
kind: terraform
- id: scratchfs
use: [network1]
settings:
local_mount: /scratch
diff --git a/tools/validate_configs/test_configs/gpu.yaml b/tools/validate_configs/test_configs/gpu.yaml
index f6c0b30d96..790e0de517 100644
--- a/tools/validate_configs/test_configs/gpu.yaml
+++ b/tools/validate_configs/test_configs/gpu.yaml
@@ -28,13 +28,13 @@ deployment_groups:
# Source is an embedded module, denoted by "modules/*" without ./, ../, /
# as a prefix. To refer to a local or community module, prefix with ./, ../ or /
# Example - ./modules/network/vpc
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: ./modules/compute/vm-instance
+ - id: workstation
+ source: ./modules/compute/vm-instance
kind: terraform
- id: workstation
use:
- network1
settings:
diff --git a/tools/validate_configs/test_configs/hpc-centos-ss.yaml b/tools/validate_configs/test_configs/hpc-centos-ss.yaml
index f1fbdbd030..4e550f8e80 100644
--- a/tools/validate_configs/test_configs/hpc-centos-ss.yaml
+++ b/tools/validate_configs/test_configs/hpc-centos-ss.yaml
@@ -25,48 +25,40 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: ./modules/network/pre-existing-vpc
+ - id: network1
+ source: ./modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: appsfs
+ source: modules/file-system/filestore
kind: terraform
- id: appsfs
use: [network1]
settings:
name: appsfs
local_mount: /apps
- - source: community/modules/file-system/nfs-server
+ - id: nfs
+ source: community/modules/file-system/nfs-server
kind: terraform
- id: nfs
use: [network1]
settings:
auto_delete_disk: true
- - source: ./community//modules/scripts/spack-install
+ - id: spack
+ source: ./community//modules/scripts/spack-install
kind: terraform
- id: spack
settings:
install_dir: /apps/spack
spack_url: https://github.com/spack/spack
- spack_ref: v0.17.0
- spack_cache_url:
- - mirror_name: 'gcs_cache'
- mirror_url: gs://example-buildcache/linux-centos7
+ spack_ref: v0.18.0
compilers:
- gcc@10.3.0 target=x86_64
packages:
- cmake%gcc@10.3.0 target=x86_64
- - intel-mkl%gcc@10.3.0 target=skylake
- - intel-mpi@2018.4.274%gcc@10.3.0 target=skylake
- - >-
- fftw%intel@18.0.5 target=skylake ^intel-mpi@2018.4.274%intel@18.0.5
- target=x86_64
- - source: ./modules/scripts/startup-script
+ - id: startup
+ source: ./modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
@@ -93,13 +85,14 @@ deployment_groups:
destination: "install-nfs-client.sh"
- $(appsfs.install_nfs_client_runner)
- $(nfs.mount_runner)
+ - $(spack.install_spack_deps_runner)
- type: shell
content: $(spack.startup_script)
destination: "/apps/spack-install.sh"
- - source: ./modules/compute/vm-instance
+ - id: instance
+ source: ./modules/compute/vm-instance
kind: terraform
- id: instance
use: [network1, startup, nfs, appsfs]
settings:
machine_type: e2-standard-4
diff --git a/tools/validate_configs/test_configs/hpc-cluster-high-io-remote-state.yaml b/tools/validate_configs/test_configs/hpc-cluster-high-io-remote-state.yaml
index 9f094a7818..6615ce89df 100644
--- a/tools/validate_configs/test_configs/hpc-cluster-high-io-remote-state.yaml
+++ b/tools/validate_configs/test_configs/hpc-cluster-high-io-remote-state.yaml
@@ -31,36 +31,36 @@ terraform_backend_defaults:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
- - source: modules/file-system/filestore
+ - id: projectsfs
+ source: modules/file-system/filestore
kind: terraform
- id: projectsfs
use: [network1]
settings:
filestore_tier: HIGH_SCALE_SSD
size_gb: 10240
local_mount: /projects
- - source: community/modules/file-system/DDN-EXAScaler
+ - id: scratchfs
+ source: community/modules/file-system/DDN-EXAScaler
kind: terraform
- id: scratchfs
use: [network1]
settings:
local_mount: /scratch
- - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- homefs
- scratchfs
@@ -70,9 +70,9 @@ deployment_groups:
max_node_count: 200
partition_name: compute
- - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- homefs
- scratchfs
@@ -80,9 +80,9 @@ deployment_groups:
- compute_partition
- network1
- - source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- homefs
- scratchfs
diff --git a/tools/validate_configs/test_configs/hpc-cluster-project.yaml b/tools/validate_configs/test_configs/hpc-cluster-project.yaml
index 622a18d3ec..bf420d690e 100644
--- a/tools/validate_configs/test_configs/hpc-cluster-project.yaml
+++ b/tools/validate_configs/test_configs/hpc-cluster-project.yaml
@@ -32,18 +32,18 @@ terraform_backend_defaults:
deployment_groups:
- group: onboarding
modules:
- - source: ./community/modules/project/new-project
+ - id: project
+ source: ./community/modules/project/new-project
kind: terraform
- id: project
settings:
project_id: $(vars.project_id)
folder_id: 334688113020 # random number
billing_account: "111110-M2N704-854685" # random billing number
org_id: 123456789 # random org id
- - source: ./community/modules/project/service-enablement
+ - id: enable-apis
+ source: ./community/modules/project/service-enablement
kind: terraform
- id: enable-apis
use: [project]
settings:
gcp_service_list:
@@ -55,20 +55,20 @@ deployment_groups:
# Source is an embedded module, denoted by "modules/*" without ./, ../, /
# as a prefix. To refer to a local module, prefix with ./, ../ or /
# Example - ./modules/network/vpc
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
- - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- network1
- homefs
@@ -78,9 +78,9 @@ deployment_groups:
enable_placement: false
max_node_count: 20
- - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- network1
- homefs
@@ -88,9 +88,9 @@ deployment_groups:
settings:
login_node_count: 1
- - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- network1
- homefs
diff --git a/tools/validate_configs/test_configs/hpc-cluster-service-acct.yaml b/tools/validate_configs/test_configs/hpc-cluster-service-acct.yaml
index 2e14808d4d..4e1068c02b 100644
--- a/tools/validate_configs/test_configs/hpc-cluster-service-acct.yaml
+++ b/tools/validate_configs/test_configs/hpc-cluster-service-acct.yaml
@@ -25,22 +25,22 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
- - source: modules/file-system/pre-existing-network-storage
+ - id: homefs
+ source: modules/file-system/pre-existing-network-storage
kind: terraform
- id: homefs
settings:
server_ip: '$controller'
remote_mount: /home
local_mount: /home
fs_type: nfs
- - source: ./community/modules/project/service-account
+ - id: service_acct
+ source: ./community/modules/project/service-account
kind: terraform
- id: service_acct
settings:
project_id: $(vars.project_id)
names:
@@ -49,18 +49,18 @@ deployment_groups:
- "compute.instanceAdmin.v1"
- - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute-partition
+ source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute-partition
use: [network1]
settings:
partition_name: compute
network_storage:
- $(homefs.network_storage)
- - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm
+ source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm
use: [network1]
settings:
network_storage:
diff --git a/tools/validate_configs/test_configs/hpc-cluster-simple-nfs-sql.yaml b/tools/validate_configs/test_configs/hpc-cluster-simple-nfs-sql.yaml
index fdaaa90655..1e9470e3ec 100644
--- a/tools/validate_configs/test_configs/hpc-cluster-simple-nfs-sql.yaml
+++ b/tools/validate_configs/test_configs/hpc-cluster-simple-nfs-sql.yaml
@@ -25,29 +25,29 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
- - source: ./community/modules/file-system/nfs-server
+ - id: homefs
+ source: ./community/modules/file-system/nfs-server
kind: terraform
- id: homefs
use: [network1]
settings:
labels:
ghpc_role: storage-home
- - source: ./community/modules/database/slurm-cloudsql-federation
+ - id: slurm-sql
+ source: ./community/modules/database/slurm-cloudsql-federation
kind: terraform
- id: slurm-sql
use: [network1]
settings:
sql_instance_name: slurm-sql8
tier: "db-f1-micro"
- - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute-partition
+ source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute-partition
use:
- homefs
- network1
@@ -56,9 +56,9 @@ deployment_groups:
max_node_count: 20
machine_type: c2-standard-4
- - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm-controller
+ source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm-controller
use:
- homefs
- compute-partition
@@ -69,9 +69,9 @@ deployment_groups:
disable_compute_public_ips: true
disable_controller_public_ips: true
- - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm-login
+ source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm-login
use:
- slurm-controller
- network1
diff --git a/tools/validate_configs/test_configs/hpc-cluster-simple.yaml b/tools/validate_configs/test_configs/hpc-cluster-simple.yaml
index d92d61dfb8..bcf2b053b7 100644
--- a/tools/validate_configs/test_configs/hpc-cluster-simple.yaml
+++ b/tools/validate_configs/test_configs/hpc-cluster-simple.yaml
@@ -25,22 +25,22 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
labels:
ghpc_role: storage-home
- - source: modules/scripts/startup-script
+ - id: startup
+ source: modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
@@ -53,9 +53,9 @@ deployment_groups:
source: "modules/startup-script/examples/mount.yaml"
destination: mount.yaml
- - source: modules/compute/vm-instance
+ - id: workstation
+ source: modules/compute/vm-instance
kind: terraform
- id: workstation
use:
- network1
- homefs
diff --git a/tools/validate_configs/test_configs/hpc-cluster-slurm-with-startup.yaml b/tools/validate_configs/test_configs/hpc-cluster-slurm-with-startup.yaml
index 0150a5a61b..ae022dbb7a 100644
--- a/tools/validate_configs/test_configs/hpc-cluster-slurm-with-startup.yaml
+++ b/tools/validate_configs/test_configs/hpc-cluster-slurm-with-startup.yaml
@@ -28,29 +28,29 @@ deployment_groups:
# Source is an embedded module, denoted by "modules/*" without ./, ../, /
# as a prefix. To refer to a local or community module, prefix with ./, ../ or /
# Example - ./modules/network/vpc
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
- - source: modules/scripts/startup-script
+ - id: startup
+ source: modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
source: "modules/startup-script/examples/install_ansible.sh"
destination: install_ansible.sh
- - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- network1
- homefs
@@ -61,9 +61,9 @@ deployment_groups:
partition_name: compute
max_node_count: 20
- - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- network1
- homefs
@@ -73,9 +73,9 @@ deployment_groups:
controller_startup_script: $(startup.startup_script)
compute_startup_script: $(startup.startup_script)
- - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- network1
- homefs
diff --git a/tools/validate_configs/test_configs/hpc-cluster-small-slurm-v5.yaml b/tools/validate_configs/test_configs/hpc-cluster-small-slurm-v5.yaml
index 8e94221836..aba6e5b910 100644
--- a/tools/validate_configs/test_configs/hpc-cluster-small-slurm-v5.yaml
+++ b/tools/validate_configs/test_configs/hpc-cluster-small-slurm-v5.yaml
@@ -29,20 +29,20 @@ deployment_groups:
# Source is an embedded resource, denoted by "resources/*" without ./, ../, /
# as a prefix. To refer to a local resource, prefix with ./, ../ or /
# Example - ./resources/network/vpc
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
- - source: community/modules/compute/schedmd-slurm-gcp-v5-partition
+ - id: debug_partition
+ source: community/modules/compute/schedmd-slurm-gcp-v5-partition
kind: terraform
- id: debug_partition
use:
- network1
- homefs
@@ -53,9 +53,9 @@ deployment_groups:
machine_type: n2-standard-2
is_default: true
- - source: community/modules/compute/schedmd-slurm-gcp-v5-partition
+ - id: compute_partition
+ source: community/modules/compute/schedmd-slurm-gcp-v5-partition
kind: terraform
- id: compute_partition
use:
- network1
- homefs
@@ -63,18 +63,18 @@ deployment_groups:
partition_name: compute
node_count_dynamic_max: 20
- - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller
+ - id: slurm_controller
+ source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller
kind: terraform
- id: slurm_controller
use:
- network1
- debug_partition
- compute_partition
- homefs
- - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login
+ - id: slurm_login
+ source: community/modules/scheduler/schedmd-slurm-gcp-v5-login
kind: terraform
- id: slurm_login
use:
- network1
- slurm_controller
diff --git a/tools/validate_configs/test_configs/htcondor-pool.yaml b/tools/validate_configs/test_configs/htcondor-pool.yaml
index 229dd178cb..f61e2c56de 100644
--- a/tools/validate_configs/test_configs/htcondor-pool.yaml
+++ b/tools/validate_configs/test_configs/htcondor-pool.yaml
@@ -27,25 +27,25 @@ vars:
deployment_groups:
- group: htcondor-env
modules:
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
outputs:
- network_name
- - source: community/modules/scripts/htcondor-install
+ - id: htcondor_install
+ source: community/modules/scripts/htcondor-install
kind: terraform
- id: htcondor_install
- - source: community/modules/project/service-enablement
+ - id: htcondor_services
+ source: community/modules/project/service-enablement
kind: terraform
- id: htcondor_services
use:
- htcondor_install
- - source: modules/scripts/startup-script
+ - id: htcondor_install_scripts
+ source: modules/scripts/startup-script
kind: terraform
- id: htcondor_install_scripts
settings:
runners:
- type: shell
@@ -57,39 +57,39 @@ deployment_groups:
- group: packer
modules:
- - source: modules/packer/custom-image
+ - id: custom-image
+ source: modules/packer/custom-image
kind: packer
- id: custom-image
settings:
image_family: $(vars.htcondor_image_family)
- group: pool
modules:
- - source: modules/network/pre-existing-vpc
+ - id: cluster_network
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: cluster_network
- - source: community/modules/scheduler/htcondor-configure
+ - id: htcondor_configure
+ source: community/modules/scheduler/htcondor-configure
kind: terraform
- id: htcondor_configure
- - source: modules/scripts/startup-script
+ - id: htcondor_configure_central_manager
+ source: modules/scripts/startup-script
kind: terraform
- id: htcondor_configure_central_manager
settings:
runners:
- $(htcondor_configure.central_manager_runner)
- - source: modules/scripts/startup-script
+ - id: htcondor_configure_access_point
+ source: modules/scripts/startup-script
kind: terraform
- id: htcondor_configure_access_point
settings:
runners:
- $(htcondor_configure.access_point_runner)
- - source: modules/compute/vm-instance
+ - id: htcondor_cm
+ source: modules/compute/vm-instance
kind: terraform
- id: htcondor_cm
use:
- cluster_network
- htcondor_configure_central_manager
@@ -107,9 +107,9 @@ deployment_groups:
outputs:
- internal_ip
- - source: modules/compute/vm-instance
+ - id: htcondor_access
+ source: modules/compute/vm-instance
kind: terraform
- id: htcondor_access
use:
- cluster_network
- htcondor_configure_access_point
diff --git a/tools/validate_configs/test_configs/instance-with-startup.yaml b/tools/validate_configs/test_configs/instance-with-startup.yaml
index 8e0867b9c3..3b13ca6e4a 100644
--- a/tools/validate_configs/test_configs/instance-with-startup.yaml
+++ b/tools/validate_configs/test_configs/instance-with-startup.yaml
@@ -25,20 +25,20 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
- - source: modules/scripts/startup-script
+ - id: startup
+ source: modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
@@ -51,9 +51,9 @@ deployment_groups:
source: "modules/startup-script/examples/mount.yaml"
destination: "tmp.sh"
- - source: modules/compute/vm-instance
+ - id: workstation
+ source: modules/compute/vm-instance
kind: terraform
- id: workstation
use:
- network1
- homefs
@@ -62,8 +62,8 @@ deployment_groups:
metadata:
startup-script: $(startup.startup_script)
- - source: ./community/modules/scripts/wait-for-startup
+ - id: wait
+ source: ./community/modules/scripts/wait-for-startup
kind: terraform
- id: wait
settings:
instance_name: ((module.workstation.name[0]))
diff --git a/tools/validate_configs/test_configs/label_test.yaml b/tools/validate_configs/test_configs/label_test.yaml
index a8bbd72892..b9777b2bbc 100644
--- a/tools/validate_configs/test_configs/label_test.yaml
+++ b/tools/validate_configs/test_configs/label_test.yaml
@@ -28,13 +28,13 @@ vars:
deployment_groups:
- group: infrastructure
modules:
- - source: modules/network/vpc
+ - id: network
+ source: modules/network/vpc
kind: terraform
- id: network
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network]
settings:
name: homefs
@@ -44,9 +44,9 @@ deployment_groups:
local_custom: "some_value"
ghpc_deployment: "deployment_override"
- - source: modules/file-system/filestore
+ - id: homefs1
+ source: modules/file-system/filestore
kind: terraform
- id: homefs1
use: [network]
settings:
name: homefs
diff --git a/tools/validate_configs/test_configs/new_project.yaml b/tools/validate_configs/test_configs/new_project.yaml
index 3cc90da254..06563e5b8a 100644
--- a/tools/validate_configs/test_configs/new_project.yaml
+++ b/tools/validate_configs/test_configs/new_project.yaml
@@ -22,9 +22,9 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: ./community/modules/project/new-project
+ - id: project
+ source: ./community/modules/project/new-project
kind: terraform
- id: project
settings:
project_id: test_project
folder_id: 334688113020 # random number
diff --git a/tools/validate_configs/test_configs/overwrite_labels.yaml b/tools/validate_configs/test_configs/overwrite_labels.yaml
index d41472eda9..3d4b724bc4 100644
--- a/tools/validate_configs/test_configs/overwrite_labels.yaml
+++ b/tools/validate_configs/test_configs/overwrite_labels.yaml
@@ -29,13 +29,13 @@ vars:
deployment_groups:
- group: infrastructure
modules:
- - source: modules/network/vpc
+ - id: network
+ source: modules/network/vpc
kind: terraform
- id: network
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network]
settings:
name: homefs
@@ -44,9 +44,9 @@ deployment_groups:
labels:
custom_label: some_value
- - source: modules/file-system/filestore
+ - id: homefs1
+ source: modules/file-system/filestore
kind: terraform
- id: homefs1
use: [network]
settings:
name: homefs
@@ -55,9 +55,9 @@ deployment_groups:
labels:
ghpc_role: storage-home
- - source: modules/file-system/filestore
+ - id: homefs2
+ source: modules/file-system/filestore
kind: terraform
- id: homefs2
use: [network]
settings:
name: homefs
@@ -66,9 +66,9 @@ deployment_groups:
labels:
ghpc_deployment: storage_deployment
- - source: modules/file-system/filestore
+ - id: homefs3
+ source: modules/file-system/filestore
kind: terraform
- id: homefs3
use: [network]
settings:
name: homefs
diff --git a/tools/validate_configs/test_configs/packer.yaml b/tools/validate_configs/test_configs/packer.yaml
index e93780de16..24af11c96d 100644
--- a/tools/validate_configs/test_configs/packer.yaml
+++ b/tools/validate_configs/test_configs/packer.yaml
@@ -27,14 +27,14 @@ vars:
deployment_groups:
- group: network
modules:
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
- group: packer
modules:
- - source: modules/packer/custom-image
+ - id: my-custom-image
+ source: modules/packer/custom-image
kind: packer
- id: my-custom-image
settings:
use_iap: true
omit_external_ip: true
diff --git a/tools/validate_configs/test_configs/pre-existing-fs.yaml b/tools/validate_configs/test_configs/pre-existing-fs.yaml
index df885129fa..6a89e74840 100644
--- a/tools/validate_configs/test_configs/pre-existing-fs.yaml
+++ b/tools/validate_configs/test_configs/pre-existing-fs.yaml
@@ -29,37 +29,37 @@ deployment_groups:
modules:
# the pre-existing-vpc is not needed here, since filestore will use the
# network-name from deployment vars
- - source: modules/file-system/filestore
+ - id: homefs-filestore
+ source: modules/file-system/filestore
kind: terraform
- id: homefs-filestore
- group: compute
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/pre-existing-network-storage
+ - id: homefs
+ source: modules/file-system/pre-existing-network-storage
kind: terraform
- id: homefs
settings:
server_ip: "" # for now, must be completed manually in compute/main.tf
remote_mount: nfsshare
local_mount: $(vars.local_mount) # automatic, added here for clarity
fs_type: nfs
- - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute-partition
+ source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute-partition
use:
- homefs
- network1
settings:
partition_name: compute
- - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm
+ source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm
use:
- homefs
- compute-partition
diff --git a/tools/validate_configs/test_configs/rocky-linux.yaml b/tools/validate_configs/test_configs/rocky-ss.yaml
similarity index 77%
rename from tools/validate_configs/test_configs/rocky-linux.yaml
rename to tools/validate_configs/test_configs/rocky-ss.yaml
index 4eabb31282..b06679749a 100644
--- a/tools/validate_configs/test_configs/rocky-linux.yaml
+++ b/tools/validate_configs/test_configs/rocky-ss.yaml
@@ -25,49 +25,42 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: ./modules/network/pre-existing-vpc
+ - id: network1
+ source: ./modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: appsfs
+ source: modules/file-system/filestore
kind: terraform
- id: appsfs
use: [network1]
settings:
name: appsfs
local_mount: /apps
- - source: community/modules/file-system/nfs-server
+ - id: nfs
+ source: community/modules/file-system/nfs-server
kind: terraform
- id: nfs
use: [network1]
settings:
image: rocky-linux-cloud/rocky-linux-8
auto_delete_disk: true
- - source: ./community//modules/scripts/spack-install
+ - id: spack
+ source: ./community//modules/scripts/spack-install
kind: terraform
- id: spack
settings:
install_dir: /apps/spack
spack_url: https://github.com/spack/spack
- spack_ref: v0.17.0
+ spack_ref: v0.18.0
spack_cache_url:
- - mirror_name: 'gcs_cache'
- mirror_url: gs://example-buildcache/linux-centos7
compilers:
- gcc@10.3.0 target=x86_64
packages:
- cmake%gcc@10.3.0 target=x86_64
- - intel-mkl%gcc@10.3.0 target=skylake
- - intel-mpi@2018.4.274%gcc@10.3.0 target=skylake
- - >-
- fftw%intel@18.0.5 target=skylake ^intel-mpi@2018.4.274%intel@18.0.5
- target=x86_64
- - source: ./modules/scripts/startup-script
+ - id: startup
+ source: ./modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
@@ -94,13 +87,14 @@ deployment_groups:
destination: "install-nfs-client.sh"
- $(appsfs.install_nfs_client_runner)
- $(nfs.mount_runner)
+ - $(spack.install_spack_deps_runner)
- type: shell
content: $(spack.startup_script)
destination: "/apps/spack-install.sh"
- - source: ./modules/compute/vm-instance
+ - id: instance
+ source: ./modules/compute/vm-instance
kind: terraform
- id: instance
use: [network1, startup, nfs, appsfs]
settings:
machine_type: e2-standard-4
diff --git a/tools/validate_configs/test_configs/simple-startup.yaml b/tools/validate_configs/test_configs/simple-startup.yaml
index 7c75e6e643..3940714717 100644
--- a/tools/validate_configs/test_configs/simple-startup.yaml
+++ b/tools/validate_configs/test_configs/simple-startup.yaml
@@ -25,13 +25,13 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: ./modules/network/pre-existing-vpc
+ - id: network1
+ source: ./modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: ./modules/scripts/startup-script
+ - id: startup
+ source: ./modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
@@ -48,15 +48,15 @@ deployment_groups:
tar zxvf /tmp/$1 -C /
args: "foo.tgz 'Expanding the file'"
- - source: ./modules/compute/vm-instance
+ - id: instance
+ source: ./modules/compute/vm-instance
kind: terraform
- id: instance
use: [network1, startup]
settings:
machine_type: e2-standard-4
- - source: ./community/modules/scripts/wait-for-startup
+ - id: waiter
+ source: ./community/modules/scripts/wait-for-startup
kind: terraform
- id: waiter
settings:
instance_name: ((module.instance.name[0]))
diff --git a/tools/validate_configs/test_configs/slurm-two-partitions-workstation.yaml b/tools/validate_configs/test_configs/slurm-two-partitions-workstation.yaml
index 7959bb93f5..4f36bb0dde 100644
--- a/tools/validate_configs/test_configs/slurm-two-partitions-workstation.yaml
+++ b/tools/validate_configs/test_configs/slurm-two-partitions-workstation.yaml
@@ -25,20 +25,20 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/vpc
+ - id: network1
+ source: modules/network/vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
- - source: modules/scripts/startup-script
+ - id: startup
+ source: modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
@@ -51,9 +51,9 @@ deployment_groups:
source: "modules/startup-script/examples/mount.yaml"
destination: "tmp.sh"
- - source: modules/compute/vm-instance
+ - id: workstation
+ source: modules/compute/vm-instance
kind: terraform
- id: workstation
use:
- network1
- homefs
@@ -63,27 +63,27 @@ deployment_groups:
metadata:
startup-script: $(startup.startup_script)
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute-partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute-partition
use:
- homefs
- network1
settings:
partition_name: compute
- - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: debug-partition
+ source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: debug-partition
use:
- homefs
- network1
settings:
partition_name: debug
- - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm
+ source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm
use:
- homefs
- compute-partition
diff --git a/tools/validate_configs/test_configs/spack-buildcache.yaml b/tools/validate_configs/test_configs/spack-buildcache.yaml
index 1f2803d1f8..b8322d2c94 100644
--- a/tools/validate_configs/test_configs/spack-buildcache.yaml
+++ b/tools/validate_configs/test_configs/spack-buildcache.yaml
@@ -25,13 +25,13 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: ./community/modules/scripts/spack-install
+ - id: spack
+ source: ./community/modules/scripts/spack-install
kind: terraform
- id: spack
settings:
install_dir: /apps/spack
spack_url: https://github.com/spack/spack
@@ -51,15 +51,15 @@ deployment_groups:
path: '/tmp/spack_key.gpg'
caches_to_populate:
- type: 'mirror'
- path: ## Add GCS bucket to populate here ##
+ path: ## Add GCS bucket to populate here ##
- - source: modules/scripts/startup-script
+ - id: spack-startup
+ source: modules/scripts/startup-script
kind: terraform
- id: spack-startup
settings:
runners:
- type: data
- source: ## Add path to GPG key here ##
+ source: ## Add path to GPG key here ##
destination: /tmp/spack_key.gpg
- type: shell
content: |
@@ -80,9 +80,9 @@ deployment_groups:
destination: shutdown.sh
content: shutdown -h
- - source: modules/compute/vm-instance
+ - id: spack-build
+ source: modules/compute/vm-instance
kind: terraform
- id: spack-build
use:
- network1
- spack-startup
diff --git a/tools/validate_configs/test_configs/spack-environments.yaml b/tools/validate_configs/test_configs/spack-environments.yaml
index d8d13f8c8a..ffe5ece48b 100644
--- a/tools/validate_configs/test_configs/spack-environments.yaml
+++ b/tools/validate_configs/test_configs/spack-environments.yaml
@@ -25,13 +25,13 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: ./community/modules/scripts/spack-install
+ - id: spack
+ source: ./community/modules/scripts/spack-install
kind: terraform
- id: spack
settings:
install_dir: /apps/spack
spack_url: https://github.com/spack/spack
@@ -79,15 +79,15 @@ deployment_groups:
path: '/tmp/spack_key.gpg'
caches_to_populate:
- type: 'mirror'
- path: ## Add GCS bucket to populate here ##
+ path: ## Add GCS bucket to populate here ##
- - source: modules/scripts/startup-script
+ - id: spack-startup
+ source: modules/scripts/startup-script
kind: terraform
- id: spack-startup
settings:
runners:
- type: data
- source: ## Add path to GPG key here ##
+ source: ## Add path to GPG key here ##
destination: /tmp/spack_key.gpg
- type: shell
content: |
@@ -108,9 +108,9 @@ deployment_groups:
destination: shutdown.sh
content: shutdown -h
- - source: modules/compute/vm-instance
+ - id: spack-build
+ source: modules/compute/vm-instance
kind: terraform
- id: spack-build
use:
- network1
- spack-startup
diff --git a/tools/validate_configs/test_configs/startup-options.yaml b/tools/validate_configs/test_configs/startup-options.yaml
index 0579f6180d..cbfe2764e3 100644
--- a/tools/validate_configs/test_configs/startup-options.yaml
+++ b/tools/validate_configs/test_configs/startup-options.yaml
@@ -25,13 +25,13 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: ./modules/network/pre-existing-vpc
+ - id: network1
+ source: ./modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: ./modules/scripts/startup-script
+ - id: startup
+ source: ./modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
@@ -48,35 +48,35 @@ deployment_groups:
tar zxvf /tmp/$1 -C /
args: "foo.tgz 'Expanding the file'"
- - source: ./modules/compute/vm-instance
+ - id: instance-explicit-startup
+ source: ./modules/compute/vm-instance
kind: terraform
- id: instance-explicit-startup
use: [network1]
settings:
name_prefix: explicit
machine_type: e2-standard-4
startup_script: $(startup.startup_script)
- - source: ./modules/compute/vm-instance
+ - id: instance-no-startup
+ source: ./modules/compute/vm-instance
kind: terraform
- id: instance-no-startup
use: [network1]
settings:
name_prefix: no-startup
machine_type: e2-standard-4
- - source: ./modules/compute/vm-instance
+ - id: instance-use-startup
+ source: ./modules/compute/vm-instance
kind: terraform
- id: instance-use-startup
use: [network1, startup]
settings:
name_prefix: use-startup
machine_type: e2-standard-4
startup_script: $(startup.startup_script)
- - source: ./modules/compute/vm-instance
+ - id: instance-metadata-startup
+ source: ./modules/compute/vm-instance
kind: terraform
- id: instance-metadata-startup
use: [network1]
settings:
name_prefix: metadata-startup
diff --git a/tools/validate_configs/test_configs/test_outputs.yaml b/tools/validate_configs/test_configs/test_outputs.yaml
index 1a9ed048df..103f038974 100644
--- a/tools/validate_configs/test_configs/test_outputs.yaml
+++ b/tools/validate_configs/test_configs/test_outputs.yaml
@@ -25,15 +25,15 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: modules/compute/vm-instance
+ - id: instance
+ source: modules/compute/vm-instance
kind: terraform
- id: instance
outputs:
- name
- - source: community/modules/database/slurm-cloudsql-federation
+ - id: sql
+ source: community/modules/database/slurm-cloudsql-federation
kind: terraform
- id: sql
outputs:
- cloudsql
settings:
@@ -42,24 +42,24 @@ deployment_groups:
nat_ips:
- ip
- - source: modules/file-system/filestore
+ - id: filestore
+ source: modules/file-system/filestore
kind: terraform
- id: filestore
use: [vpc]
outputs:
- network_storage
- install_nfs_client
- - source: ./community/modules/file-system/nfs-server
+ - id: nfs
+ source: ./community/modules/file-system/nfs-server
kind: terraform
- id: nfs
outputs:
- network_storage
- install_nfs_client
- - source: modules/file-system/pre-existing-network-storage
+ - id: pre-existing-storage
+ source: modules/file-system/pre-existing-network-storage
kind: terraform
- id: pre-existing-storage
outputs:
- network_storage
settings:
@@ -68,9 +68,9 @@ deployment_groups:
local_mount: /home
fs_type: nfs
- - source: modules/network/pre-existing-vpc
+ - id: pre-existing-vpc
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: pre-existing-vpc
outputs:
- network_name
- network_self_link
@@ -79,9 +79,9 @@ deployment_groups:
- subnetwork_self_link
- subnetwork_address
- - source: modules/network/vpc
+ - id: vpc
+ source: modules/network/vpc
kind: terraform
- id: vpc
outputs:
- network_name
- network_self_link
@@ -91,9 +91,9 @@ deployment_groups:
- subnetwork_address
- nat_ips
- - source: community/modules/project/new-project
+ - id: new-project
+ source: community/modules/project/new-project
kind: terraform
- id: new-project
outputs:
- project_name
- project_id
@@ -117,9 +117,9 @@ deployment_groups:
billing_account: "111110-M2N704-854685" # random billing number
org_id: 123456789 # random org id
- - source: community/modules/project/service-account
+ - id: sa
+ source: community/modules/project/service-account
kind: terraform
- id: sa
outputs:
- email
- emails
@@ -138,31 +138,31 @@ deployment_groups:
project_roles:
- "compute.instanceAdmin.v1"
- - source: community/modules/scripts/spack-install
+ - id: spack
+ source: community/modules/scripts/spack-install
kind: terraform
- id: spack
outputs:
- startup_script
- controller_startup_script
- - source: modules/scripts/startup-script
+ - id: startup
+ source: modules/scripts/startup-script
kind: terraform
- id: startup
outputs:
- startup_script
- - source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: partition
+ source: ./community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: partition
use: [vpc]
outputs:
- partition
settings:
partition_name: compute
- - source: ./community/modules/file-system/DDN-EXAScaler
+ - id: lustre
+ source: ./community/modules/file-system/DDN-EXAScaler
kind: terraform
- id: lustre
outputs:
- private_addresses
- ssh_console
@@ -170,9 +170,9 @@ deployment_groups:
- http_console
- network_storage
- - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: controller
+ source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: controller
use:
- partition
- vpc
diff --git a/tools/validate_configs/test_configs/threads_per_core.yaml b/tools/validate_configs/test_configs/threads_per_core.yaml
index 441f809226..de06cab879 100644
--- a/tools/validate_configs/test_configs/threads_per_core.yaml
+++ b/tools/validate_configs/test_configs/threads_per_core.yaml
@@ -28,13 +28,13 @@ deployment_groups:
# Source is an embedded module, denoted by "modules/*" without ./, ../, /
# as a prefix. To refer to a local or community module, prefix with ./, ../ or /
# Example - ./modules/network/vpc
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: ./modules/compute/vm-instance
+ - id: n1-2-threads
+ source: ./modules/compute/vm-instance
kind: terraform
- id: n1-2-threads
use:
- network1
settings:
@@ -42,9 +42,9 @@ deployment_groups:
machine_type: n1-standard-32
threads_per_core: 2
- - source: ./modules/compute/vm-instance
+ - id: n1-1-thread
+ source: ./modules/compute/vm-instance
kind: terraform
- id: n1-1-thread
use:
- network1
settings:
@@ -52,9 +52,9 @@ deployment_groups:
machine_type: n1-standard-32
threads_per_core: 1
- - source: ./modules/compute/vm-instance
+ - id: n1-0-threads
+ source: ./modules/compute/vm-instance
kind: terraform
- id: n1-0-threads
use:
- network1
settings:
@@ -62,9 +62,9 @@ deployment_groups:
machine_type: n1-standard-32
threads_per_core: 0
- - source: ./modules/compute/vm-instance
+ - id: n1-null-threads
+ source: ./modules/compute/vm-instance
kind: terraform
- id: n1-null-threads
use:
- network1
settings:
@@ -72,9 +72,9 @@ deployment_groups:
machine_type: n1-standard-32
threads_per_core: null
- - source: ./modules/compute/vm-instance
+ - id: n2-2-threads
+ source: ./modules/compute/vm-instance
kind: terraform
- id: n2-2-threads
use:
- network1
settings:
@@ -82,9 +82,9 @@ deployment_groups:
machine_type: n2-standard-32
threads_per_core: 2
- - source: ./modules/compute/vm-instance
+ - id: n2-1-thread
+ source: ./modules/compute/vm-instance
kind: terraform
- id: n2-1-thread
use:
- network1
settings:
@@ -92,9 +92,9 @@ deployment_groups:
machine_type: n2-standard-32
threads_per_core: 1
- - source: ./modules/compute/vm-instance
+ - id: c2-2-threads
+ source: ./modules/compute/vm-instance
kind: terraform
- id: c2-2-threads
use:
- network1
settings:
@@ -102,9 +102,9 @@ deployment_groups:
machine_type: c2-standard-30
threads_per_core: 2
- - source: ./modules/compute/vm-instance
+ - id: c2-1-thread
+ source: ./modules/compute/vm-instance
kind: terraform
- id: c2-1-thread
use:
- network1
settings:
@@ -112,9 +112,9 @@ deployment_groups:
machine_type: c2-standard-30
threads_per_core: 1
- - source: ./modules/compute/vm-instance
+ - id: e2-medium-0-thread
+ source: ./modules/compute/vm-instance
kind: terraform
- id: e2-medium-0-thread
use:
- network1
settings:
@@ -122,9 +122,9 @@ deployment_groups:
machine_type: e2-medium
threads_per_core: 0
- - source: ./modules/compute/vm-instance
+ - id: e2-medium-null-thread
+ source: ./modules/compute/vm-instance
kind: terraform
- id: e2-medium-null-thread
use:
- network1
settings:
diff --git a/tools/validate_configs/test_configs/ubuntu-ss.yaml b/tools/validate_configs/test_configs/ubuntu-ss.yaml
index 2fb18d80b6..b2cf676059 100644
--- a/tools/validate_configs/test_configs/ubuntu-ss.yaml
+++ b/tools/validate_configs/test_configs/ubuntu-ss.yaml
@@ -25,36 +25,33 @@ vars:
deployment_groups:
- group: primary
modules:
- - source: ./modules/network/pre-existing-vpc
+ - id: network1
+ source: ./modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: appsfs
+ source: modules/file-system/filestore
kind: terraform
- id: appsfs
use: [network1]
settings:
name: appsfs
local_mount: /apps
- - source: community/modules/file-system/nfs-server
+ - id: nfs
+ source: community/modules/file-system/nfs-server
kind: terraform
- id: nfs
use: [network1]
settings:
- image: ubuntu-os-cloud/ubuntu-1804-lts
auto_delete_disk: true
- - source: ./community//modules/scripts/spack-install
+ - id: spack
+ source: ./community//modules/scripts/spack-install
kind: terraform
- id: spack
settings:
install_dir: /apps/spack
spack_url: https://github.com/spack/spack
- spack_ref: v0.17.0
+ spack_ref: v0.18.0
spack_cache_url:
- - mirror_name: 'gcs_cache'
- mirror_url: gs://example-buildcache/linux-centos7
compilers:
- gcc@10.3.0 target=x86_64
packages:
@@ -65,9 +62,9 @@ deployment_groups:
fftw%intel@18.0.5 target=skylake ^intel-mpi@2018.4.274%intel@18.0.5
target=x86_64
- - source: ./modules/scripts/startup-script
+ - id: startup
+ source: ./modules/scripts/startup-script
kind: terraform
- id: startup
settings:
runners:
- type: shell
@@ -94,13 +91,14 @@ deployment_groups:
destination: "install-nfs-client.sh"
- $(appsfs.install_nfs_client_runner)
- $(nfs.mount_runner)
+ - $(spack.install_spack_deps_runner)
- type: shell
content: $(spack.startup_script)
destination: "/apps/spack-install.sh"
- - source: ./modules/compute/vm-instance
+ - id: instance
+ source: ./modules/compute/vm-instance
kind: terraform
- id: instance
use: [network1, startup, nfs, appsfs]
settings:
machine_type: e2-standard-4
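Note: rocky-ss.yaml and ubuntu-ss.yaml both gain a `$(spack.install_spack_deps_runner)` entry so that Spack's prerequisites are installed before the Spack install script itself runs. An abbreviated sketch of the resulting runner ordering (earlier runners elided, assumed unchanged from the original files):

    runners:
      # ... earlier shell/data runners unchanged ...
      - $(nfs.mount_runner)
      - $(spack.install_spack_deps_runner)  # new: install Spack dependencies first
      - type: shell
        content: $(spack.startup_script)
        destination: "/apps/spack-install.sh"
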
diff --git a/tools/validate_configs/test_configs/use-resources.yaml b/tools/validate_configs/test_configs/use-resources.yaml
index 4897534fd1..5ef30961e9 100644
--- a/tools/validate_configs/test_configs/use-resources.yaml
+++ b/tools/validate_configs/test_configs/use-resources.yaml
@@ -28,36 +28,36 @@ deployment_groups:
# Source is an embedded module, denoted by "modules/*" without ./, ../,
# / as a prefix. To refer to a local module, prefix with ./, ../ or /
# Example - ./modules/network/pre-existing-vpc
- - source: modules/network/pre-existing-vpc
+ - id: network1
+ source: modules/network/pre-existing-vpc
kind: terraform
- id: network1
- - source: modules/file-system/filestore
+ - id: homefs
+ source: modules/file-system/filestore
kind: terraform
- id: homefs
use: [network1]
settings:
local_mount: /home
network_name: $(network1.network_name)
- - source: community/modules/file-system/nfs-server
+ - id: projectsfs
+ source: community/modules/file-system/nfs-server
kind: terraform
- id: projectsfs
use: [network1]
- - source: community/modules/file-system/DDN-EXAScaler
+ - id: scratchfs
+ source: community/modules/file-system/DDN-EXAScaler
kind: terraform
- id: scratchfs
settings:
local_mount: /scratch
network_self_link: $(network1.network_self_link)
subnetwork_self_link: $(network1.subnetwork_self_link)
subnetwork_address: $(network1.subnetwork_address)
- - source: community/modules/compute/SchedMD-slurm-on-gcp-partition
+ - id: compute_partition
+ source: community/modules/compute/SchedMD-slurm-on-gcp-partition
kind: terraform
- id: compute_partition
use:
- homefs
- scratchfs
@@ -66,17 +66,17 @@ deployment_groups:
max_node_count: 200
partition_name: compute
- - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
+ - id: slurm_controller
+ source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-controller
kind: terraform
- id: slurm_controller
use:
- projectsfs
- compute_partition
- network1
- - source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
+ - id: slurm_login
+ source: ./community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
- id: slurm_login
use:
- homefs
- scratchfs
diff --git a/tools/validate_configs/validate_configs.sh b/tools/validate_configs/validate_configs.sh
index 5a75cf951d..0e746caf83 100755
--- a/tools/validate_configs/validate_configs.sh
+++ b/tools/validate_configs/validate_configs.sh
@@ -19,7 +19,7 @@ run_test() {
example=$1
tmpdir="$(mktemp -d)"
exampleFile=$(basename "$example")
- DEPLOYMENT="${exampleFile%.yaml}-$(basename "${tmpdir##*.}")"
+ DEPLOYMENT=$(echo "${exampleFile%.yaml}-$(basename "${tmpdir##*.}")" | sed -e 's/\(.*\)/\L\1/')
PROJECT="invalid-project"
echo "testing ${example} in ${tmpdir}"