Skip to content

Commit

Permalink
Merge pull request #396 from GoogleCloudPlatform/release-v1.1.0
Browse files Browse the repository at this point in the history
Release v1.1.0
  • Loading branch information
nick-stroud authored Jul 12, 2022
2 parents b0a5f6f + d90f9a8 commit dffc869
Show file tree
Hide file tree
Showing 132 changed files with 5,051 additions and 241 deletions.
23 changes: 23 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

---

version: 2
updates:
- package-ecosystem: gomod
directory: /
schedule:
interval: daily
target-branch: develop
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ See

The Toolkit supports Packer templates in the contemporary [HCL2 file
format][pkrhcl2] and not in the legacy JSON file format. We require the use of
Packer 1.7 or above, and recommend using the latest release.
Packer 1.7.9 or above, and recommend using the latest release.

The Toolkit's [Packer template module documentation][pkrmodreadme] describes
input variables and their behavior. An [image-building example][pkrexample]
Expand Down
5 changes: 4 additions & 1 deletion cmd/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ func runCreateCmd(cmd *cobra.Command, args []string) {
bpFilename = args[0]
}

deploymentConfig := config.NewDeploymentConfig(bpFilename)
deploymentConfig, err := config.NewDeploymentConfig(bpFilename)
if err != nil {
log.Fatal(err)
}
if err := deploymentConfig.SetCLIVariables(cliVariables); err != nil {
log.Fatalf("Failed to set the variables at CLI: %v", err)
}
Expand Down
5 changes: 4 additions & 1 deletion cmd/expand.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@ func runExpandCmd(cmd *cobra.Command, args []string) {
bpFilename = args[0]
}

deploymentConfig := config.NewDeploymentConfig(bpFilename)
deploymentConfig, err := config.NewDeploymentConfig(bpFilename)
if err != nil {
log.Fatal(err)
}
if err := deploymentConfig.SetCLIVariables(cliVariables); err != nil {
log.Fatalf("Failed to set the variables at CLI: %v", err)
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ HPC deployments on the Google Cloud Platform.`,
log.Fatalf("cmd.Help function failed: %s", err)
}
},
Version: "v1.0.0",
Version: "v1.1.0",
}
)

Expand Down
8 changes: 8 additions & 0 deletions community/examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ Examples using Intel HPC technologies can be found in the
[Intel folder](intel). More information can be found in the
[readme](intel/README.md).

### cloud-batch.yaml

[See description in core](../../examples/README.md#cloud-batchyaml--)

### spack-gromacs.yaml

[See description in core](../../examples/README.md#spack-gromacsyaml--)
Expand All @@ -24,3 +28,7 @@ Examples using Intel HPC technologies can be found in the
### hpc-cluster-small-sharedvpc.yaml

[See description in core](../../examples/README.md#hpc-cluster-small-sharedvpcyaml--)

### slurm-gcp-v5-cluster.yaml

[See description in core](../../examples/README.md#slurm-gcp-v5-clusteryaml--)
67 changes: 67 additions & 0 deletions community/examples/cloud-batch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
blueprint_name: cloud-batch

vars:
project_id: ## Set GCP Project ID Here ##
deployment_name: hello-workload
region: us-central1
zone: us-central1-c

deployment_groups:
- group: primary
modules:
- source: modules/network/pre-existing-vpc
kind: terraform
id: network1

- source: modules/file-system/filestore
kind: terraform
id: appfs
use: [network1]
settings: {local_mount: /sw}

- source: modules/scripts/startup-script
kind: terraform
id: hello-startup-script
settings:
runners:
- type: shell
source: modules/startup-script/examples/install_ansible.sh
destination: install_ansible.sh
- $(appfs.install_nfs_client_runner)
- $(appfs.mount_runner)
- type: shell
destination: generate_hello.sh
content: |
#!/bin/sh
echo "Hello World" > /sw/hello.txt
- source: community/modules/scheduler/cloud-batch-job
kind: terraform
id: batch-job
use: [network1, appfs, hello-startup-script]
settings:
runnable: "cat /sw/hello.txt"
machine_type: n2-standard-4
image:
family: centos-7
project: centos-cloud

- source: community/modules/scheduler/cloud-batch-login-node
kind: terraform
id: batch-login
use: [batch-job]
outputs: [instructions]
7 changes: 3 additions & 4 deletions community/examples/hpc-cluster-small-sharedvpc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ blueprint_name: hpc-cluster-small-sharedvpc
vars:
project_id: ## Set GCP Project ID Here ##
host_project_id: your-host-project
network_name: your-shared-network
subnetwork_name: your-shared-subnetwork
deployment_name: hpc-small-shared-vpc
region: us-central1
zone: us-central1-c
Expand All @@ -45,16 +43,17 @@ deployment_groups:
id: network1
settings:
project_id: $(vars.host_project_id)
network_name: your-shared-network
subnetwork_name: your-shared-subnetwork

- source: modules/file-system/filestore
kind: terraform
id: homefs
use: [network1]
settings:
local_mount: /home
project_id: $(vars.host_project_id)
connect_mode: PRIVATE_SERVICE_ACCESS

network_name: $(network1.network_id)

# This debug_partition will work out of the box without requesting additional GCP quota.
- source: community/modules/compute/SchedMD-slurm-on-gcp-partition
Expand Down
5 changes: 2 additions & 3 deletions community/examples/intel/daos-slurm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,14 @@ deployment_groups:
- homefs
- debug_partition # debug partition will be default as it is listed first
- compute_partition
- daos-client-script
settings:
login_node_count: 1
compute_startup_script: $(daos-client-script.startup_script)
compute_node_scopes:
- "https://www.googleapis.com/auth/monitoring.write"
- "https://www.googleapis.com/auth/logging.write"
- "https://www.googleapis.com/auth/devstorage.read_only"
- "https://www.googleapis.com/auth/cloud-platform"
controller_startup_script: $(daos-client-script.startup_script)

- source: community/modules/scheduler/SchedMD-slurm-on-gcp-login-node
kind: terraform
Expand All @@ -132,8 +131,8 @@ deployment_groups:
- network1
- homefs
- slurm_controller
- daos-client-script
settings:
login_startup_script: $(daos-client-script.startup_script)
login_scopes:
- "https://www.googleapis.com/auth/monitoring.write"
- "https://www.googleapis.com/auth/logging.write"
Expand Down
82 changes: 82 additions & 0 deletions community/examples/slurm-gcp-v5-cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

---

blueprint_name: slurm-gcp-v5-cluster

vars:
project_id: ## Set GCP Project ID Here ##
deployment_name: slurm-gcp-v5
region: us-central1
zone: us-central1-c
slurm_cluster_name: ghpc

deployment_groups:
- group: primary
modules:
# Source is an embedded resource, denoted by "resources/*" without ./, ../, /
# as a prefix. To refer to a local resource, prefix with ./, ../ or /
# Example - ./resources/network/vpc
- source: modules/network/vpc
kind: terraform
id: network1

- source: modules/file-system/filestore
kind: terraform
id: homefs
use: [network1]
settings:
local_mount: /home

- source: community/modules/compute/schedmd-slurm-gcp-v5-partition
kind: terraform
id: debug_partition
use:
- network1
- homefs
settings:
partition_name: debug
node_count_dynamic_max: 4
enable_placement: false
machine_type: n2-standard-2
is_default: true

- source: community/modules/compute/schedmd-slurm-gcp-v5-partition
kind: terraform
id: compute_partition
use:
- network1
- homefs
settings:
partition_name: compute
node_count_dynamic_max: 20

- source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller
kind: terraform
id: slurm_controller
use:
- network1
- debug_partition
- compute_partition
- homefs

- source: community/modules/scheduler/schedmd-slurm-gcp-v5-login
kind: terraform
id: slurm_login
use:
- network1
- slurm_controller
settings:
machine_type: n2-standard-4
24 changes: 12 additions & 12 deletions community/examples/spack-gromacs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,23 +51,24 @@ deployment_groups:
settings:
install_dir: /sw/spack
spack_url: https://github.com/spack/spack
spack_ref: v0.17.1
spack_ref: v0.18.0
log_file: /var/log/spack.log
configs:
- type: single-config
scope: defaults
value: "config:build_stage:/sw/spack/spack-stage"
content: "config:build_stage:/sw/spack/spack-stage"
- type: file
scope: defaults
value: |
content: |
modules:
tcl:
hash_length: 0
all:
conflict:
- '{name}'
projections:
all: '{name}/{version}-{compiler.name}-{compiler.version}'
default:
tcl:
hash_length: 0
all:
conflict:
- '{name}'
projections:
all: '{name}/{version}-{compiler.name}-{compiler.version}'
compilers:
- gcc@10.3.0 target=x86_64
packages:
Expand Down Expand Up @@ -120,5 +121,4 @@ deployment_groups:
- homefs
- appsfs
- slurm_controller
settings:
login_startup_script: $(spack-startup.startup_script)
- spack-startup
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Description

This module creates a compute partition that be used as input to
This module creates a compute partition that can be used as input to
[SchedMD-slurm-on-gcp-controller](../../scheduler/SchedMD-slurm-on-gcp-controller/README.md).

> **Warning**: updating a partition will not cause the slurm controller to
Expand Down Expand Up @@ -64,10 +64,11 @@ No modules.
| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_bandwidth_tier"></a> [bandwidth\_tier](#input\_bandwidth\_tier) | Configures the network interface card and the maximum egress bandwidth for VMs.<br> - Setting `platform_default` respects the Google Cloud Platform API default values for networking.<br> - Setting `virtio_enabled` explicitly selects the VirtioNet network adapter.<br> - Setting `gvnic_enabled` selects the gVNIC network adapter (without Tier 1 high bandwidth).<br> - Setting `tier_1_enabled` selects both the gVNIC adapter and Tier 1 high bandwidth networking.<br> - Note: both gVNIC and Tier 1 networking require a VM image with gVNIC support as well as specific VM families and shapes.<br> - See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"platform_default"` | no |
| <a name="input_compute_disk_size_gb"></a> [compute\_disk\_size\_gb](#input\_compute\_disk\_size\_gb) | Size of boot disk to create for the partition compute nodes | `number` | `20` | no |
| <a name="input_compute_disk_type"></a> [compute\_disk\_type](#input\_compute\_disk\_type) | Type of boot disk to create for the partition compute nodes | `string` | `"pd-standard"` | no |
| <a name="input_cpu_platform"></a> [cpu\_platform](#input\_cpu\_platform) | The name of the minimum CPU platform that you want the instance to use. | `string` | `null` | no |
| <a name="input_enable_placement"></a> [enable\_placement](#input\_enable\_placement) | Enable placement groups | `bool` | `true` | no |
| <a name="input_enable_placement"></a> [enable\_placement](#input\_enable\_placement) | Enable compact placement policies for jobs requiring low latency networking. | `bool` | `true` | no |
| <a name="input_exclusive"></a> [exclusive](#input\_exclusive) | Exclusive job access to nodes | `bool` | `true` | no |
| <a name="input_gpu_count"></a> [gpu\_count](#input\_gpu\_count) | Number of GPUs attached to the partition compute instances | `number` | `0` | no |
| <a name="input_gpu_type"></a> [gpu\_type](#input\_gpu\_type) | Type of GPUs attached to the partition compute instances | `string` | `null` | no |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ output "partition" {
enable_placement : var.enable_placement
regional_capacity : var.regional_capacity
regional_policy : var.regional_policy
bandwidth_tier : var.bandwidth_tier
instance_template : var.instance_template
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ variable "exclusive" {
}

variable "enable_placement" {
description = "Enable placement groups"
description = "Enable compact placement policies for jobs requiring low latency networking."
type = bool
default = true
}
Expand All @@ -142,6 +142,25 @@ variable "regional_policy" {
default = {}
}

variable "bandwidth_tier" {
description = <<EOT
Configures the network interface card and the maximum egress bandwidth for VMs.
- Setting `platform_default` respects the Google Cloud Platform API default values for networking.
- Setting `virtio_enabled` explicitly selects the VirtioNet network adapter.
- Setting `gvnic_enabled` selects the gVNIC network adapter (without Tier 1 high bandwidth).
- Setting `tier_1_enabled` selects both the gVNIC adapter and Tier 1 high bandwidth networking.
- Note: both gVNIC and Tier 1 networking require a VM image with gVNIC support as well as specific VM families and shapes.
- See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details.
EOT
type = string
default = "platform_default"

validation {
condition = contains(["platform_default", "virtio_enabled", "gvnic_enabled", "tier_1_enabled"], var.bandwidth_tier)
error_message = "Allowed values for bandwidth_tier are 'platform_default', 'virtio_enabled', 'gvnic_enabled', or 'tier_1_enabled'."
}
}

variable "instance_template" {
description = "Instance template to use to create partition instances"
type = string
Expand Down
Loading

0 comments on commit dffc869

Please sign in to comment.