Skip to content

Commit

Permalink
Merge pull request #469 from GoogleCloudPlatform/release-v1.2.1
Browse files Browse the repository at this point in the history
Release v1.2.1
  • Loading branch information
nick-stroud authored Aug 11, 2022
2 parents b6f46fe + 475366e commit 238f9b9
Show file tree
Hide file tree
Showing 73 changed files with 597 additions and 192 deletions.
7 changes: 7 additions & 0 deletions .ansible-lint
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,10 @@ skip_list:

mock_roles:
- googlecloudplatform.google_cloud_ops_agents

kinds:
- playbook: "**/ansible_playbooks/*test.{yml,yaml}"
- playbook: "**/files/*.{yml,yaml}"
- playbook: "**/scripts/*.{yml,yaml}"
- tasks: "**/ansible_playbooks/test*.{yml,yaml}"
- tasks: "**/tasks/*"
2 changes: 1 addition & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ HPC deployments on the Google Cloud Platform.`,
log.Fatalf("cmd.Help function failed: %s", err)
}
},
Version: "v1.2.0",
Version: "v1.2.1",
}
)

Expand Down
4 changes: 2 additions & 2 deletions community/examples/intel/daos-cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ deployment_groups:
# This module creates a DAOS server. Server images MUST be created before running this.
# https://github.com/daos-stack/google-cloud-daos/tree/main/images
# more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server
- source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.0
- source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1
kind: terraform
id: daos-server
use: [network1]
Expand All @@ -43,7 +43,7 @@ deployment_groups:
# This module creates a MIG with DAOS clients. Client images MUST be created before running this.
# https://github.com/daos-stack/google-cloud-daos/tree/main/images
# more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_client
- source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.2.0
- source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.2.1
kind: terraform
id: daos-client
use: [network1, daos-server]
Expand Down
2 changes: 1 addition & 1 deletion community/examples/intel/daos-slurm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ deployment_groups:
# This module creates a DAOS server. Server images MUST be created before running this.
# https://github.com/daos-stack/google-cloud-daos/tree/main/images
# more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server
- source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.0
- source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1
kind: terraform
id: daos
use: [network1]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.2.0"
module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.2.1"
}

required_version = ">= 0.14.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@
ansible.builtin.cron:
name: "run HTCondor autoscaler"
user: condor
job: "{{ python }} {{ autoscaler}} --p {{ project_id }} --r {{ region }} --z {{ zone }} --mz --g {{ mig_id }} --c {{ max_size}} | /bin/logger"
job: "{{ python }} {{ autoscaler }} --p {{ project_id }} --r {{ region }} --z {{ zone }} --mz --g {{ mig_id }} --c {{ max_size }} | /bin/logger"
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ No providers.
| Name | Source | Version |
|------|--------|---------|
| <a name="module_slurm_partition"></a> [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | v5.0.2 |
| <a name="module_slurm_partition"></a> [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | v5.0.3 |
## Resources
Expand Down Expand Up @@ -118,6 +118,8 @@ No resources.
| <a name="input_spot_instance_config"></a> [spot\_instance\_config](#input\_spot\_instance\_config) | Configuration for spot VMs. | <pre>object({<br> termination_action = string<br> })</pre> | `null` | no |
| <a name="input_subnetwork_self_link"></a> [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | Subnet to deploy to. | `string` | `null` | no |
| <a name="input_tags"></a> [tags](#input\_tags) | Network tag list. | `list(string)` | `[]` | no |
| <a name="input_zone_policy_allow"></a> [zone\_policy\_allow](#input\_zone\_policy\_allow) | Partition nodes will prefer to be created in the listed zones. If a zone appears<br>in both zone\_policy\_allow and zone\_policy\_deny, then zone\_policy\_deny will take<br>priority for that zone. | `set(string)` | `[]` | no |
| <a name="input_zone_policy_deny"></a> [zone\_policy\_deny](#input\_zone\_policy\_deny) | Partition nodes will not be created in the listed zones. If a zone appears in<br>both zone\_policy\_allow and zone\_policy\_deny, then zone\_policy\_deny will take<br>priority for that zone. | `set(string)` | `[]` | no |

## Outputs

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ locals {


module "slurm_partition" {
source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=v5.0.2"
source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=v5.0.3"

slurm_cluster_name = var.slurm_cluster_name
partition_nodes = local.partition_nodes
Expand All @@ -74,6 +74,8 @@ module "slurm_partition" {
partition_name = var.partition_name
project_id = var.project_id
region = var.region
zone_policy_allow = var.zone_policy_allow
zone_policy_deny = var.zone_policy_deny
subnetwork = var.subnetwork_self_link == null ? "" : var.subnetwork_self_link
partition_conf = local.partition_conf
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
* limitations under the License.
*/


# Most variables have been sourced and modified from the SchedMD/slurm-gcp
# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.0.3

variable "slurm_cluster_name" {
type = string
Expand All @@ -36,6 +37,40 @@ variable "region" {
type = string
}

variable "zone_policy_allow" {
description = <<-EOD
Partition nodes will prefer to be created in the listed zones. If a zone appears
in both zone_policy_allow and zone_policy_deny, then zone_policy_deny will take
priority for that zone.
EOD
type = set(string)
default = []

validation {
condition = alltrue([
for x in var.zone_policy_allow : length(regexall("^[a-z]+-[a-z]+[0-9]-[a-z]$", x)) > 0
])
error_message = "A provided zone in zone_policy_allow is not a valid zone (Regexp: '^[a-z]+-[a-z]+[0-9]-[a-z]$')."
}
}

variable "zone_policy_deny" {
description = <<-EOD
Partition nodes will not be created in the listed zones. If a zone appears in
both zone_policy_allow and zone_policy_deny, then zone_policy_deny will take
priority for that zone.
EOD
type = set(string)
default = []

validation {
condition = alltrue([
for x in var.zone_policy_deny : length(regexall("^[a-z]+-[a-z]+[0-9]-[a-z]$", x)) > 0
])
error_message = "A provided zone in zone_policy_deny is not a valid zone (Regexp '^[a-z]+-[a-z]+[0-9]-[a-z]$')."
}
}

variable "partition_name" {
description = "The name of the slurm partition."
type = string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.2.0"
module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.2.1"
}
provider_meta "google-beta" {
module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.2.0"
module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.2.1"
}

required_version = ">= 0.13.0"
Expand Down
31 changes: 27 additions & 4 deletions community/modules/file-system/Intel-DAOS/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,30 @@ For more information, please refer to the [Google Cloud DAOS repo on GitHub](htt

Working examples of a DAOS deployment and how it can be used in conjunction with Slurm [can be found in the community examples folder](../../../examples/intel/).

Using the DAOS server module implies that one has DAOS server images created as [instructed in the images section here](https://github.com/daos-stack/google-cloud-daos/tree/main/images).
A full list of server module parameters can be found at [the DAOS Server module README](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server).

A full list of module parameters can be found at [the DAOS Server module README](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server).
### DAOS Server Images

In order to use the DAOS server terraform module a DAOS server image must be created as instructed in the *images* directory [here](https://github.com/daos-stack/google-cloud-daos/tree/main/images).

DAOS server images must be built from the same tagged version of the [google-cloud-daos](https://github.com/daos-stack/google-cloud-daos) repository that is specified in the `source:` attribute for modules used in the [community examples](../../../examples/intel/).

For example, in the following snippet taken from the [community/examples/intel/daos-cluster.yaml](../../../examples/intel/daos-cluster.yaml) the `source:` attribute specifies v0.2.1 of the daos_server terraform module.

```yaml
- source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1
kind: terraform
id: daos-server
use: [network1]
settings:
number_of_instances: 2
labels: {ghpc_role: file-system}
```
In order to use the daos_server module v0.2.1, you need to:
1. Clone the [google-cloud-daos](https://github.com/daos-stack/google-cloud-daos) repo and check out v0.2.1
2. Follow the instructions in the images/README.md file to build a DAOS server image
## Recommended settings
Expand All @@ -21,7 +42,7 @@ By default, the DAOS system is created with 4 servers will be configured for bes
The following settings will configure this [system for TCO](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/examples/daos_cluster#the-terraformtfvarstcoexample-file) (default):

```yaml
- source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.0
- source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1
kind: terraform
id: daos-server
use: [network1]
Expand All @@ -37,7 +58,7 @@ The following settings will configure this [system for TCO](https://github.com/d
The following settings will configure this system for [best performance](https://github.com/daos-stack/google-cloud-daos/tree/develop/terraform/examples/daos_cluster#the-terraformtfvarsperfexample-file):

```yaml
- source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.0
- source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1
kind: terraform
id: daos-server
use: [network1]
Expand Down Expand Up @@ -75,3 +96,5 @@ Intel Corporation provides several ways for the users to get technical support:
2. Commercial L3 support is available on an on-demand basis. Please get in touch with Intel Corporation to obtain more information.

- You may inquire about the L3 support via the Slack channel (https://daos-stack.slack.com/archives/C03GLTLHA59)

[here](https://github.com/daos-stack/google-cloud-daos/tree/main/images)
2 changes: 1 addition & 1 deletion community/modules/file-system/nfs-server/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ output "network_storage" {
description = "export of all desired folder directories"
value = [for mount in var.local_mounts : {
remote_mount = "/exports${mount}"
local_mount = "${mount}"
local_mount = mount
fs_type = "nfs"
mount_options = "defaults,hard,intr"
server_ip = google_compute_instance.compute_instance.network_interface[0].network_ip
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@
# limitations under the License.

if [ ! "$(which mount.nfs)" ]; then
if [ -f /etc/centos-release ] || [ -f /etc/redhat-release ] || [ -f /etc/oracle-release ] || [ -f /etc/system-release ]; then

yum -y update
yum install -y nfs-utils
if [ -f /etc/centos-release ] || [ -f /etc/redhat-release ] ||
[ -f /etc/oracle-release ] || [ -f /etc/system-release ]; then
yum install --disablerepo="*" --enablerepo="base,epel" -y nfs-utils
elif [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release || grep -qi ubuntu /etc/os-release; then
apt-get -y update
apt-get -y install nfs-common
Expand Down
12 changes: 6 additions & 6 deletions community/modules/file-system/nfs-server/scripts/mount.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@
tasks:
- name: Read metadata network_storage information
uri:
url: "{{url}}/{{meta_key}}"
url: "{{ url }}/{{ meta_key }}"
method: GET
headers:
Metadata-Flavor: "Google"
register: storage
- name: Mount file systems
mount:
src: "{{item.server_ip}}:/{{item.remote_mount}}"
path: "{{item.local_mount}}"
opts: "{{item.mount_options}}"
src: "{{ item.server_ip }}:/{{ item.remote_mount }}"
path: "{{ item.local_mount }}"
opts: "{{ item.mount_options }}"
boot: true
fstype: "{{item.fs_type}}"
fstype: "{{ item.fs_type }}"
state: "mounted"
loop: "{{storage.json}}"
loop: "{{ storage.json }}"
2 changes: 1 addition & 1 deletion community/modules/file-system/nfs-server/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.2.0"
module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.2.1"
}

required_version = ">= 0.14.0"
Expand Down
2 changes: 1 addition & 1 deletion community/modules/project/service-enablement/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.2.0"
module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.2.1"
}

required_version = ">= 0.14.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.2.0"
module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.2.1"
}

required_version = ">= 0.14.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.2.0"
module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.2.1"
}

required_version = ">= 0.14.0"
Expand Down
2 changes: 1 addition & 1 deletion community/modules/scheduler/cloud-batch-job/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ output "instructions" {
gcloud ${var.gcloud_version} batch jobs delete ${local.job_id} --location=${var.region} --project=${var.project_id}
List all jobs in region:
gcloud ${var.gcloud_version} batch jobs list ${var.region} --project=${var.project_id} | grep ^name:
gcloud ${var.gcloud_version} batch jobs list ${var.region} --project=${var.project_id}
EOT
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ limitations under the License.
|------|-------------|------|---------|:--------:|
| <a name="input_batch_job_directory"></a> [batch\_job\_directory](#input\_batch\_job\_directory) | The path of the directory on the login node in which to place the Google Cloud Batch job template | `string` | `"/home/batch-jobs"` | no |
| <a name="input_deployment_name"></a> [deployment\_name](#input\_deployment\_name) | Name of the deployment, also used for the job\_id | `string` | n/a | yes |
| <a name="input_enable_oslogin"></a> [enable\_oslogin](#input\_enable\_oslogin) | Enable or Disable OS Login with "ENABLE" or "DISABLE". Set to "INHERIT" to inherit project OS Login setting. | `string` | `"ENABLE"` | no |
| <a name="input_gcloud_version"></a> [gcloud\_version](#input\_gcloud\_version) | The version of the gcloud cli being used. Used for output instructions. Valid inputs are `"alpha"`, `"beta"` and "" (empty string for default version). Typically supplied by a cloud-batch-job module. | `string` | `"alpha"` | no |
| <a name="input_instance_template"></a> [instance\_template](#input\_instance\_template) | Login VM instance template self-link. Typically supplied by a cloud-batch-job module. | `string` | n/a | yes |
| <a name="input_job_filename"></a> [job\_filename](#input\_job\_filename) | The filename of the generated job template file. Typically supplied by a cloud-batch-job module. | `string` | n/a | yes |
Expand Down
10 changes: 9 additions & 1 deletion community/modules/scheduler/cloud-batch-login-node/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,15 @@ data "google_compute_instance_template" "batch_instance_template" {
locals {
instance_template_metadata = data.google_compute_instance_template.batch_instance_template.metadata
batch_startup_script = local.instance_template_metadata["startup-script"]
login_metadata = merge(local.instance_template_metadata, { startup-script = module.login_startup_script.startup_script })
startup_metadata = { startup-script = module.login_startup_script.startup_script }

oslogin_api_values = {
"DISABLE" = "FALSE"
"ENABLE" = "TRUE"
}
oslogin_metadata = var.enable_oslogin == "INHERIT" ? {} : { enable-oslogin = lookup(local.oslogin_api_values, var.enable_oslogin, "") }

login_metadata = merge(local.instance_template_metadata, local.startup_metadata, local.oslogin_metadata)
}

module "login_startup_script" {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,6 @@ output "instructions" {
gcloud ${var.gcloud_version} batch jobs delete ${var.job_id} --location=${var.region} --project=${var.project_id}
List all jobs in region:
gcloud ${var.gcloud_version} batch jobs list ${var.region} --project=${var.project_id} | grep ^name:
gcloud ${var.gcloud_version} batch jobs list ${var.region} --project=${var.project_id}
EOT
}
10 changes: 10 additions & 0 deletions community/modules/scheduler/cloud-batch-login-node/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,13 @@ variable "batch_job_directory" {
type = string
default = "/home/batch-jobs"
}

variable "enable_oslogin" {
description = "Enable or Disable OS Login with \"ENABLE\" or \"DISABLE\". Set to \"INHERIT\" to inherit project OS Login setting."
type = string
default = "ENABLE"
validation {
condition = var.enable_oslogin == null ? false : contains(["ENABLE", "DISABLE", "INHERIT"], var.enable_oslogin)
error_message = "Allowed string values for var.enable_oslogin are \"ENABLE\", \"DISABLE\", or \"INHERIT\"."
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:cloud-batch-login-node/v1.2.0"
module_name = "blueprints/terraform/hpc-toolkit:cloud-batch-login-node/v1.2.1"
}

required_version = ">= 0.14.0"
Expand Down
2 changes: 1 addition & 1 deletion community/modules/scheduler/htcondor-configure/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ terraform {
}
}
provider_meta "google" {
module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.2.0"
module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.2.1"
}

required_version = ">= 0.13.0"
Expand Down
Loading

0 comments on commit 238f9b9

Please sign in to comment.