From 754e2d03c3e37240bc4f2cc996bc3627c5da8968 Mon Sep 17 00:00:00 2001 From: "Mark A. Olson" Date: Mon, 18 Jul 2022 18:37:22 -0700 Subject: [PATCH 01/45] Update Intel DAOS examples to use google-cloud-daos v0.2.1 DAOS v2.0.3 was released and as a result a few minor changes were made in the https://github.com/daos-stack/google-cloud-daos repository. This change updates the examples in community/examples/intel to use the terraform modules from https://github.com/daos-stack/google-cloud-daos/releases/tag/v0.2.1 Signed-off-by: Mark A. Olson --- community/examples/intel/daos-cluster.yaml | 4 ++-- community/examples/intel/daos-slurm.yaml | 2 +- community/modules/file-system/Intel-DAOS/README.md | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/community/examples/intel/daos-cluster.yaml b/community/examples/intel/daos-cluster.yaml index 8957a61d92..e3a1adb39a 100644 --- a/community/examples/intel/daos-cluster.yaml +++ b/community/examples/intel/daos-cluster.yaml @@ -32,7 +32,7 @@ deployment_groups: # This module creates a DAOS server. Server images MUST be created before running this. # https://github.com/daos-stack/google-cloud-daos/tree/main/images # more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server - - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.0 + - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1 kind: terraform id: daos-server use: [network1] @@ -43,7 +43,7 @@ deployment_groups: # This module creates a MIG with DAOS clients. Client images MUST be created before running this. # https://github.com/daos-stack/google-cloud-daos/tree/main/images # more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_client - - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.2.0 + - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.2.1 kind: terraform id: daos-client use: [network1, daos-server] diff --git a/community/examples/intel/daos-slurm.yaml b/community/examples/intel/daos-slurm.yaml index dbefaa4272..e2d3a44c1d 100644 --- a/community/examples/intel/daos-slurm.yaml +++ b/community/examples/intel/daos-slurm.yaml @@ -39,7 +39,7 @@ deployment_groups: # This module creates a DAOS server. Server images MUST be created before running this. 
# https://github.com/daos-stack/google-cloud-daos/tree/main/images # more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server - - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.0 + - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1 kind: terraform id: daos use: [network1] diff --git a/community/modules/file-system/Intel-DAOS/README.md b/community/modules/file-system/Intel-DAOS/README.md index d7abf4c8f6..d0388e7083 100644 --- a/community/modules/file-system/Intel-DAOS/README.md +++ b/community/modules/file-system/Intel-DAOS/README.md @@ -21,7 +21,7 @@ By default, the DAOS system is created with 4 servers will be configured for bes The following settings will configure this [system for TCO](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/examples/daos_cluster#the-terraformtfvarstcoexample-file) (default): ```yaml - - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.0 + - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1 kind: terraform id: daos-server use: [network1] @@ -37,7 +37,7 @@ The following settings will configure this [system for TCO](https://github.com/d The following settings will configure this system for [best performance](https://github.com/daos-stack/google-cloud-daos/tree/develop/terraform/examples/daos_cluster#the-terraformtfvarsperfexample-file): ```yaml - - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.0 + - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1 kind: terraform id: daos-server use: [network1] From 6162338028d63d9cf920b2b98fcb87b7e3650bab Mon Sep 17 00:00:00 2001 From: Karim Roukoz Date: Tue, 19 Jul 2022 14:19:01 +0000 Subject: [PATCH 02/45] fix errors when missing deployment_name add unit tests for config.DeploymentName() --- pkg/config/config.go | 4 ++++ pkg/config/config_test.go | 35 ++++++++++++++++++++++++++++++++--- pkg/config/expand.go | 10 +--------- pkg/config/expand_test.go | 6 +++--- 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index fbb923fce6..7f1efc7b90 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -396,6 +396,10 @@ func checkUsedModuleNames( // validateConfig runs a set of simple early checks on the imported input YAML func (dc *DeploymentConfig) validateConfig() { + _, err := dc.Config.DeploymentName() + if err != nil { + log.Fatal(err) + } moduleToGroup, err := checkModuleAndGroupNames(dc.Config.DeploymentGroups) if err != nil { log.Fatal(err) } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 39edc9e688..5fe2723885 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -27,6 +27,7 @@ import ( "hpc-toolkit/pkg/modulereader" + "github.com/pkg/errors" "github.com/zclconf/go-cty/cty" . 
"gopkg.in/check.v1" ) @@ -188,7 +189,7 @@ func getDeploymentConfigForTest() DeploymentConfig { testBlueprint := Blueprint{ BlueprintName: "simple", Validators: []validatorConfig{}, - Vars: map[string]interface{}{}, + Vars: map[string]interface{}{"deployment_name": "deployment_name"}, TerraformBackendDefaults: TerraformBackend{ Type: "", Configuration: map[string]interface{}{}, @@ -231,7 +232,7 @@ func getBasicDeploymentConfigWithTestModule() DeploymentConfig { } return DeploymentConfig{ Config: Blueprint{ - Vars: make(map[string]interface{}), + Vars: map[string]interface{}{"deployment_name": "deployment_name"}, DeploymentGroups: []DeploymentGroup{testDeploymentGroup}, }, } @@ -315,6 +316,34 @@ func (s *MySuite) TestCheckModuleAndGroupNames(c *C) { c.Assert(dc.ModuleToGroup[testModID], Equals, 0) } +func (s *MySuite) TestDeploymentName(c *C) { + dc := getDeploymentConfigForTest() + var e *DeploymentNameError + + // Is deployment_name a valid string? + deploymentName, err := dc.Config.DeploymentName() + c.Assert(deploymentName, Equals, "deployment_name") + c.Assert(err, IsNil) + + // Is deployment_name an empty string? + dc.Config.Vars["deployment_name"] = "" + deploymentName, err = dc.Config.DeploymentName() + c.Assert(deploymentName, Equals, "") + c.Check(errors.As(err, &e), Equals, true) + + // Is deployment_name not a string? + dc.Config.Vars["deployment_name"] = 100 + _, err = dc.Config.DeploymentName() + c.Assert(deploymentName, Equals, "") + c.Check(errors.As(err, &e), Equals, true) + + // Is deployment_name not set? + delete(dc.Config.Vars, "deployment_name") + deploymentName, err = dc.Config.DeploymentName() + c.Assert(deploymentName, Equals, "") + c.Check(errors.As(err, &e), Equals, true) +} + func (s *MySuite) TestNewBlueprint(c *C) { dc := getDeploymentConfigForTest() outFile := filepath.Join(tmpTestDir, "out_TestNewBlueprint.yaml") @@ -376,7 +405,7 @@ func (s *MySuite) TestSetCLIVariables(c *C) { // Success dc := getBasicDeploymentConfigWithTestModule() c.Assert(dc.Config.Vars["project_id"], IsNil) - c.Assert(dc.Config.Vars["deployment_name"], IsNil) + c.Assert(dc.Config.Vars["deployment_name"], Equals, "deployment_name") c.Assert(dc.Config.Vars["region"], IsNil) c.Assert(dc.Config.Vars["zone"], IsNil) diff --git a/pkg/config/expand.go b/pkg/config/expand.go index ec88107720..2f8f18ad54 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -214,14 +214,6 @@ func getRole(source string) string { return role } -func getDeploymentName(vars map[string]interface{}) string { - deployName, exists := vars["deployment_name"] - if exists { - return deployName.(string) - } - return "undefined" -} - func toStringInterfaceMap(i interface{}) (map[string]interface{}, error) { var ret map[string]interface{} switch val := i.(type) { @@ -247,7 +239,7 @@ func toStringInterfaceMap(i interface{}) (map[string]interface{}, error) { func (dc *DeploymentConfig) combineLabels() error { defaultLabels := map[string]interface{}{ blueprintLabel: dc.Config.BlueprintName, - deploymentLabel: getDeploymentName(dc.Config.Vars), + deploymentLabel: dc.Config.Vars["deployment_name"], } labels := "labels" var globalLabels map[string]interface{} diff --git a/pkg/config/expand_test.go b/pkg/config/expand_test.go index 0f0fa95e03..0a3183071f 100644 --- a/pkg/config/expand_test.go +++ b/pkg/config/expand_test.go @@ -43,7 +43,8 @@ func (s *MySuite) TestExpandBackends(c *C) { grp := dc.Config.DeploymentGroups[0] c.Assert(grp.TerraformBackend.Type, Not(Equals), "") gotPrefix := 
grp.TerraformBackend.Configuration["prefix"] - expPrefix := fmt.Sprintf("%s/%s", dc.Config.BlueprintName, grp.Name) + expPrefix := fmt.Sprintf("%s/%s/%s", dc.Config.BlueprintName, + dc.Config.Vars["deployment_name"], grp.Name) c.Assert(gotPrefix, Equals, expPrefix) // Add a new resource group, ensure each group name is included @@ -51,7 +52,6 @@ func (s *MySuite) TestExpandBackends(c *C) { Name: "group2", } dc.Config.DeploymentGroups = append(dc.Config.DeploymentGroups, newGroup) - dc.Config.Vars["deployment_name"] = "testDeployment" err = dc.expandBackends() c.Assert(err, IsNil) newGrp := dc.Config.DeploymentGroups[1] @@ -264,7 +264,7 @@ func (s *MySuite) TestCombineLabels(c *C) { // Was the ghpc_deployment label set correctly? ghpcDeployment, exists := globalLabels[deploymentLabel] c.Assert(exists, Equals, true) - c.Assert(ghpcDeployment, Equals, "undefined") + c.Assert(ghpcDeployment, Equals, "deployment_name") // Was "labels" created for the module with no settings? _, exists = dc.Config.DeploymentGroups[0].Modules[0].Settings["labels"] From bc36d00c1fc4c461c2766480141364b2eff4cbcc Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 19 Jul 2022 15:35:20 -0700 Subject: [PATCH 03/45] Bump patch release to 1.1.2 --- cmd/root.go | 2 +- .../compute/SchedMD-slurm-on-gcp-partition/versions.tf | 2 +- .../modules/database/slurm-cloudsql-federation/versions.tf | 4 ++-- community/modules/file-system/nfs-server/versions.tf | 2 +- community/modules/project/service-enablement/versions.tf | 2 +- .../scheduler/SchedMD-slurm-on-gcp-controller/versions.tf | 2 +- .../scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf | 2 +- .../modules/scheduler/cloud-batch-login-node/versions.tf | 2 +- community/modules/scheduler/htcondor-configure/versions.tf | 2 +- community/modules/scripts/wait-for-startup/versions.tf | 2 +- modules/compute/vm-instance/versions.tf | 4 ++-- modules/file-system/filestore/versions.tf | 4 ++-- modules/monitoring/dashboard/versions.tf | 2 +- modules/network/pre-existing-vpc/versions.tf | 2 +- modules/scripts/startup-script/versions.tf | 2 +- 15 files changed, 18 insertions(+), 18 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index d65dd659cd..de906088b6 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -34,7 +34,7 @@ HPC deployments on the Google Cloud Platform.`, log.Fatalf("cmd.Help function failed: %s", err) } }, - Version: "v1.1.1", + Version: "v1.1.2", } ) diff --git a/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf b/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf index fe3f9e309c..18ff9a691a 100644 --- a/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf +++ b/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.1.2" } required_version = ">= 0.14.0" diff --git a/community/modules/database/slurm-cloudsql-federation/versions.tf b/community/modules/database/slurm-cloudsql-federation/versions.tf index 22eab367a2..bf92e98ffd 100644 --- a/community/modules/database/slurm-cloudsql-federation/versions.tf +++ b/community/modules/database/slurm-cloudsql-federation/versions.tf @@ -30,10 +30,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.1.1" + module_name = 
"blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.1.2" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.1.2" } required_version = ">= 0.13.0" diff --git a/community/modules/file-system/nfs-server/versions.tf b/community/modules/file-system/nfs-server/versions.tf index 7edd04ee8d..d58fc5ac84 100644 --- a/community/modules/file-system/nfs-server/versions.tf +++ b/community/modules/file-system/nfs-server/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.1.2" } required_version = ">= 0.14.0" diff --git a/community/modules/project/service-enablement/versions.tf b/community/modules/project/service-enablement/versions.tf index 102224ffdc..def3e0665d 100644 --- a/community/modules/project/service-enablement/versions.tf +++ b/community/modules/project/service-enablement/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.1.2" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf index 3c198bb5e4..91aa6b23f2 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.1.2" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf index be164b0a0f..43098f57d5 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.1.2" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/cloud-batch-login-node/versions.tf b/community/modules/scheduler/cloud-batch-login-node/versions.tf index 4a6a9d1d0d..e4b5d4f4b2 100644 --- a/community/modules/scheduler/cloud-batch-login-node/versions.tf +++ b/community/modules/scheduler/cloud-batch-login-node/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:cloud-batch-login-node/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:cloud-batch-login-node/v1.1.2" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/htcondor-configure/versions.tf b/community/modules/scheduler/htcondor-configure/versions.tf index 945665b532..a99c5c55d7 100644 --- a/community/modules/scheduler/htcondor-configure/versions.tf +++ b/community/modules/scheduler/htcondor-configure/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = 
"blueprints/terraform/hpc-toolkit:htcondor-configure/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.1.2" } required_version = ">= 0.13.0" diff --git a/community/modules/scripts/wait-for-startup/versions.tf b/community/modules/scripts/wait-for-startup/versions.tf index ab8e39d1c0..932fde8085 100644 --- a/community/modules/scripts/wait-for-startup/versions.tf +++ b/community/modules/scripts/wait-for-startup/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.1.2" } required_version = ">= 0.14.0" diff --git a/modules/compute/vm-instance/versions.tf b/modules/compute/vm-instance/versions.tf index 3c0ded61e2..efa0463b59 100644 --- a/modules/compute/vm-instance/versions.tf +++ b/modules/compute/vm-instance/versions.tf @@ -27,10 +27,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.1.2" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.1.2" } required_version = ">= 0.14.0" diff --git a/modules/file-system/filestore/versions.tf b/modules/file-system/filestore/versions.tf index 22c01db3c3..51f1aff95a 100644 --- a/modules/file-system/filestore/versions.tf +++ b/modules/file-system/filestore/versions.tf @@ -26,10 +26,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.1.2" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.1.2" } required_version = ">= 0.14.0" diff --git a/modules/monitoring/dashboard/versions.tf b/modules/monitoring/dashboard/versions.tf index 8f45567450..d03e144c3b 100644 --- a/modules/monitoring/dashboard/versions.tf +++ b/modules/monitoring/dashboard/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.1.2" } required_version = ">= 0.14.0" diff --git a/modules/network/pre-existing-vpc/versions.tf b/modules/network/pre-existing-vpc/versions.tf index 42e9e471fb..95a0225c71 100644 --- a/modules/network/pre-existing-vpc/versions.tf +++ b/modules/network/pre-existing-vpc/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.1.2" } required_version = ">= 0.14.0" diff --git a/modules/scripts/startup-script/versions.tf b/modules/scripts/startup-script/versions.tf index de6cd636b8..af809f2f6d 100644 --- a/modules/scripts/startup-script/versions.tf +++ b/modules/scripts/startup-script/versions.tf @@ -30,7 +30,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.1.1" + module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.1.2" } required_version = ">= 0.14.0" From 73e5660cbbd281c63ce9cec84c36ceb3cc920fbb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Jul 2022 22:36:03 +0000 
Subject: [PATCH 04/45] Bump github.com/spf13/afero from 1.9.0 to 1.9.2 Bumps [github.com/spf13/afero](https://github.com/spf13/afero) from 1.9.0 to 1.9.2. - [Release notes](https://github.com/spf13/afero/releases) - [Commits](https://github.com/spf13/afero/compare/v1.9.0...v1.9.2) --- updated-dependencies: - dependency-name: github.com/spf13/afero dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index de0a93367e..8386c6d2b5 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect github.com/otiai10/copy v1.7.0 github.com/pkg/errors v0.9.1 - github.com/spf13/afero v1.9.0 + github.com/spf13/afero v1.9.2 github.com/spf13/cobra v1.5.0 github.com/zclconf/go-cty v1.10.0 google.golang.org/genproto v0.0.0-20220616135557-88e70c0c3a90 diff --git a/go.sum b/go.sum index d6359b4e8a..ddf109a93a 100644 --- a/go.sum +++ b/go.sum @@ -276,8 +276,8 @@ github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ= github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spf13/afero v1.9.0 h1:sFSLUHgxdnN32Qy38hK3QkYBFXZj9DKjVjCUCtD7juY= -github.com/spf13/afero v1.9.0/go.mod h1:iUV7ddyEEZPO5gA3zD4fJt6iStLlL+Lg4m2cihcDf8Y= +github.com/spf13/afero v1.9.2 h1:j49Hj62F0n+DaZ1dDCvhABaPNSGNkt32oRFxI33IEMw= +github.com/spf13/afero v1.9.2/go.mod h1:iUV7ddyEEZPO5gA3zD4fJt6iStLlL+Lg4m2cihcDf8Y= github.com/spf13/cobra v1.5.0 h1:X+jTBEBqF0bHN+9cSMgmfuvv2VHJ9ezmFNf9Y/XstYU= github.com/spf13/cobra v1.5.0/go.mod h1:dWXEIy2H428czQCjInthrTRUg7yKbok+2Qi/yBIJoUM= github.com/spf13/pflag v1.0.2/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= From 5f816ca91a2169f811d054875569932372a9f03e Mon Sep 17 00:00:00 2001 From: Karim Roukoz Date: Tue, 19 Jul 2022 23:32:00 +0000 Subject: [PATCH 05/45] fix errors when missing deployment_name --- pkg/config/config.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index 7f1efc7b90..e5d2091feb 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -523,8 +523,8 @@ func ConvertMapToCty(iMap map[string]interface{}) (map[string]cty.Value, error) // global variables, then they are replaced by the cty.Value of the // corresponding entry in the origin. All other cty.Values are unmodified. // ERROR: if (somehow) the cty.String cannot be converted to a Go string -// ERROR: rely on HCL TraverseAbs to bubble up "diagnostics" when the global variable -// being resolved does not exist in b.Vars +// ERROR: rely on HCL TraverseAbs to bubble up "diagnostics" when the global +// variable being resolved does not exist in b.Vars func ResolveVariables( ctyMap map[string]cty.Value, origin map[string]cty.Value, From dae0a1415f5cf1106e59d50635fa11a230e1b21b Mon Sep 17 00:00:00 2001 From: "Mark A. Olson" Date: Tue, 19 Jul 2022 17:20:37 -0700 Subject: [PATCH 06/45] Updated community/modules/file-system/Intel-DAOS/README.md with information about the requirement to build the DAOS server image from the same tagged version of the https://github.com/daos-stack/google-cloud-daos repo that the modules in the blueprints are using. 
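For example, building an image that matches the pinned module version looks roughly like this (a sketch of the steps the README change below spells out; the repo URL, tag, and images/README.md path all come from that README):

```shell
# Check out the same tag that the blueprint modules pin via ?ref=v0.2.1
git clone https://github.com/daos-stack/google-cloud-daos.git
cd google-cloud-daos
git checkout v0.2.1
# Then follow the instructions in images/README.md to build the DAOS server image
```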
Signed-off-by: Mark A. Olson --- .../modules/file-system/Intel-DAOS/README.md | 27 +++++++++++++++++- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/community/modules/file-system/Intel-DAOS/README.md b/community/modules/file-system/Intel-DAOS/README.md index d0388e7083..768482dec5 100644 --- a/community/modules/file-system/Intel-DAOS/README.md +++ b/community/modules/file-system/Intel-DAOS/README.md @@ -10,9 +10,30 @@ For more information, please refer to the [Google Cloud DAOS repo on GitHub](htt Working examples of a DAOS deployment and how it can be used in conjunction with Slurm [can be found in the community examples folder](../../../examples/intel/). -Using the DAOS server module implies that one has DAOS server images created as [instructed in the images section here](https://github.com/daos-stack/google-cloud-daos/tree/main/images). +A full list of server module parameters can be found at [the DAOS Server module README](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server). -A full list of module parameters can be found at [the DAOS Server module README](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server). +### DAOS Server Images + +In order to use the DAOS server terraform module, a DAOS server image must be created as instructed in the *images* directory [here](https://github.com/daos-stack/google-cloud-daos/tree/main/images). + +DAOS server images must be built from the same tagged version of the [google-cloud-daos](https://github.com/daos-stack/google-cloud-daos) repository that is specified in the `source:` attribute for modules used in the [community examples](../../../examples/intel/). + +For example, in the following snippet taken from [community/examples/intel/daos-cluster.yaml](../../../examples/intel/daos-cluster.yaml), the `source:` attribute specifies v0.2.1 of the daos_server terraform module: + +```yaml + - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.2.1 + kind: terraform + id: daos-server + use: [network1] + settings: + number_of_instances: 2 + labels: {ghpc_role: file-system} +``` + +In order to use the daos_server module v0.2.1, you need to: + +1. Clone the [google-cloud-daos](https://github.com/daos-stack/google-cloud-daos) repo and check out v0.2.1 +2. Follow the instructions in the images/README.md file to build a DAOS server image ## Recommended settings @@ -75,3 +96,5 @@ Intel Corporation provides several ways for the users to get technical support: 2. Commercial L3 support is available on an on-demand basis. Please get in touch with Intel Corporation to obtain more information. 
- You may inquire about the L3 support via the Slack channel (https://daos-stack.slack.com/archives/C03GLTLHA59) From 2ea2ce3810c0caf1954c910480d3c783e8c62473 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Wed, 20 Jul 2022 14:07:59 -0700 Subject: [PATCH 07/45] Add os-login option to Batch login module and default to on --- .../modules/scheduler/cloud-batch-login-node/README.md | 1 + .../modules/scheduler/cloud-batch-login-node/main.tf | 10 +++++++++- .../scheduler/cloud-batch-login-node/variables.tf | 10 ++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/community/modules/scheduler/cloud-batch-login-node/README.md b/community/modules/scheduler/cloud-batch-login-node/README.md index a1c849c121..89cdb27349 100644 --- a/community/modules/scheduler/cloud-batch-login-node/README.md +++ b/community/modules/scheduler/cloud-batch-login-node/README.md @@ -109,6 +109,7 @@ limitations under the License. |------|-------------|------|---------|:--------:| | [batch\_job\_directory](#input\_batch\_job\_directory) | The path of the directory on the login node in which to place the Google Cloud Batch job template | `string` | `"/home/batch-jobs"` | no | | [deployment\_name](#input\_deployment\_name) | Name of the deployment, also used for the job\_id | `string` | n/a | yes | +| [enable\_oslogin](#input\_enable\_oslogin) | Enable or Disable OS Login with "ENABLE" or "DISABLE". Set to "INHERIT" to inherit project OS Login setting. | `string` | `"ENABLE"` | no | | [gcloud\_version](#input\_gcloud\_version) | The version of the gcloud cli being used. Used for output instructions. Valid inputs are `"alpha"`, `"beta"` and "" (empty string for default version). Typically supplied by a cloud-batch-job module. | `string` | `"alpha"` | no | | [instance\_template](#input\_instance\_template) | Login VM instance template self-link. Typically supplied by a cloud-batch-job module. | `string` | n/a | yes | | [job\_filename](#input\_job\_filename) | The filename of the generated job template file. Typically supplied by a cloud-batch-job module. | `string` | n/a | yes | diff --git a/community/modules/scheduler/cloud-batch-login-node/main.tf b/community/modules/scheduler/cloud-batch-login-node/main.tf index b2a171c969..c43ad0bfab 100644 --- a/community/modules/scheduler/cloud-batch-login-node/main.tf +++ b/community/modules/scheduler/cloud-batch-login-node/main.tf @@ -21,7 +21,15 @@ data "google_compute_instance_template" "batch_instance_template" { locals { instance_template_metadata = data.google_compute_instance_template.batch_instance_template.metadata batch_startup_script = local.instance_template_metadata["startup-script"] - login_metadata = merge(local.instance_template_metadata, { startup-script = module.login_startup_script.startup_script }) + startup_metadata = { startup-script = module.login_startup_script.startup_script } + + oslogin_api_values = { + "DISABLE" = "FALSE" + "ENABLE" = "TRUE" + } + oslogin_metadata = var.enable_oslogin == "INHERIT" ? 
{} : { enable-oslogin = lookup(local.oslogin_api_values, var.enable_oslogin, "") } + + login_metadata = merge(local.instance_template_metadata, local.startup_metadata, local.oslogin_metadata) } module "login_startup_script" { diff --git a/community/modules/scheduler/cloud-batch-login-node/variables.tf b/community/modules/scheduler/cloud-batch-login-node/variables.tf index 658f037c43..6b0dcb8807 100644 --- a/community/modules/scheduler/cloud-batch-login-node/variables.tf +++ b/community/modules/scheduler/cloud-batch-login-node/variables.tf @@ -70,3 +70,13 @@ variable "batch_job_directory" { type = string default = "/home/batch-jobs" } + +variable "enable_oslogin" { + description = "Enable or Disable OS Login with \"ENABLE\" or \"DISABLE\". Set to \"INHERIT\" to inherit project OS Login setting." + type = string + default = "ENABLE" + validation { + condition = var.enable_oslogin == null ? false : contains(["ENABLE", "DISABLE", "INHERIT"], var.enable_oslogin) + error_message = "Allowed string values for var.enable_oslogin are \"ENABLE\", \"DISABLE\", or \"INHERIT\"." + } +} From a574c5f4c261c7250ad0e9b66ff2fd45259c4e26 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Wed, 20 Jul 2022 14:11:28 -0700 Subject: [PATCH 08/45] Add Batch submission from login node to Batch integration test --- .../base-integration-test.yml | 1 + .../test-batch-submission.yml | 35 +++++++++++++++++++ .../daily-tests/tests/cloud-build.yml | 4 ++- 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml index ee72bd0e25..6d6a49bb14 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml @@ -152,6 +152,7 @@ deployment_name: "{{ deployment_name }}" mounts: "{{ mounts }}" partitions: "{{ partitions }}" + custom_vars: "{{ custom_vars }}" loop: "{{ post_deploy_tests }}" loop_control: loop_var: test diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml new file mode 100644 index 0000000000..d1e50a1374 --- /dev/null +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml @@ -0,0 +1,35 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
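+# Flow of this playbook, in summary: wait for the login node startup script to
+# finish, submit the pre-generated job config with `gcloud alpha batch jobs
+# submit`, poll `gcloud alpha batch jobs describe` until the job reports
+# SUCCEEDED, and always delete the job in the cleanup step.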
+ +--- +- name: Wait for startup script to complete + become: true + ansible.builtin.wait_for: + path: /var/log/messages + search_regex: '.*{{ remote_node }}.*startup-script exit status ([0-9]+)' + timeout: 600 + register: startup_status +- name: Batch Job Block + block: + - name: Submit batch job + ansible.builtin.command: gcloud alpha batch jobs submit {{ deployment_name }} --config=/home/batch-jobs/cloud-batch-{{ deployment_name }}.json --location=us-central1 --project={{ custom_vars.project }} + - name: Wait for job to run + ansible.builtin.command: "gcloud alpha batch jobs describe {{ deployment_name }} --location=us-central1 --project={{ custom_vars.project }}" + register: result + until: result.stdout.find("SUCCEEDED") != -1 + retries: 60 + delay: 10 + + always: + - name: delete job + ansible.builtin.command: "gcloud alpha batch jobs delete {{ deployment_name }} --location=us-central1 --project={{ custom_vars.project }}" diff --git a/tools/cloud-build/daily-tests/tests/cloud-build.yml b/tools/cloud-build/daily-tests/tests/cloud-build.yml index 2db21fbaf1..43fbed2fb3 100644 --- a/tools/cloud-build/daily-tests/tests/cloud-build.yml +++ b/tools/cloud-build/daily-tests/tests/cloud-build.yml @@ -19,4 +19,6 @@ blueprint_yaml: "{{ workspace }}/community/examples/cloud-batch.yaml" blueprint_dir: deployment-cloud-batch network: "default" remote_node: "{{ deployment_name }}-batch-login" -post_deploy_tests: [] +post_deploy_tests: [test-batch-submission.yml] +custom_vars: + project: "{{ project }}" From 485452f3693e261cc10eaceef6c64de4a92d62f1 Mon Sep 17 00:00:00 2001 From: Alex Heye Date: Wed, 20 Jul 2022 23:49:17 +0000 Subject: [PATCH 09/45] Add check for startup script failure, monitoring --- .../daily-tests/ansible_playbooks/test-monitoring.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml index b6dba36e6a..05727b156c 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml @@ -21,6 +21,10 @@ search_regex: '.*{{ remote_node }}.*startup-script exit status ([0-9]+)' timeout: 600 register: startup_status +- name: Fail if startup script exited with a non-zero return code + fail: + msg: There was a failure in the startup script + when: startup_status['match_groups'][0] != "0" - name: Fail if ops agent is not running become: true ansible.builtin.command: systemctl is-active {{ item }} From e5c995281718f821ab45db574768f19e27c0ff8b Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Wed, 20 Jul 2022 16:53:25 -0700 Subject: [PATCH 10/45] Add hello world integration tests to demonstrate interaction between test files --- .../hello-world-integration-test.yml | 35 +++++++++++++++++++ .../ansible_playbooks/test-hello-world.yml | 22 ++++++++++++ .../daily-tests/tests/hello-world-vars.yml | 21 +++++++++++ 3 files changed, 78 insertions(+) create mode 100644 tools/cloud-build/daily-tests/ansible_playbooks/hello-world-integration-test.yml create mode 100644 tools/cloud-build/daily-tests/ansible_playbooks/test-hello-world.yml create mode 100644 tools/cloud-build/daily-tests/tests/hello-world-vars.yml diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/hello-world-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/hello-world-integration-test.yml new file mode 100644 index 0000000000..5e91ad496c --- /dev/null +++ 
b/tools/cloud-build/daily-tests/ansible_playbooks/hello-world-integration-test.yml @@ -0,0 +1,35 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# The hello world integration test exists to demonstrate the test interaction between test files +# Run this locally with the following command: +# ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/hello-world-integration-test.yml --extra-vars="@tools/cloud-build/daily-tests/tests/hello-world-vars.yml" +--- +- name: hello world integration tests + hosts: 127.0.0.1 + connection: local + + tasks: + - name: Print Hello World + debug: + msg: Hello world from the base integration test. + - name: Run post_deploy_tests + include_tasks: "{{ test }}" + run_once: true + vars: + top_level_var: "{{ top_level_var }}" + custom_vars: "{{ custom_vars }}" + loop: "{{ post_deploy_tests }}" + loop_control: + loop_var: test diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-hello-world.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-hello-world.yml new file mode 100644 index 0000000000..7fdea9b2c5 --- /dev/null +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-hello-world.yml @@ -0,0 +1,22 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# The hello world integration test exists to demonstrate the test interaction between test files +--- +- name: Print top_level_var + debug: + msg: "{{ top_level_var }}" +- name: Print item from custom_vars + debug: + msg: "{{ custom_vars.bar }}" diff --git a/tools/cloud-build/daily-tests/tests/hello-world-vars.yml b/tools/cloud-build/daily-tests/tests/hello-world-vars.yml new file mode 100644 index 0000000000..468ce30689 --- /dev/null +++ b/tools/cloud-build/daily-tests/tests/hello-world-vars.yml @@ -0,0 +1,21 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# The hello world integration test exists to demonstrate the test interaction between test files +--- +top_level_var: "top level var" +post_deploy_tests: [test-hello-world.yml] +custom_vars: + foo: "foo var" + bar: "bar var" From ca248a3db28b47d1f621cc50095d41f7e539fdee Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Wed, 20 Jul 2022 17:12:36 -0700 Subject: [PATCH 11/45] Addressing feedback --- .../ansible_playbooks/test-batch-submission.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml index d1e50a1374..fad6c21052 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml @@ -19,12 +19,16 @@ search_regex: '.*{{ remote_node }}.*startup-script exit status ([0-9]+)' timeout: 600 register: startup_status +- name: Fail if startup script exited with a non-zero return code + fail: + msg: There was a failure in the startup script + when: startup_status['match_groups'][0] != "0" - name: Batch Job Block block: - name: Submit batch job ansible.builtin.command: gcloud alpha batch jobs submit {{ deployment_name }} --config=/home/batch-jobs/cloud-batch-{{ deployment_name }}.json --location=us-central1 --project={{ custom_vars.project }} - name: Wait for job to run - ansible.builtin.command: "gcloud alpha batch jobs describe {{ deployment_name }} --location=us-central1 --project={{ custom_vars.project }}" + ansible.builtin.command: gcloud alpha batch jobs describe {{ deployment_name }} --location=us-central1 --project={{ custom_vars.project }} register: result until: result.stdout.find("SUCCEEDED") != -1 retries: 60 @@ -32,4 +36,4 @@ always: - name: delete job - ansible.builtin.command: "gcloud alpha batch jobs delete {{ deployment_name }} --location=us-central1 --project={{ custom_vars.project }}" + ansible.builtin.command: gcloud alpha batch jobs delete {{ deployment_name }} --location=us-central1 --project={{ custom_vars.project }} From 5f972a06d3ab3cf55b93b9c95703896b2f5fceb6 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Wed, 20 Jul 2022 20:34:36 -0700 Subject: [PATCH 12/45] Update Batch list instructions now that Batch response is brief --- community/modules/scheduler/cloud-batch-job/outputs.tf | 2 +- community/modules/scheduler/cloud-batch-login-node/outputs.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/community/modules/scheduler/cloud-batch-job/outputs.tf b/community/modules/scheduler/cloud-batch-job/outputs.tf index f730aec507..fcabec3d5d 100644 --- a/community/modules/scheduler/cloud-batch-job/outputs.tf +++ b/community/modules/scheduler/cloud-batch-job/outputs.tf @@ -29,7 +29,7 @@ output "instructions" { gcloud ${var.gcloud_version} batch jobs delete ${local.job_id} --location=${var.region} --project=${var.project_id} List all jobs in region: - gcloud ${var.gcloud_version} batch jobs list ${var.region} --project=${var.project_id} | grep ^name: + gcloud ${var.gcloud_version} batch jobs list ${var.region} --project=${var.project_id} EOT } diff --git a/community/modules/scheduler/cloud-batch-login-node/outputs.tf b/community/modules/scheduler/cloud-batch-login-node/outputs.tf index 7f0ca33abf..2c5ec8b613 100644 --- a/community/modules/scheduler/cloud-batch-login-node/outputs.tf +++ b/community/modules/scheduler/cloud-batch-login-node/outputs.tf @@ -37,6 +37,6 @@ output 
"instructions" { gcloud ${var.gcloud_version} batch jobs delete ${var.job_id} --location=${var.region} --project=${var.project_id} List all jobs in region: - gcloud ${var.gcloud_version} batch jobs list ${var.region} --project=${var.project_id} | grep ^name: + gcloud ${var.gcloud_version} batch jobs list ${var.region} --project=${var.project_id} EOT } From 1adce054c47020d94db7d7cfa71a17f7b484c8b3 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Wed, 20 Jul 2022 23:02:38 -0700 Subject: [PATCH 13/45] Update Batch integration test to run in series --- tools/cloud-build/daily-tests/integration-group-4.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/cloud-build/daily-tests/integration-group-4.yaml b/tools/cloud-build/daily-tests/integration-group-4.yaml index ee902ef1a9..934ea44d0c 100644 --- a/tools/cloud-build/daily-tests/integration-group-4.yaml +++ b/tools/cloud-build/daily-tests/integration-group-4.yaml @@ -17,7 +17,8 @@ # and test health of the pool # ├── build_ghpc # └── fetch_builder -# ├── htcondor (group 4) +# └── htcondor (group 4) +# └── Cloud Batch timeout: 14400s # 4hr @@ -65,6 +66,7 @@ steps: ## Test Cloud Batch Example - id: cloud-batch waitFor: + - htcondor - fetch_builder - build_ghpc name: >- From d4f4df4d6b460a40a0d0582b06f69d50e77d4e61 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Wed, 20 Jul 2022 22:39:24 -0700 Subject: [PATCH 14/45] Enable ansible lint pre-commit hook --- .ansible-lint | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.ansible-lint b/.ansible-lint index d4432b0c7a..082bbbe6cb 100644 --- a/.ansible-lint +++ b/.ansible-lint @@ -3,3 +3,10 @@ skip_list: mock_roles: - googlecloudplatform.google_cloud_ops_agents + +kinds: + - playbook: "**/ansible_playbooks/*test.{yml,yaml}" + - playbook: "**/files/*.{yml,yaml}" + - playbook: "**/scripts/*.{yml,yaml}" + - tasks: "**/ansible_playbooks/test*.{yml,yaml}" + - tasks: "**/tasks/*" From 8eb83ea468ed2a383e6e6ab326f65d3dc7c428f6 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 21 Jul 2022 13:21:36 -0500 Subject: [PATCH 15/45] Address var-spacing ansible linting errors --- .../files/htcondor_configure_autoscaler.yml | 2 +- .../file-system/nfs-server/scripts/mount.yaml | 12 ++++++------ modules/file-system/filestore/scripts/mount.yaml | 12 ++++++------ 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/community/modules/compute/htcondor-execute-point/files/htcondor_configure_autoscaler.yml b/community/modules/compute/htcondor-execute-point/files/htcondor_configure_autoscaler.yml index ac0add27ee..2858fcba93 100644 --- a/community/modules/compute/htcondor-execute-point/files/htcondor_configure_autoscaler.yml +++ b/community/modules/compute/htcondor-execute-point/files/htcondor_configure_autoscaler.yml @@ -38,4 +38,4 @@ ansible.builtin.cron: name: "run HTCondor autoscaler" user: condor - job: "{{ python }} {{ autoscaler}} --p {{ project_id }} --r {{ region }} --z {{ zone }} --mz --g {{ mig_id }} --c {{ max_size}} | /bin/logger" + job: "{{ python }} {{ autoscaler }} --p {{ project_id }} --r {{ region }} --z {{ zone }} --mz --g {{ mig_id }} --c {{ max_size }} | /bin/logger" diff --git a/community/modules/file-system/nfs-server/scripts/mount.yaml b/community/modules/file-system/nfs-server/scripts/mount.yaml index 267e5bb08e..2cb9d8ce58 100644 --- a/community/modules/file-system/nfs-server/scripts/mount.yaml +++ b/community/modules/file-system/nfs-server/scripts/mount.yaml @@ -23,17 +23,17 @@ tasks: - name: Read metadata network_storage information uri: - url: 
"{{url}}/{{meta_key}}" + url: "{{ url }}/{{ meta_key }}" method: GET headers: Metadata-Flavor: "Google" register: storage - name: Mount file systems mount: - src: "{{item.server_ip}}:/{{item.remote_mount}}" - path: "{{item.local_mount}}" - opts: "{{item.mount_options}}" + src: "{{ item.server_ip }}:/{{ item.remote_mount }}" + path: "{{ item.local_mount }}" + opts: "{{ item.mount_options }}" boot: true - fstype: "{{item.fs_type}}" + fstype: "{ {item.fs_type }}" state: "mounted" - loop: "{{storage.json}}" + loop: "{{ storage.json }}" diff --git a/modules/file-system/filestore/scripts/mount.yaml b/modules/file-system/filestore/scripts/mount.yaml index 267e5bb08e..b39a2f4adb 100644 --- a/modules/file-system/filestore/scripts/mount.yaml +++ b/modules/file-system/filestore/scripts/mount.yaml @@ -23,17 +23,17 @@ tasks: - name: Read metadata network_storage information uri: - url: "{{url}}/{{meta_key}}" + url: "{{ url }}/{{ meta_key }}" method: GET headers: Metadata-Flavor: "Google" register: storage - name: Mount file systems mount: - src: "{{item.server_ip}}:/{{item.remote_mount}}" - path: "{{item.local_mount}}" - opts: "{{item.mount_options}}" + src: "{{ item.server_ip }}:/{{ item.remote_mount }}" + path: "{{ item.local_mount }}" + opts: "{{ item.mount_options }}" boot: true - fstype: "{{item.fs_type}}" + fstype: "{{ item.fs_type }}" state: "mounted" - loop: "{{storage.json}}" + loop: "{{ storage.json }}" From f5964b7159cc00c4c1891dc7fbb28ffe7f10691e Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 21 Jul 2022 13:22:22 -0500 Subject: [PATCH 16/45] Address package-latest ansible linting errors --- .../files/install-htcondor-autoscaler-deps.yml | 6 +++--- .../scripts/htcondor-install/files/install-htcondor.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/community/modules/scripts/htcondor-install/files/install-htcondor-autoscaler-deps.yml b/community/modules/scripts/htcondor-install/files/install-htcondor-autoscaler-deps.yml index d6a6acb9dc..c3109bbb94 100644 --- a/community/modules/scripts/htcondor-install/files/install-htcondor-autoscaler-deps.yml +++ b/community/modules/scripts/htcondor-install/files/install-htcondor-autoscaler-deps.yml @@ -24,13 +24,13 @@ - name: Create virtual environment for HTCondor autoscaler ansible.builtin.pip: name: pip - version: 21.3.1 + version: 21.3.1 # last Python 2.7-compatible release virtualenv: /usr/local/htcondor virtualenv_command: /usr/bin/python3 -m venv - name: Install latest setuptools ansible.builtin.pip: name: setuptools - state: latest + state: 44.1.1 # last Python 2.7-compatible release virtualenv: /usr/local/htcondor virtualenv_command: /usr/bin/python3 -m venv - name: Install HTCondor autoscaler dependencies @@ -41,6 +41,6 @@ - htcondor ansible.builtin.pip: name: "{{ item }}" - state: latest + state: present # rely on pip resolver to pick latest compatible releases virtualenv: /usr/local/htcondor virtualenv_command: /usr/bin/python3 -m venv diff --git a/community/modules/scripts/htcondor-install/files/install-htcondor.yaml b/community/modules/scripts/htcondor-install/files/install-htcondor.yaml index ecea1f3a1f..1c591fd97f 100644 --- a/community/modules/scripts/htcondor-install/files/install-htcondor.yaml +++ b/community/modules/scripts/htcondor-install/files/install-htcondor.yaml @@ -28,7 +28,7 @@ - name: install HTCondor ansible.builtin.yum: name: condor - state: latest + state: present - name: ensure token directory ansible.builtin.file: path: /etc/condor/tokens.d From d6054f4f27194367cf9ef008181cdbef13ec55e1 Mon 
Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 21 Jul 2022 13:41:49 -0500 Subject: [PATCH 17/45] Address unnamed-task ansible linting errors --- .../ansible_playbooks/base-integration-test.yml | 3 ++- .../ansible_playbooks/slurm-integration-test.yml | 11 ++++++----- .../daily-tests/ansible_playbooks/test-monitoring.yml | 3 ++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml index 6d6a49bb14..330387bd88 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml @@ -62,7 +62,8 @@ gcloud compute instances list --filter="labels.ghpc_deployment={{ deployment_name }}" --format='table(name,zone,id,status)' - - debug: + - name: Print instance information + ansible.builtin.debug: var: instances_list.stdout_lines - name: Get remote IP register: remote_ip diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml index ced599cdd1..418a4765d8 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml @@ -63,7 +63,8 @@ gcloud compute instances list --filter="labels.ghpc_deployment={{ deployment_name }}" --format='table(name,zone,id,status)' - - debug: + - name: Print instance information + ansible.builtin.debug: var: instances_list.stdout_lines - name: Get login IP register: login_ip @@ -174,17 +175,17 @@ command: cat /var/log/slurm/resume.log register: resume_output ignore_errors: true - - debug: + - name: Print Slurm resume.log + ansible.builtin.debug: var: resume_output.stdout_lines - - name: Recover Suspend Logs delegate_to: "{{ hostvars['localhost']['controller_ip']['stdout'] }}" command: cat /var/log/slurm/suspend.log register: suspend_output ignore_errors: true - - debug: + - name: Print Slurm suspend.log + ansible.builtin.debug: var: suspend_output.stdout_lines - - name: Delete Firewall Rule run_once: true delegate_to: localhost diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml index 05727b156c..d89b1da497 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml @@ -37,7 +37,8 @@ run_once: true delegate_to: localhost register: dashboards -- debug: +- name: Print dashboard information + ansible.builtin.debug: var: dashboards - name: Fail if the HPC Dashboard hasn't been created ansible.builtin.fail: From 0c07d5076f8351483e9a30bd9be51013ea7436e9 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 21 Jul 2022 14:32:05 -0700 Subject: [PATCH 18/45] Update integration test documentation for hello world test --- tools/cloud-build/daily-tests/README.md | 30 +++++++++++++++++-------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/tools/cloud-build/daily-tests/README.md b/tools/cloud-build/daily-tests/README.md index 147e45196a..d233753375 100644 --- a/tools/cloud-build/daily-tests/README.md +++ b/tools/cloud-build/daily-tests/README.md @@ -3,21 +3,33 @@ Integration tests have been broken into multiple steps. This allows easily adding new integration tests as build steps under hpc-toolkit-integration-tests. 
-Cloud build calls ansible-playbook -`[slurm-integration-tests | basic-integration-tests]` with a custom +Cloud build calls ansible-playbook `*-integration-tests.yml` with a custom configuration yaml. Each test has its own yaml under tools/cloud-build/daily-tests/tests. This file specifies common variables and a list of post_deploy_test, which can be an empty array for tests that only -validate deployment. Or can list various extra tasks (only one implemented now: -`test-mounts-and-partitions`). This file also specifies the blueprint to create -the HPC environment +validate deployment, or can list various extra tasks, named `test-*.yml`. This +file also specifies the blueprint to create the HPC environment -The integration test yml, either `slurm-integration-tests` or -`basic-integration-tests`, under ansible_playbooks, in turn calls the creation -of the blueprint (create_deployment.sh) and the post_deploy_tests. +The integration test yml under ansible_playbooks, in turn calls the creation of +the blueprint (create_deployment.sh) and the post_deploy_tests. To run the tests on your own project, with your own files, use: ```shell -gcloud builds submit --config tools/cloud-build/daily-tests/hpc-toolkit-integration-tests.yaml +gcloud builds submit --config tools/cloud-build/daily-tests/integration-group-1.yaml ``` + +## Hello World Integration Test + +The hello world integration test exists to demonstrate the test interaction +between test files, and can be used to test passing variables without having to +actually run integration tests on cloud build. + +This example consists of 3 files: + +- tools/cloud-build/daily-tests/ansible_playbooks/hello-world-integration-test.yml + - The playbook that is the root of the test +- tools/cloud-build/daily-tests/ansible_playbooks/test-hello-world.yml + - The post deploy test (tasks) that is called by the playbook +- tools/cloud-build/daily-tests/tests/hello-world-vars.yml + - The variables passed into the playbook From 67d94b6bfc02a2c744b5fe1dfac4727e0c5cba24 Mon Sep 17 00:00:00 2001 From: Alex Heye Date: Thu, 21 Jul 2022 21:43:26 +0000 Subject: [PATCH 19/45] Add zone policy variables to slurm partition These variables allow setting preferential zones and fully excluded zones within the region in which the partition is located. --- .../schedmd-slurm-gcp-v5-partition/README.md | 2 + .../schedmd-slurm-gcp-v5-partition/main.tf | 2 + .../variables.tf | 37 ++++++++++++++++++- 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md b/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md index 91ec13af18..07cecdece6 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md @@ -118,6 +118,8 @@ No resources. | [spot\_instance\_config](#input\_spot\_instance\_config) | Configuration for spot VMs. | <pre>object({<br>    termination_action = string<br>  })</pre> | `null` | no | | [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | Subnet to deploy to. | `string` | `null` | no | | [tags](#input\_tags) | Network tag list. | `list(string)` | `[]` | no | +| [zone\_policy\_allow](#input\_zone\_policy\_allow) | Partition nodes will prefer to be created in the listed zones. If a zone appears<br>in both zone\_policy\_allow and zone\_policy\_deny, then zone\_policy\_deny will take<br>priority for that zone. | `set(string)` | `[]` | no | +| [zone\_policy\_deny](#input\_zone\_policy\_deny) | Partition nodes will not be created in the listed zones. If a zone appears in<br>both zone\_policy\_allow and zone\_policy\_deny, then zone\_policy\_deny will take<br>priority for that zone. | `set(string)` | `[]` | no | ## Outputs diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf index 919149dd69..f31e32db45 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf @@ -74,6 +74,8 @@ module "slurm_partition" { partition_name = var.partition_name project_id = var.project_id region = var.region + zone_policy_allow = var.zone_policy_allow + zone_policy_deny = var.zone_policy_deny subnetwork = var.subnetwork_self_link == null ? "" : var.subnetwork_self_link partition_conf = local.partition_conf } diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf index 9137ce69a0..b3776e8c83 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf @@ -14,7 +14,8 @@ * limitations under the License. */ - +# Most variables have been sourced and modified from the SchedMD/slurm-gcp +# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.0.2 variable "slurm_cluster_name" { type = string @@ -36,6 +37,40 @@ variable "region" { type = string } +variable "zone_policy_allow" { + description = <<-EOD + Partition nodes will prefer to be created in the listed zones. If a zone appears + in both zone_policy_allow and zone_policy_deny, then zone_policy_deny will take + priority for that zone. + EOD + type = set(string) + default = [] + + validation { + condition = alltrue([ + for x in var.zone_policy_allow : length(regexall("^[a-z]+-[a-z]+[0-9]-[a-z]$", x)) > 0 + ]) + error_message = "A provided zone in zone_policy_allow is not a valid zone (Regexp: '^[a-z]+-[a-z]+[0-9]-[a-z]$')." + } +} + +variable "zone_policy_deny" { + description = <<-EOD + Partition nodes will not be created in the listed zones. If a zone appears in + both zone_policy_allow and zone_policy_deny, then zone_policy_deny will take + priority for that zone. + EOD + type = set(string) + default = [] + + validation { + condition = alltrue([ + for x in var.zone_policy_deny : length(regexall("^[a-z]+-[a-z]+[0-9]-[a-z]$", x)) > 0 + ]) + error_message = "A provided zone in zone_policy_deny is not a valid zone (Regexp: '^[a-z]+-[a-z]+[0-9]-[a-z]$')." + } +} + variable "partition_name" { description = "The name of the slurm partition." 
type = string From ad58b6ff5e7f6b1176d7ff3cb2592468e3faeba0 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 21 Jul 2022 16:13:29 -0700 Subject: [PATCH 20/45] Rename spack post deploy test to match other post deploy tests --- .../daily-tests/ansible_playbooks/{spack.yml => test-spack.yml} | 0 tools/cloud-build/daily-tests/tests/spack-gromacs.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename tools/cloud-build/daily-tests/ansible_playbooks/{spack.yml => test-spack.yml} (100%) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/spack.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-spack.yml similarity index 100% rename from tools/cloud-build/daily-tests/ansible_playbooks/spack.yml rename to tools/cloud-build/daily-tests/ansible_playbooks/test-spack.yml diff --git a/tools/cloud-build/daily-tests/tests/spack-gromacs.yml b/tools/cloud-build/daily-tests/tests/spack-gromacs.yml index 845efb6464..7465007e66 100644 --- a/tools/cloud-build/daily-tests/tests/spack-gromacs.yml +++ b/tools/cloud-build/daily-tests/tests/spack-gromacs.yml @@ -24,7 +24,7 @@ max_nodes: 5 login_node: slurm-{{ deployment_name }}-login0 controller_node: slurm-{{ deployment_name }}-controller post_deploy_tests: -- spack.yml +- test-spack.yml partitions: - compute mounts: From c5f37b13d29183de1d2d711419bba6a393df090c Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 21 Jul 2022 13:51:38 -0500 Subject: [PATCH 21/45] Address command-instead-of-* ansible linting errors --- .../ansible_playbooks/test-monitoring.yml | 14 ++++++++------ .../test-mounts-and-partitions.yml | 7 +++---- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml index d89b1da497..66ed7ea993 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml @@ -25,13 +25,15 @@ fail: msg: There was a failure in the startup script when: startup_status['match_groups'][0] != "0" -- name: Fail if ops agent is not running +- name: Gather service facts become: true - ansible.builtin.command: systemctl is-active {{ item }} - with_items: - - google-cloud-ops-agent.service - - google-cloud-ops-agent-fluent-bit.service - - google-cloud-ops-agent-opentelemetry-collector.service + ansible.builtin.service_facts: +- name: Fail if ops agent is not running + ansible.builtin.assert: + that: + - ansible_facts.services["google-cloud-ops-agent.service"].status == "enabled" + - ansible_facts.services["google-cloud-ops-agent-fluent-bit.service"].state == "running" + - ansible_facts.services["google-cloud-ops-agent-opentelemetry-collector.service"].state == "running" - name: Check that monitoring dashboard has been created ansible.builtin.command: gcloud monitoring dashboards list --format="get(displayName)" run_once: true diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml index e3677c58c7..93a7f335df 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml @@ -44,14 +44,13 @@ when: not item.stat.exists loop: "{{ stat_mounts.results }}" - name: Test Mounts on partitions - ansible.builtin.shell: srun -N 1 ls -laF {{ mounts | join(' ') }} + ansible.builtin.command: "srun -N 1 
ls -laF {{ mounts | join(' ') }}" loop: "{{ partitions }}" - name: Test partitions with hostname - ansible.builtin.shell: srun -N 2 --partition {{ item }} hostname + ansible.builtin.command: srun -N 2 --partition {{ item }} hostname loop: "{{ partitions }}" - name: Ensure all nodes are powered down - ansible.builtin.shell: - sinfo -t 'IDLE&POWERED_DOWN' --noheader --format "%n" + ansible.builtin.command: sinfo -t 'IDLE&POWERED_DOWN' --noheader --format "%n" register: final_node_count changed_when: False until: final_node_count.stdout_lines | length == initial_node_count.stdout_lines | length From dc8686d16686ab755ed9e1aa51df0f318fe127ce Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 21 Jul 2022 14:07:36 -0500 Subject: [PATCH 22/45] Address no-changed-when ansible linting errors --- .../base-integration-test.yml | 17 +++++++++-- .../htcondor-integration-test.yml | 29 +++++++++++++++++-- .../packer-integration-test.yml | 19 ++++++++---- .../slurm-integration-test.yml | 24 ++++++++++++--- .../test-batch-submission.yml | 5 ++++ .../ansible_playbooks/test-monitoring.yml | 1 + .../test-mounts-and-partitions.yml | 4 +++ 7 files changed, 84 insertions(+), 15 deletions(-) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml index 330387bd88..d9157ee1e0 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml @@ -56,6 +56,7 @@ - "terraform init" - "terraform apply -auto-approve -no-color" - name: Gather instance information + changed_when: false delegate_to: localhost register: instances_list command: >- @@ -66,6 +67,7 @@ ansible.builtin.debug: var: instances_list.stdout_lines - name: Get remote IP + changed_when: false register: remote_ip command: >- gcloud compute instances describe --zone={{ zone }} {{ remote_node }} @@ -73,11 +75,14 @@ ## Setup firewall for cloud build - name: Get Builder IP + changed_when: false shell: >- dig TXT +short o-o.myaddr.l.google.com @ns1.google.com | awk -F'"' '{print $2}' register: build_ip - name: Create firewall rule + register: fw_result + changed_when: fw_result.rc == 0 command: argv: - gcloud @@ -93,6 +98,8 @@ - --rules=tcp:22 - --source-ranges={{ build_ip.stdout }} - name: 'Add SSH Keys to OS-Login' + register: key_result + changed_when: key_result.rc == 0 command: argv: - gcloud @@ -113,6 +120,9 @@ ## Cleanup and fail gracefully rescue: - name: Delete Firewall Rule + register: fw_deleted + changed_when: fw_deleted.rc == 0 + failed_when: false command: argv: - gcloud @@ -120,9 +130,9 @@ - firewall-rules - delete - "{{ deployment_name }}" - ignore_errors: true - name: Tear Down Cluster run_once: true + changed_when: true # assume something got destroyed delegate_to: localhost environment: TF_IN_AUTOMATION: "TRUE" @@ -161,6 +171,9 @@ ## Always cleanup, even on failure always: - name: Delete Firewall Rule + register: fw_deleted + changed_when: fw_deleted.rc == 0 + failed_when: false run_once: true delegate_to: localhost command: @@ -170,9 +183,9 @@ - firewall-rules - delete - "{{ deployment_name }}" - ignore_errors: true - name: Tear Down Cluster run_once: true + changed_when: true # assume something got destroyed delegate_to: localhost environment: TF_IN_AUTOMATION: "TRUE" diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml 
index 2ae0545b36..8a382dc40e 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml @@ -57,6 +57,7 @@ - terraform apply -auto-approve -no-color - name: Get startup_script register: network_name + changed_when: false args: chdir: "{{ workspace }}/{{ deployment_name }}/htcondor-env" executable: /bin/bash @@ -65,6 +66,7 @@ ../packer/custom-image/startup_script.sh - name: Create VM image with Packer register: image_created + changed_when: image_created.rc == 0 args: chdir: "{{ workspace }}/{{ deployment_name }}/packer/custom-image" executable: /bin/bash @@ -86,6 +88,7 @@ - terraform apply -auto-approve -no-color - name: Get Access Point public IP address register: access_ip + changed_when: false args: chdir: "{{ workspace }}/{{ deployment_name }}/pool" executable: /bin/bash @@ -98,11 +101,14 @@ groups: [remote_host] ## Setup firewall for cloud build - name: Get Builder IP + register: build_ip + changed_when: false ansible.builtin.shell: >- dig TXT +short o-o.myaddr.l.google.com @ns1.google.com | awk -F'"' '{print $2}' - register: build_ip - name: Create firewall rule + register: fw_result + changed_when: fw_result.rc == 0 ansible.builtin.command: argv: - gcloud @@ -118,6 +124,8 @@ - --rules=tcp:22 - --source-ranges={{ build_ip.stdout }} - name: Add SSH Keys to OS Login + register: key_result + changed_when: key_result.rc == 0 ansible.builtin.command: argv: - gcloud @@ -130,6 +138,9 @@ - "--key-file=/builder/home/.ssh/id_rsa.pub" rescue: - name: Delete Firewall Rule + register: fw_deleted + changed_when: fw_deleted.rc == 0 + failed_when: false # keep cleaning up ansible.builtin.command: argv: - gcloud @@ -137,8 +148,9 @@ - firewall-rules - delete - "{{ deployment_name }}" - ignore_errors: true - name: Tear Down Pool + changed_when: true # assume something destroyed + failed_when: false # keep cleaning up run_once: true delegate_to: localhost environment: @@ -159,10 +171,13 @@ set -o pipefail jq -r '.builds[-1].artifact_id' packer-manifest.json | cut -d ":" -f2 - name: Delete custom image + register: image_deleted when: image_created is defined and image_created.rc == 0 + changed_when: image_deleted.rc == 0 ansible.builtin.command: cmd: gcloud compute images delete {{ image_name.stdout }} - name: Tear Down Network + changed_when: true # assume something destroyed delegate_to: localhost environment: TF_IN_AUTOMATION: "TRUE" @@ -203,6 +218,7 @@ loop_var: test always: - name: Tear Down Pool + changed_when: true # assume something destroyed delegate_to: localhost run_once: true environment: @@ -214,6 +230,7 @@ - terraform init - terraform destroy -auto-approve - name: Get image name + changed_when: false run_once: true delegate_to: localhost register: image_name @@ -224,11 +241,17 @@ set -o pipefail jq -r '.builds[-1].artifact_id' packer-manifest.json | cut -d ":" -f2 - name: Delete custom image + register: image_deleted + changed_when: image_deleted.rc == 0 + failed_when: false # keep cleaning up run_once: true delegate_to: localhost ansible.builtin.command: cmd: gcloud compute images delete {{ image_name.stdout }} - name: Delete Firewall Rule + register: fw_deleted + changed_when: fw_deleted.rc == 0 + failed_when: false # keep cleaning up run_once: true delegate_to: localhost ansible.builtin.command: @@ -238,8 +261,8 @@ - firewall-rules - delete - "{{ deployment_name }}" - ignore_errors: true - name: Tear Down Network + changed_when: true # assume something destroyed run_once: 
true delegate_to: localhost environment: diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml index f1ca91f6db..a1c414fd08 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml @@ -46,14 +46,20 @@ - terraform validate - terraform apply -auto-approve -no-color - name: Create VM image with Packer - command: - cmd: "{{ item }}" + register: image_created + changed_when: image_created.rc == 0 + ansible.builtin.shell: | + set -e -o pipefail + packer init . + packer validate . + packer build . + args: chdir: "{{ workspace }}/{{ deployment_name }}/packer/custom-image" - with_items: - - packer init . - - packer validate . - - packer build . + executable: /bin/bash - name: Delete VM Image + register: image_deleted + changed_when: image_deleted.rc == 0 + when: image_created.rc == 0 ansible.builtin.shell: | gcloud compute images delete --project={{ project }} --quiet $(jq -r '.builds[-1].artifact_id' packer-manifest.json | cut -d ":" -f2) args: @@ -61,6 +67,7 @@ ## Always cleanup network always: - name: Tear Down Network + changed_when: true # assume something destroyed run_once: true delegate_to: localhost environment: diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml index 418a4765d8..f13d873970 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml @@ -57,6 +57,7 @@ - "terraform init" - "terraform apply -auto-approve -no-color" - name: Gather instance information + changed_when: false delegate_to: localhost register: instances_list command: >- @@ -67,11 +68,13 @@ ansible.builtin.debug: var: instances_list.stdout_lines - name: Get login IP + changed_when: false register: login_ip command: >- gcloud compute instances describe --zone={{ zone }} {{ login_node }} --format='get(networkInterfaces[0].accessConfigs[0].natIP)' - name: Get Controller IP + changed_when: false register: controller_ip command: >- gcloud compute instances describe --zone={{ zone }} {{ controller_node }} @@ -79,11 +82,14 @@ ## Setup firewall for cloud build - name: Get Builder IP + changed_when: false shell: >- dig TXT +short o-o.myaddr.l.google.com @ns1.google.com | awk -F'"' '{print $2}' register: build_ip - name: Create firewall rule + register: fw_created + changed_when: fw_created.rc == 0 command: argv: - gcloud @@ -99,6 +105,8 @@ - --rules=tcp:22 - --source-ranges={{ build_ip.stdout }} - name: 'Add SSH Keys to OS-Login' + register: key_created + changed_when: key_created.rc == 0 command: argv: - gcloud @@ -117,6 +125,9 @@ ## Cleanup and fail gracefully rescue: - name: Delete Firewall Rule + register: fw_deleted + changed_when: fw_deleted.rc == 0 + failed_when: false # keep cleaning up command: argv: - gcloud @@ -124,8 +135,8 @@ - firewall-rules - delete - "{{ deployment_name }}" - ignore_errors: true - name: Tear Down Cluster + changed_when: true # assume something destroyed run_once: true delegate_to: localhost environment: @@ -171,22 +182,27 @@ ## Always cleanup, even on failure always: - name: Recover Resume Logs + changed_when: false + failed_when: false delegate_to: "{{ hostvars['localhost']['controller_ip']['stdout'] }}" command: cat /var/log/slurm/resume.log 
register: resume_output - ignore_errors: true - name: Print Slurm resume.log ansible.builtin.debug: var: resume_output.stdout_lines - name: Recover Suspend Logs + changed_when: false + failed_when: false delegate_to: "{{ hostvars['localhost']['controller_ip']['stdout'] }}" command: cat /var/log/slurm/suspend.log register: suspend_output - ignore_errors: true - name: Print Slurm suspend.log ansible.builtin.debug: var: suspend_output.stdout_lines - name: Delete Firewall Rule + register: fw_deleted + changed_when: fw_deleted.rc == 0 + failed_when: false # keep cleaning up run_once: true delegate_to: localhost command: @@ -196,8 +212,8 @@ - firewall-rules - delete - "{{ deployment_name }}" - ignore_errors: true - name: Tear Down Cluster + changed_when: true # assume something destroyed run_once: true delegate_to: localhost environment: diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml index fad6c21052..93d02a8a31 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml @@ -26,8 +26,11 @@ - name: Batch Job Block block: - name: Submit batch job + register: batch_submission + changed_when: batch_submission.rc == 0 ansible.builtin.command: gcloud alpha batch jobs submit {{ deployment_name }} --config=/home/batch-jobs/cloud-batch-{{ deployment_name }}.json --location=us-central1 --project={{ custom_vars.project }} - name: Wait for job to run + changed_when: false ansible.builtin.command: gcloud alpha batch jobs describe {{ deployment_name }} --location=us-central1 --project={{ custom_vars.project }} register: result until: result.stdout.find("SUCCEEDED") != -1 @@ -36,4 +39,6 @@ always: - name: delete job + register: batch_deletion + changed_when: batch_deletion.rc == 0 ansible.builtin.command: gcloud alpha batch jobs delete {{ deployment_name }} --location=us-central1 --project={{ custom_vars.project }} diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml index 66ed7ea993..ae5a0a9569 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml @@ -35,6 +35,7 @@ - ansible_facts.services["google-cloud-ops-agent-fluent-bit.service"].state == "running" - ansible_facts.services["google-cloud-ops-agent-opentelemetry-collector.service"].state == "running" - name: Check that monitoring dashboard has been created + changed_when: false ansible.builtin.command: gcloud monitoring dashboards list --format="get(displayName)" run_once: true delegate_to: localhost diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml index 93a7f335df..69276e0440 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml @@ -44,9 +44,13 @@ when: not item.stat.exists loop: "{{ stat_mounts.results }}" - name: Test Mounts on partitions + register: srun_mounts + changed_when: srun_mounts.rc == 0 ansible.builtin.command: "srun -N 1 ls -laF {{ mounts | join(' ') }}" loop: "{{ partitions }}" - name: Test partitions with hostname + register: srun_hostname + changed_when: srun_hostname.rc == 0 
ansible.builtin.command: srun -N 2 --partition {{ item }} hostname loop: "{{ partitions }}" - name: Ensure all nodes are powered down From b89e3ec96cd7eb5164a44d4e0b38f044ea5dcf82 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 21 Jul 2022 19:51:12 -0500 Subject: [PATCH 23/45] Ensure packer build test uses network name --- .../daily-tests/ansible_playbooks/packer-integration-test.yml | 1 + tools/cloud-build/daily-tests/tests/packer.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml index a1c414fd08..e6c8881ab1 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml @@ -29,6 +29,7 @@ ROOT_DIR: "{{ workspace }}" BLUEPRINT_DIR: "{{ blueprint_dir }}" DEPLOYMENT_NAME: "{{ deployment_name }}" + NETWORK: "{{ network }}" args: creates: "{{ workspace }}/{{ deployment_name }}.tgz" - name: Create Infrastructure and test diff --git a/tools/cloud-build/daily-tests/tests/packer.yml b/tools/cloud-build/daily-tests/tests/packer.yml index 32cd229e5b..f557f5b824 100644 --- a/tools/cloud-build/daily-tests/tests/packer.yml +++ b/tools/cloud-build/daily-tests/tests/packer.yml @@ -19,3 +19,4 @@ zone: us-central1-c workspace: /workspace blueprint_yaml: "{{ workspace }}/examples/image-builder.yaml" blueprint_dir: image-builder +network: "{{ deployment_name }}-net" From 176ccf549b4f2eebda60ff9aabe8bab15e3d84cb Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 21 Jul 2022 17:54:03 -0700 Subject: [PATCH 24/45] Make packer integration test run in sequence instead of parallel --- tools/cloud-build/daily-tests/integration-group-3.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/cloud-build/daily-tests/integration-group-3.yaml b/tools/cloud-build/daily-tests/integration-group-3.yaml index 292c22af89..445dc870f9 100644 --- a/tools/cloud-build/daily-tests/integration-group-3.yaml +++ b/tools/cloud-build/daily-tests/integration-group-3.yaml @@ -16,10 +16,10 @@ # Test Packer image building and monitoring dashboard in parallel # ├── build_ghpc # └── fetch_builder -# └── packer (group 3) # └── monitoring (group 3) # └── omnia # └── lustre-new-vpc +# └── packer timeout: 14400s # 4hr @@ -110,8 +110,7 @@ steps: # image in it - id: packer waitFor: - - fetch_builder - - build_ghpc + - lustre-new-vpc name: >- us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/hpc-toolkit-builder entrypoint: /bin/bash From 39500727b92af8fc0fab08eb3286bc3c2c026c41 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 21 Jul 2022 17:55:36 -0700 Subject: [PATCH 25/45] Use unique network name in packer integration test --- .../daily-tests/ansible_playbooks/packer-integration-test.yml | 1 + tools/cloud-build/daily-tests/tests/packer.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml index f1ca91f6db..53c7033f75 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml @@ -29,6 +29,7 @@ ROOT_DIR: "{{ workspace }}" BLUEPRINT_DIR: "{{ blueprint_dir }}" DEPLOYMENT_NAME: "{{ deployment_name }}" + NETWORK: "{{ network }}" args: creates: "{{ workspace }}/{{ deployment_name 
}}.tgz" - name: Create Infrastructure and test diff --git a/tools/cloud-build/daily-tests/tests/packer.yml b/tools/cloud-build/daily-tests/tests/packer.yml index 32cd229e5b..f557f5b824 100644 --- a/tools/cloud-build/daily-tests/tests/packer.yml +++ b/tools/cloud-build/daily-tests/tests/packer.yml @@ -19,3 +19,4 @@ zone: us-central1-c workspace: /workspace blueprint_yaml: "{{ workspace }}/examples/image-builder.yaml" blueprint_dir: image-builder +network: "{{ deployment_name }}-net" From 57f133d3414bed2c4c21938bd07bf1a79a907a3b Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Fri, 22 Jul 2022 10:20:44 -0700 Subject: [PATCH 26/45] Fix ansible-lint errors in spack test --- .../cloud-build/daily-tests/ansible_playbooks/test-spack.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-spack.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-spack.yml index 66adf38562..40b9038b20 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-spack.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-spack.yml @@ -28,10 +28,11 @@ when: startup_status['match_groups'][0] != "0" - name: Ensure spack is installed command: spack --version -- name: Ensure gromacs is installed - shell: spack load gromacs + changed_when: False - name: Test gromacs is available on compute nodes shell: | spack load gromacs srun -N 1 gmx_mpi -version sleep 120 + register: srun_gromacs + changed_when: srun_gromacs.rc == 0 From 748a2e8e808e51ba9b08962d1415e0757fc73956 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Wed, 20 Jul 2022 22:52:56 -0700 Subject: [PATCH 27/45] Breakout startup wait to new file & update Batch test --- .../tasks/wait-for-startup-script.yml | 30 +++++++++++++++++++ .../test-batch-submission.yml | 16 ++++------ 2 files changed, 35 insertions(+), 11 deletions(-) create mode 100644 tools/cloud-build/daily-tests/ansible_playbooks/tasks/wait-for-startup-script.yml diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/tasks/wait-for-startup-script.yml b/tools/cloud-build/daily-tests/ansible_playbooks/tasks/wait-for-startup-script.yml new file mode 100644 index 0000000000..3d23d0bbd6 --- /dev/null +++ b/tools/cloud-build/daily-tests/ansible_playbooks/tasks/wait-for-startup-script.yml @@ -0,0 +1,30 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +- name: Assert variables are defined + ansible.builtin.assert: + that: + - vm_name is defined + +- name: Wait for startup script to complete + become: true + ansible.builtin.wait_for: + path: /var/log/messages + search_regex: '.*{{ vm_name }}.*startup-script exit status ([0-9]+)' + timeout: 600 + register: startup_status +- name: Fail if startup script exited with a non-zero return code + fail: + msg: There was a failure in the startup script + when: startup_status['match_groups'][0] != "0" diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml index 93d02a8a31..acfbaa5309 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml @@ -12,17 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. --- -- name: Wait for startup script to complete - become: true - ansible.builtin.wait_for: - path: /var/log/messages - search_regex: '.*{{ remote_node }}.*startup-script exit status ([0-9]+)' - timeout: 600 - register: startup_status -- name: Fail if startup script exited with a non-zero return code - fail: - msg: There was a failure in the startup script - when: startup_status['match_groups'][0] != "0" +- name: Wait for startup script + ansible.builtin.include_tasks: "tasks/wait-for-startup-script.yml" + vars: + vm_name: "{{ remote_node }}" + - name: Batch Job Block block: - name: Submit batch job From 79c74178347583e951327c497a10a533575f14d1 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 21 Jul 2022 10:29:00 -0700 Subject: [PATCH 28/45] Reuse wait-for-startup-script in other integration tests --- .../tasks/wait-for-startup-script.yml | 3 ++- .../ansible_playbooks/test-batch-submission.yml | 3 ++- .../ansible_playbooks/test-monitoring.yml | 16 +++++----------- .../ansible_playbooks/test-spack.yml | 17 +++++------------ 4 files changed, 14 insertions(+), 25 deletions(-) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/tasks/wait-for-startup-script.yml b/tools/cloud-build/daily-tests/ansible_playbooks/tasks/wait-for-startup-script.yml index 3d23d0bbd6..b83360baa4 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/tasks/wait-for-startup-script.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/tasks/wait-for-startup-script.yml @@ -16,13 +16,14 @@ ansible.builtin.assert: that: - vm_name is defined + - timeout_seconds is defined - name: Wait for startup script to complete become: true ansible.builtin.wait_for: path: /var/log/messages search_regex: '.*{{ vm_name }}.*startup-script exit status ([0-9]+)' - timeout: 600 + timeout: "{{ timeout_seconds }}" register: startup_status - name: Fail if startup script exited with a non-zero return code fail: diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml index acfbaa5309..ff7a2630c5 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-batch-submission.yml @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
--- -- name: Wait for startup script +- name: Include wait for startup script ansible.builtin.include_tasks: "tasks/wait-for-startup-script.yml" vars: vm_name: "{{ remote_node }}" + timeout_seconds: 600 - name: Batch Job Block block: diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml index ae5a0a9569..00d3ca7676 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-monitoring.yml @@ -14,17 +14,11 @@ --- -- name: Wait for startup script to complete - become: true - ansible.builtin.wait_for: - path: /var/log/messages - search_regex: '.*{{ remote_node }}.*startup-script exit status ([0-9]+)' - timeout: 600 - register: startup_status -- name: Fail if startup script exited with a non-zero return code - fail: - msg: There was a failure in the startup script - when: startup_status['match_groups'][0] != "0" +- name: Include wait for startup script + ansible.builtin.include_tasks: "tasks/wait-for-startup-script.yml" + vars: + vm_name: "{{ remote_node }}" + timeout_seconds: 600 - name: Gather service facts become: true ansible.builtin.service_facts: diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-spack.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-spack.yml index 40b9038b20..e2a527ac6c 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-spack.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-spack.yml @@ -14,18 +14,11 @@ --- -- name: Wait for startup script to complete - become: true - wait_for: - path: /var/log/messages - search_regex: '.*{{ login_node }}.*startup-script exit status ([0-9]+)' - timeout: 7200 - state: present - register: startup_status -- name: Fail if startup script exited with a non-zero return code - fail: - msg: There was a failure in the startup script - when: startup_status['match_groups'][0] != "0" +- name: Include wait for startup script + ansible.builtin.include_tasks: "tasks/wait-for-startup-script.yml" + vars: + vm_name: "{{ login_node }}" + timeout_seconds: 7200 - name: Ensure spack is installed command: spack --version changed_when: False From b85e70f67f8aa5bea5e9a48661a02280256c4a3f Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 21 Jul 2022 16:45:39 -0700 Subject: [PATCH 29/45] Move partitions and mounts into custom_vars --- .../ansible_playbooks/base-integration-test.yml | 2 -- .../ansible_playbooks/slurm-integration-test.yml | 3 +-- .../test-mounts-and-partitions.yml | 15 ++++++++++----- .../cloud-build/daily-tests/tests/hpc-high-io.yml | 15 ++++++++------- .../daily-tests/tests/lustre-new-vpc.yml | 11 ++++++----- .../daily-tests/tests/spack-gromacs.yml | 11 ++++++----- 6 files changed, 31 insertions(+), 26 deletions(-) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml index d9157ee1e0..6de2f6079a 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml @@ -161,8 +161,6 @@ vars: remote_node: "{{ remote_node }}" deployment_name: "{{ deployment_name }}" - mounts: "{{ mounts }}" - partitions: "{{ partitions }}" custom_vars: "{{ custom_vars }}" loop: "{{ post_deploy_tests }}" loop_control: diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml 
b/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml index f13d873970..144b5a4b85 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml @@ -173,8 +173,7 @@ run_once: true vars: login_node: "{{ login_node }}" - mounts: "{{ mounts }}" - partitions: "{{ partitions }}" + custom_vars: "{{ custom_vars }}" loop: "{{ post_deploy_tests }}" loop_control: loop_var: test diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml index 69276e0440..1820ba7726 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml @@ -13,6 +13,11 @@ # limitations under the License. --- +- name: Assert variables are defined + ansible.builtin.assert: + that: + - custom_vars.partitions is defined + - custom_vars.mounts is defined - name: Get partition info ansible.builtin.command: sinfo --format='%P' --noheader @@ -32,12 +37,12 @@ ansible.builtin.fail: msg: Test Check Partitions failed when: item not in partition_output.stdout - loop: "{{ partitions }}" + loop: "{{ custom_vars.partitions }}" - name: Get mount info ansible.builtin.stat: path: "{{ item }}" register: stat_mounts - loop: "{{ mounts }}" + loop: "{{ custom_vars.mounts }}" - name: Check if mount exists ansible.builtin.fail: msg: "{{ item.item }} not mounted" @@ -46,13 +51,13 @@ - name: Test Mounts on partitions register: srun_mounts changed_when: srun_mounts.rc == 0 - ansible.builtin.command: "srun -N 1 ls -laF {{ mounts | join(' ') }}" - loop: "{{ partitions }}" + ansible.builtin.command: "srun -N 1 ls -laF {{ custom_vars.mounts | join(' ') }}" + loop: "{{ custom_vars.partitions }}" - name: Test partitions with hostname register: srun_hostname changed_when: srun_hostname.rc == 0 ansible.builtin.command: srun -N 2 --partition {{ item }} hostname - loop: "{{ partitions }}" + loop: "{{ custom_vars.partitions }}" - name: Ensure all nodes are powered down ansible.builtin.command: sinfo -t 'IDLE&POWERED_DOWN' --noheader --format "%n" register: final_node_count diff --git a/tools/cloud-build/daily-tests/tests/hpc-high-io.yml b/tools/cloud-build/daily-tests/tests/hpc-high-io.yml index ca1e10a3fd..024dd715a3 100644 --- a/tools/cloud-build/daily-tests/tests/hpc-high-io.yml +++ b/tools/cloud-build/daily-tests/tests/hpc-high-io.yml @@ -25,10 +25,11 @@ login_node: "slurm-{{ deployment_name }}-login0" controller_node: "slurm-{{ deployment_name }}-controller" post_deploy_tests: - test-mounts-and-partitions.yml -partitions: -- compute -- low_cost -mounts: -- /home -- /scratch -- /projects +custom_vars: + partitions: + - compute + - low_cost + mounts: + - /home + - /scratch + - /projects diff --git a/tools/cloud-build/daily-tests/tests/lustre-new-vpc.yml b/tools/cloud-build/daily-tests/tests/lustre-new-vpc.yml index bcbf3dd3be..de222408c5 100644 --- a/tools/cloud-build/daily-tests/tests/lustre-new-vpc.yml +++ b/tools/cloud-build/daily-tests/tests/lustre-new-vpc.yml @@ -25,8 +25,9 @@ login_node: "slurm-{{ deployment_name }}-login0" controller_node: "slurm-{{ deployment_name }}-controller" post_deploy_tests: - test-mounts-and-partitions.yml -partitions: -- compute -mounts: -- /home -- /scratch +custom_vars: + partitions: + - compute + mounts: + - /home + - /scratch diff --git 
a/tools/cloud-build/daily-tests/tests/spack-gromacs.yml b/tools/cloud-build/daily-tests/tests/spack-gromacs.yml index 7465007e66..ac40db6f29 100644 --- a/tools/cloud-build/daily-tests/tests/spack-gromacs.yml +++ b/tools/cloud-build/daily-tests/tests/spack-gromacs.yml @@ -25,8 +25,9 @@ login_node: slurm-{{ deployment_name }}-login0 controller_node: slurm-{{ deployment_name }}-controller post_deploy_tests: - test-spack.yml -partitions: -- compute -mounts: -- /home -- /sw +custom_vars: + partitions: + - compute + mounts: + - /home + - /sw From e0f6adc40bfc77cf56968a0329ec5cf67561e2f9 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 21 Jul 2022 16:46:16 -0700 Subject: [PATCH 30/45] Add variable assertion to hello world test --- .../daily-tests/ansible_playbooks/test-hello-world.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-hello-world.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-hello-world.yml index 7fdea9b2c5..61fdeaffe1 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-hello-world.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-hello-world.yml @@ -14,6 +14,12 @@ # The hello world integration test exists to demonstrate the test interaction between test files --- +- name: Assert variables are defined + ansible.builtin.assert: + that: + - top_level_var + - custom_vars.bar is defined + - name: Print top_level_var debug: msg: "{{ top_level_var }}" From a10f76086500626bb142724091ccb51e20c522e0 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Fri, 22 Jul 2022 12:48:52 -0700 Subject: [PATCH 31/45] Breakout testing for mounts into its own file --- .../test-mounts-and-partitions.yml | 14 +++------ .../ansible_playbooks/test-mounts.yml | 30 +++++++++++++++++++ 2 files changed, 34 insertions(+), 10 deletions(-) create mode 100644 tools/cloud-build/daily-tests/ansible_playbooks/test-mounts.yml diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml index 1820ba7726..e3dc487eb3 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml @@ -19,6 +19,10 @@ - custom_vars.partitions is defined - custom_vars.mounts is defined +- name: Include test mounts + ansible.builtin.include_tasks: "test-mounts.yml" + vars: + custom_vars: "{{ custom_vars }}" - name: Get partition info ansible.builtin.command: sinfo --format='%P' --noheader changed_when: False @@ -38,16 +42,6 @@ msg: Test Check Partitions failed when: item not in partition_output.stdout loop: "{{ custom_vars.partitions }}" -- name: Get mount info - ansible.builtin.stat: - path: "{{ item }}" - register: stat_mounts - loop: "{{ custom_vars.mounts }}" -- name: Check if mount exists - ansible.builtin.fail: - msg: "{{ item.item }} not mounted" - when: not item.stat.exists - loop: "{{ stat_mounts.results }}" - name: Test Mounts on partitions register: srun_mounts changed_when: srun_mounts.rc == 0 diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts.yml new file mode 100644 index 0000000000..5b27b35840 --- /dev/null +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts.yml @@ -0,0 +1,30 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file 
except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +- name: Assert variables are defined + ansible.builtin.assert: + that: + - custom_vars.mounts is defined + +- name: Get mount info + ansible.builtin.stat: + path: "{{ item }}" + register: stat_mounts + loop: "{{ custom_vars.mounts }}" +- name: Check if mount exists + ansible.builtin.fail: + msg: "{{ item.item }} not mounted" + when: not item.stat.exists + loop: "{{ stat_mounts.results }}" From 10f4470014cbfe8bd0c916db99aa4044b4ae17be Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Fri, 22 Jul 2022 12:52:14 -0700 Subject: [PATCH 32/45] Add post deploy test-mounts to Batch integration test --- tools/cloud-build/daily-tests/tests/cloud-build.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/cloud-build/daily-tests/tests/cloud-build.yml b/tools/cloud-build/daily-tests/tests/cloud-build.yml index 43fbed2fb3..0bff9f22bb 100644 --- a/tools/cloud-build/daily-tests/tests/cloud-build.yml +++ b/tools/cloud-build/daily-tests/tests/cloud-build.yml @@ -19,6 +19,9 @@ blueprint_yaml: "{{ workspace }}/community/examples/cloud-batch.yaml" blueprint_dir: deployment-cloud-batch network: "default" remote_node: "{{ deployment_name }}-batch-login" -post_deploy_tests: [test-batch-submission.yml] +post_deploy_tests: +- test-batch-submission.yml +- test-mounts.yml custom_vars: project: "{{ project }}" + mounts: [/sw] From f8c2edcc3f1b5267303e88eb653242343a76c74e Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Fri, 22 Jul 2022 14:06:27 -0700 Subject: [PATCH 33/45] Address feedback --- .../{test-mounts-and-partitions.yml => test-partitions.yml} | 6 +----- tools/cloud-build/daily-tests/tests/hpc-high-io.yml | 3 ++- tools/cloud-build/daily-tests/tests/lustre-new-vpc.yml | 3 ++- 3 files changed, 5 insertions(+), 7 deletions(-) rename tools/cloud-build/daily-tests/ansible_playbooks/{test-mounts-and-partitions.yml => test-partitions.yml} (92%) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-partitions.yml similarity index 92% rename from tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml rename to tools/cloud-build/daily-tests/ansible_playbooks/test-partitions.yml index e3dc487eb3..6a030ead71 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-mounts-and-partitions.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-partitions.yml @@ -19,10 +19,6 @@ - custom_vars.partitions is defined - custom_vars.mounts is defined -- name: Include test mounts - ansible.builtin.include_tasks: "test-mounts.yml" - vars: - custom_vars: "{{ custom_vars }}" - name: Get partition info ansible.builtin.command: sinfo --format='%P' --noheader changed_when: False @@ -37,7 +33,7 @@ executable: /bin/bash changed_when: False register: initial_node_count -- name: Check partition compute exists +- name: Check partitions exist ansible.builtin.fail: msg: Test Check Partitions failed when: item not in partition_output.stdout diff --git a/tools/cloud-build/daily-tests/tests/hpc-high-io.yml 
b/tools/cloud-build/daily-tests/tests/hpc-high-io.yml index 024dd715a3..0f3d9586fe 100644 --- a/tools/cloud-build/daily-tests/tests/hpc-high-io.yml +++ b/tools/cloud-build/daily-tests/tests/hpc-high-io.yml @@ -24,7 +24,8 @@ max_nodes: 5 login_node: "slurm-{{ deployment_name }}-login0" controller_node: "slurm-{{ deployment_name }}-controller" post_deploy_tests: -- test-mounts-and-partitions.yml +- test-mounts.yml +- test-partitions.yml custom_vars: partitions: - compute diff --git a/tools/cloud-build/daily-tests/tests/lustre-new-vpc.yml b/tools/cloud-build/daily-tests/tests/lustre-new-vpc.yml index de222408c5..4089c21f5c 100644 --- a/tools/cloud-build/daily-tests/tests/lustre-new-vpc.yml +++ b/tools/cloud-build/daily-tests/tests/lustre-new-vpc.yml @@ -24,7 +24,8 @@ max_nodes: 5 login_node: "slurm-{{ deployment_name }}-login0" controller_node: "slurm-{{ deployment_name }}-controller" post_deploy_tests: -- test-mounts-and-partitions.yml +- test-mounts.yml +- test-partitions.yml custom_vars: partitions: - compute From 678f54ae01a33b41fe6361f80a0fdfb4d0ecfe5e Mon Sep 17 00:00:00 2001 From: Alex Heye Date: Mon, 25 Jul 2022 19:40:53 +0000 Subject: [PATCH 34/45] Update version of slurm-gcp-v5 modules to 5.0.3 --- .../modules/compute/schedmd-slurm-gcp-v5-partition/README.md | 2 +- .../modules/compute/schedmd-slurm-gcp-v5-partition/main.tf | 2 +- .../scheduler/schedmd-slurm-gcp-v5-controller/README.md | 4 ++-- .../modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf | 4 ++-- .../modules/scheduler/schedmd-slurm-gcp-v5-login/README.md | 4 ++-- .../modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md b/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md index 07cecdece6..fddb6294c3 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md @@ -71,7 +71,7 @@ No providers. | Name | Source | Version | |------|--------|---------| -| [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | v5.0.2 | +| [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | v5.0.3 | ## Resources diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf index f31e32db45..51f8df77f4 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf @@ -64,7 +64,7 @@ locals { module "slurm_partition" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=v5.0.2" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=v5.0.3" slurm_cluster_name = var.slurm_cluster_name partition_nodes = local.partition_nodes diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md index c173fa4056..fdc7763e40 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md @@ -90,8 +90,8 @@ No providers. 
| Name | Source | Version | |------|--------|---------| -| [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance | v5.0.2 | -| [slurm\_controller\_template](#module\_slurm\_controller\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | v5.0.2 | +| [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance | v5.0.3 | +| [slurm\_controller\_template](#module\_slurm\_controller\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | v5.0.3 | ## Resources diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf index 9b11b1f6b5..5fe4c151b9 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf @@ -26,7 +26,7 @@ locals { } module "slurm_controller_instance" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance?ref=v5.0.2" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance?ref=v5.0.3" access_config = var.access_config slurm_cluster_name = var.slurm_cluster_name @@ -57,7 +57,7 @@ module "slurm_controller_instance" { } module "slurm_controller_template" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=v5.0.2" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=v5.0.3" additional_disks = var.additional_disks can_ip_forward = var.can_ip_forward diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md index 1c1e3e3b89..36ef18c41d 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md @@ -72,8 +72,8 @@ No providers. 
| Name | Source | Version | |------|--------|---------| -| [slurm\_login\_instance](#module\_slurm\_login\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance | v5.0.2 | -| [slurm\_login\_template](#module\_slurm\_login\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | v5.0.2 | +| [slurm\_login\_instance](#module\_slurm\_login\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance | v5.0.3 | +| [slurm\_login\_template](#module\_slurm\_login\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | v5.0.3 | ## Resources diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf index 2bbeb8f0a4..2918bb220a 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf @@ -22,7 +22,7 @@ locals { } module "slurm_login_template" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=v5.0.2" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=v5.0.3" additional_disks = var.additional_disks can_ip_forward = var.can_ip_forward @@ -58,7 +58,7 @@ module "slurm_login_template" { } module "slurm_login_instance" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance?ref=v5.0.2" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance?ref=v5.0.3" access_config = var.access_config slurm_cluster_name = var.slurm_cluster_name From 82c7512cd3fe34ddf6cde8fc1851d70afe9c8c9e Mon Sep 17 00:00:00 2001 From: Alex Heye Date: Mon, 25 Jul 2022 19:43:38 +0000 Subject: [PATCH 35/45] Add passthrough variable disable_default_mounts Added a new variable to the slurm v5 controller modules for disabling the default mounts. --- .../schedmd-slurm-gcp-v5-controller/README.md | 1 + .../schedmd-slurm-gcp-v5-controller/main.tf | 1 + .../schedmd-slurm-gcp-v5-controller/variables.tf | 15 +++++++++++++++ 3 files changed, 17 insertions(+) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md index fdc7763e40..33b1325970 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md @@ -109,6 +109,7 @@ No resources. | [cloudsql](#input\_cloudsql) | Use this database instead of the one on the controller.
server\_ip : Address of the database server.
user : The user to access the database as.
password : The password, given the user, to access the given database. (sensitive)
db\_name : The database to access. |
object({
server_ip = string
user = string
password = string # sensitive
db_name = string
})
| `null` | no | | [compute\_startup\_script](#input\_compute\_startup\_script) | Startup script used by the compute VMs. | `string` | `""` | no | | [controller\_startup\_script](#input\_controller\_startup\_script) | Startup script used by the controller VM. | `string` | `""` | no | +| [disable\_default\_mounts](#input\_disable\_default\_mounts) | Disable default global network storage from the controller
* /usr/local/etc/slurm
* /etc/munge
* /home
* /apps
If these are disabled, the slurm etc and munge dirs must be added manually,
or some other mechanism must be used to synchronize the slurm conf files
and the munge key across the cluster. | `bool` | `false` | no | | [disable\_smt](#input\_disable\_smt) | Disables Simultaneous Multi-Threading (SMT) on instance. | `bool` | `false` | no | | [disk\_auto\_delete](#input\_disk\_auto\_delete) | Whether or not the boot disk should be auto-deleted. | `bool` | `true` | no | | [disk\_size\_gb](#input\_disk\_size\_gb) | Boot disk size in GB. | `number` | `50` | no | diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf index 5fe4c151b9..fe20629a55 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf @@ -48,6 +48,7 @@ module "slurm_controller_instance" { enable_cleanup_subscriptions = var.enable_cleanup_subscriptions enable_bigquery_load = var.enable_bigquery_load epilog_scripts = var.epilog_scripts + disable_default_mounts = var.disable_default_mounts login_network_storage = var.network_storage network_storage = var.network_storage partitions = var.partition diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf index e96f5f9e4e..166e7ec21b 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf @@ -95,6 +95,21 @@ variable "cgroup_conf_tpl" { default = null } +variable "disable_default_mounts" { + description = <<-EOD + Disable default global network storage from the controller + * /usr/local/etc/slurm + * /etc/munge + * /home + * /apps + If these are disabled, the slurm etc and munge dirs must be added manually, + or some other mechanism must be used to synchronize the slurm conf files + and the munge key across the cluster. + EOD + type = bool + default = false +} + variable "disable_smt" { type = bool description = "Disables Simultaneous Multi-Threading (SMT) on instance." From 4b1ea73e9f9ae827d911a64a0478929e9b25d680 Mon Sep 17 00:00:00 2001 From: Alex Heye Date: Mon, 25 Jul 2022 20:02:55 +0000 Subject: [PATCH 36/45] Update source comment in slurm variables.tf Update version in origin comment (slurm-gcp) of the slurm-gcp-v5 modules variables.tf files. 
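For context on the `disable_default_mounts` variable introduced in PATCH 35, here is a minimal sketch of how a blueprint might set it. The module id, `use` list, and source path are illustrative assumptions, not part of this patch series:

```yaml
# Hypothetical blueprint snippet (assumed ids/names; not from these patches).
# With disable_default_mounts: true the controller no longer exports
# /usr/local/etc/slurm, /etc/munge, /home and /apps, so equivalent storage
# must be supplied another way (e.g. a file-system module wired in via `use`).
- source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller
  kind: terraform
  id: slurm_controller
  use: [network1, homefs, compute_partition]
  settings:
    disable_default_mounts: true
```

As the variable's description warns, disabling the defaults means the slurm conf files and the munge key must then be synchronized across the cluster by some other mechanism.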
---
 .../compute/schedmd-slurm-gcp-v5-partition/variables.tf | 2 +-
 .../scheduler/schedmd-slurm-gcp-v5-controller/README.md | 2 +-
 .../schedmd-slurm-gcp-v5-controller/variables.tf        | 9 ++++++---
 .../scheduler/schedmd-slurm-gcp-v5-login/variables.tf   | 3 +++
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf
index b3776e8c83..51fe22e77d 100644
--- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf
+++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf
@@ -15,7 +15,7 @@
  */

 # Most variables have been sourced and modified from the SchedMD/slurm-gcp
-# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.0.2
+# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.0.3

 variable "slurm_cluster_name" {
   type = string
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md
index 33b1325970..5436ea3a73 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md
@@ -109,7 +109,7 @@ No resources.
 | [cloudsql](#input\_cloudsql) | Use this database instead of the one on the controller.<br>server\_ip : Address of the database server.<br>user : The user to access the database as.<br>password : The password, given the user, to access the given database. (sensitive)<br>db\_name : The database to access. | <pre>object({<br>    server_ip = string<br>    user      = string<br>    password  = string # sensitive<br>    db_name   = string<br>  })</pre> | `null` | no |
 | [compute\_startup\_script](#input\_compute\_startup\_script) | Startup script used by the compute VMs. | `string` | `""` | no |
 | [controller\_startup\_script](#input\_controller\_startup\_script) | Startup script used by the controller VM. | `string` | `""` | no |
-| [disable\_default\_mounts](#input\_disable\_default\_mounts) | Disable default global network storage from the controller<br>* /usr/local/etc/slurm<br>* /etc/munge<br>* /home<br>* /apps<br>If these are disabled, the slurm etc and munge dirs must be added manually,<br>or some other mechanism must be used to synchronize the slurm conf files<br>and the munge key across the cluster. | `bool` | `false` | no |
+| [disable\_default\_mounts](#input\_disable\_default\_mounts) | Disable default global network storage from the controller<br>* /usr/local/etc/slurm<br>* /etc/munge<br>* /home<br>* /apps<br>Warning: If these are disabled, the slurm etc and munge dirs must be added<br>manually, or some other mechanism must be used to synchronize the slurm conf<br>files and the munge key across the cluster. | `bool` | `false` | no |
 | [disable\_smt](#input\_disable\_smt) | Disables Simultaneous Multi-Threading (SMT) on instance. | `bool` | `false` | no |
 | [disk\_auto\_delete](#input\_disk\_auto\_delete) | Whether or not the boot disk should be auto-deleted. | `bool` | `true` | no |
 | [disk\_size\_gb](#input\_disk\_size\_gb) | Boot disk size in GB. | `number` | `50` | no |
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf
index 166e7ec21b..f0468ce549 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf
@@ -14,6 +14,9 @@
  * limitations under the License.
  */

+# Most variables have been sourced and modified from the SchedMD/slurm-gcp
+# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.0.3
+
 variable "access_config" {
   description = "Access configurations, i.e. IPs via which the VM instance can be accessed via the Internet."
   type = list(object({
@@ -102,9 +105,9 @@ variable "disable_default_mounts" {
     * /etc/munge
     * /home
     * /apps
-    If these are disabled, the slurm etc and munge dirs must be added manually,
-    or some other mechanism must be used to synchronize the slurm conf files
-    and the munge key across the cluster.
+    Warning: If these are disabled, the slurm etc and munge dirs must be added
+    manually, or some other mechanism must be used to synchronize the slurm conf
+    files and the munge key across the cluster.
   EOD
   type        = bool
   default     = false
diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf
index 186191ea99..c692099010 100644
--- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf
+++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf
@@ -14,6 +14,9 @@
  * limitations under the License.
  */

+# Most variables have been sourced and modified from the SchedMD/slurm-gcp
+# github repository: https://github.com/SchedMD/slurm-gcp/tree/v5.0.3
+
 variable "project_id" {
   type        = string
   description = "Project ID to create resources in."
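For reference, the `disable_default_mounts` variable added in the two patches above is consumed from a blueprint through the controller module's `settings` block. The following is a minimal, illustrative sketch; the module `id` and the `network1`/`homefs` entries under `use` are assumed placeholders, not anything taken from these patches:

```yaml
  - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller
    kind: terraform
    id: slurm_controller          # hypothetical module id
    use: [network1, homefs]       # hypothetical network and file-system modules
    settings:
      # Skip the default /usr/local/etc/slurm, /etc/munge, /home, and /apps
      # mounts; storage and slurm.conf/munge key synchronization must then be
      # provided by the modules in `use` or by some other mechanism.
      disable_default_mounts: true
```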
From 34b32754f3bc16909256ea1eab596aaf72132d91 Mon Sep 17 00:00:00 2001 From: Alex Heye Date: Mon, 25 Jul 2022 19:20:09 +0000 Subject: [PATCH 37/45] Update to version 1.2.0 --- cmd/root.go | 2 +- .../compute/SchedMD-slurm-on-gcp-partition/versions.tf | 2 +- .../modules/database/slurm-cloudsql-federation/versions.tf | 4 ++-- community/modules/file-system/nfs-server/versions.tf | 2 +- community/modules/project/service-enablement/versions.tf | 2 +- .../scheduler/SchedMD-slurm-on-gcp-controller/versions.tf | 2 +- .../scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf | 2 +- .../modules/scheduler/cloud-batch-login-node/versions.tf | 2 +- community/modules/scheduler/htcondor-configure/versions.tf | 2 +- community/modules/scripts/wait-for-startup/versions.tf | 2 +- modules/compute/vm-instance/versions.tf | 4 ++-- modules/file-system/filestore/versions.tf | 4 ++-- modules/monitoring/dashboard/versions.tf | 2 +- modules/network/pre-existing-vpc/versions.tf | 2 +- modules/scripts/startup-script/versions.tf | 2 +- 15 files changed, 18 insertions(+), 18 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index de906088b6..45745714d8 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -34,7 +34,7 @@ HPC deployments on the Google Cloud Platform.`, log.Fatalf("cmd.Help function failed: %s", err) } }, - Version: "v1.1.2", + Version: "v1.2.0", } ) diff --git a/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf b/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf index 18ff9a691a..3c43a282b6 100644 --- a/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf +++ b/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.1.2" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.2.0" } required_version = ">= 0.14.0" diff --git a/community/modules/database/slurm-cloudsql-federation/versions.tf b/community/modules/database/slurm-cloudsql-federation/versions.tf index bf92e98ffd..9c15840fa9 100644 --- a/community/modules/database/slurm-cloudsql-federation/versions.tf +++ b/community/modules/database/slurm-cloudsql-federation/versions.tf @@ -30,10 +30,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.1.2" + module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.2.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.1.2" + module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.2.0" } required_version = ">= 0.13.0" diff --git a/community/modules/file-system/nfs-server/versions.tf b/community/modules/file-system/nfs-server/versions.tf index d58fc5ac84..bb521cecf8 100644 --- a/community/modules/file-system/nfs-server/versions.tf +++ b/community/modules/file-system/nfs-server/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.1.2" + module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.2.0" } required_version = ">= 0.14.0" diff --git a/community/modules/project/service-enablement/versions.tf b/community/modules/project/service-enablement/versions.tf index def3e0665d..8ca5088432 100644 --- a/community/modules/project/service-enablement/versions.tf +++ b/community/modules/project/service-enablement/versions.tf @@ -22,7 +22,7 
@@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.1.2" + module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.2.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf index 91aa6b23f2..9f19fea41f 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.1.2" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.2.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf index 43098f57d5..664438f369 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.1.2" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.2.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/cloud-batch-login-node/versions.tf b/community/modules/scheduler/cloud-batch-login-node/versions.tf index e4b5d4f4b2..2cef41ddfd 100644 --- a/community/modules/scheduler/cloud-batch-login-node/versions.tf +++ b/community/modules/scheduler/cloud-batch-login-node/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:cloud-batch-login-node/v1.1.2" + module_name = "blueprints/terraform/hpc-toolkit:cloud-batch-login-node/v1.2.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/htcondor-configure/versions.tf b/community/modules/scheduler/htcondor-configure/versions.tf index a99c5c55d7..4efef45073 100644 --- a/community/modules/scheduler/htcondor-configure/versions.tf +++ b/community/modules/scheduler/htcondor-configure/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.1.2" + module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.2.0" } required_version = ">= 0.13.0" diff --git a/community/modules/scripts/wait-for-startup/versions.tf b/community/modules/scripts/wait-for-startup/versions.tf index 932fde8085..2bae52ebca 100644 --- a/community/modules/scripts/wait-for-startup/versions.tf +++ b/community/modules/scripts/wait-for-startup/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.1.2" + module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.2.0" } required_version = ">= 0.14.0" diff --git a/modules/compute/vm-instance/versions.tf b/modules/compute/vm-instance/versions.tf index efa0463b59..f791feaa2d 100644 --- a/modules/compute/vm-instance/versions.tf +++ b/modules/compute/vm-instance/versions.tf @@ -27,10 +27,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.1.2" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.2.0" } 
provider_meta "google-beta" {
-  module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.1.2"
+  module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.2.0"
 }

 required_version = ">= 0.14.0"
diff --git a/modules/file-system/filestore/versions.tf b/modules/file-system/filestore/versions.tf
index 51f1aff95a..8de9e822f9 100644
--- a/modules/file-system/filestore/versions.tf
+++ b/modules/file-system/filestore/versions.tf
@@ -26,10 +26,10 @@ terraform {
   }
 }
 provider_meta "google" {
-  module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.1.2"
+  module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.2.0"
 }
 provider_meta "google-beta" {
-  module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.1.2"
+  module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.2.0"
 }

 required_version = ">= 0.14.0"
diff --git a/modules/monitoring/dashboard/versions.tf b/modules/monitoring/dashboard/versions.tf
index d03e144c3b..3e2d80b5ba 100644
--- a/modules/monitoring/dashboard/versions.tf
+++ b/modules/monitoring/dashboard/versions.tf
@@ -22,7 +22,7 @@ terraform {
   }
 }
 provider_meta "google" {
-  module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.1.2"
+  module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.2.0"
 }

 required_version = ">= 0.14.0"
diff --git a/modules/network/pre-existing-vpc/versions.tf b/modules/network/pre-existing-vpc/versions.tf
index 95a0225c71..dec2d29c0e 100644
--- a/modules/network/pre-existing-vpc/versions.tf
+++ b/modules/network/pre-existing-vpc/versions.tf
@@ -22,7 +22,7 @@ terraform {
   }
 }
 provider_meta "google" {
-  module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.1.2"
+  module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.2.0"
 }

 required_version = ">= 0.14.0"
diff --git a/modules/scripts/startup-script/versions.tf b/modules/scripts/startup-script/versions.tf
index af809f2f6d..6b869ec30c 100644
--- a/modules/scripts/startup-script/versions.tf
+++ b/modules/scripts/startup-script/versions.tf
@@ -30,7 +30,7 @@ terraform {
   }
 }
 provider_meta "google" {
-  module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.1.2"
+  module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.2.0"
 }

 required_version = ">= 0.14.0"

From d8bb4b2cbaa533b8e994bdfc8bdf30b778fe3911 Mon Sep 17 00:00:00 2001
From: Tom Downes
Date: Mon, 25 Jul 2022 12:01:31 -0600
Subject: [PATCH 38/45] Update DDN community module to EXAScaler Cloud 6.0.1

This pulls in the fix from
https://github.com/DDNStorage/exascaler-cloud-terraform/issues/11, which
removes the use of a deprecated terraform API that could cause blocking
issues on the newest versions of terraform.

Fixes #445
---
 community/modules/file-system/DDN-EXAScaler/README.md | 2 +-
 community/modules/file-system/DDN-EXAScaler/main.tf   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/community/modules/file-system/DDN-EXAScaler/README.md b/community/modules/file-system/DDN-EXAScaler/README.md
index 97bed4a046..995538ece6 100644
--- a/community/modules/file-system/DDN-EXAScaler/README.md
+++ b/community/modules/file-system/DDN-EXAScaler/README.md
@@ -61,7 +61,7 @@ No providers.
 | Name | Source | Version |
 |------|--------|---------|
-| [ddn\_exascaler](#module\_ddn\_exascaler) | github.com/DDNStorage/exascaler-cloud-terraform//gcp | 76ab7fc |
+| [ddn\_exascaler](#module\_ddn\_exascaler) | github.com/DDNStorage/exascaler-cloud-terraform//gcp | 3eec46e |

 ## Resources
diff --git a/community/modules/file-system/DDN-EXAScaler/main.tf b/community/modules/file-system/DDN-EXAScaler/main.tf
index 06eef0ddde..eeb9ecbd5e 100644
--- a/community/modules/file-system/DDN-EXAScaler/main.tf
+++ b/community/modules/file-system/DDN-EXAScaler/main.tf
@@ -36,7 +36,7 @@ locals {
 }

 module "ddn_exascaler" {
-  source  = "github.com/DDNStorage/exascaler-cloud-terraform//gcp?ref=76ab7fc"
+  source  = "github.com/DDNStorage/exascaler-cloud-terraform//gcp?ref=3eec46e"
   fsname  = var.fsname
   zone    = var.zone
   project = var.project_id

From 0ebf697f8ffbc411a83544ae6fcfbd55992941e2 Mon Sep 17 00:00:00 2001
From: Alex Heye
Date: Wed, 27 Jul 2022 20:40:01 +0000
Subject: [PATCH 39/45] Fix typo in nfs-server mount playbook

A space was mistakenly placed between the opening Jinja brackets (`{ {`)
instead of after the brackets, before the variable name.
---
 community/modules/file-system/nfs-server/scripts/mount.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/community/modules/file-system/nfs-server/scripts/mount.yaml b/community/modules/file-system/nfs-server/scripts/mount.yaml
index 2cb9d8ce58..b39a2f4adb 100644
--- a/community/modules/file-system/nfs-server/scripts/mount.yaml
+++ b/community/modules/file-system/nfs-server/scripts/mount.yaml
@@ -34,6 +34,6 @@
       path: "{{ item.local_mount }}"
       opts: "{{ item.mount_options }}"
       boot: true
-      fstype: "{ {item.fs_type }}"
+      fstype: "{{ item.fs_type }}"
       state: "mounted"
     loop: "{{ storage.json }}"

From c0145a151477847b6f6c8d291d7f5957e23c3bb4 Mon Sep 17 00:00:00 2001
From: Alex Heye
Date: Wed, 27 Jul 2022 21:53:56 +0000
Subject: [PATCH 40/45] Improve runtime of install-nfs-client runners

Remove the `yum update` step and add
`--disablerepo="*" --enablerepo="base,epel"` to the `yum install` command.
---
 .../file-system/nfs-server/scripts/install-nfs-client.sh | 7 +++----
 .../file-system/filestore/scripts/install-nfs-client.sh  | 7 +++----
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/community/modules/file-system/nfs-server/scripts/install-nfs-client.sh b/community/modules/file-system/nfs-server/scripts/install-nfs-client.sh
index 1f3a40265b..a15fd1e461 100644
--- a/community/modules/file-system/nfs-server/scripts/install-nfs-client.sh
+++ b/community/modules/file-system/nfs-server/scripts/install-nfs-client.sh
@@ -14,10 +14,9 @@
 # limitations under the License.

 if [ ! "$(which mount.nfs)" ]; then
-  if [ -f /etc/centos-release ] || [ -f /etc/redhat-release ] || [ -f /etc/oracle-release ] || [ -f /etc/system-release ]; then
-
-    yum -y update
-    yum install -y nfs-utils
+  if [ -f /etc/centos-release ] || [ -f /etc/redhat-release ] ||
+    [ -f /etc/oracle-release ] || [ -f /etc/system-release ]; then
+    yum install --disablerepo="*" --enablerepo="base,epel" -y nfs-utils
   elif [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release || grep -qi ubuntu /etc/os-release; then
     apt-get -y update
     apt-get -y install nfs-common
diff --git a/modules/file-system/filestore/scripts/install-nfs-client.sh b/modules/file-system/filestore/scripts/install-nfs-client.sh
index 1f3a40265b..a15fd1e461 100644
--- a/modules/file-system/filestore/scripts/install-nfs-client.sh
+++ b/modules/file-system/filestore/scripts/install-nfs-client.sh
@@ -14,10 +14,9 @@
 # limitations under the License.
 if [ ! "$(which mount.nfs)" ]; then
-  if [ -f /etc/centos-release ] || [ -f /etc/redhat-release ] || [ -f /etc/oracle-release ] || [ -f /etc/system-release ]; then
-
-    yum -y update
-    yum install -y nfs-utils
+  if [ -f /etc/centos-release ] || [ -f /etc/redhat-release ] ||
+    [ -f /etc/oracle-release ] || [ -f /etc/system-release ]; then
+    yum install --disablerepo="*" --enablerepo="base,epel" -y nfs-utils
   elif [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release || grep -qi ubuntu /etc/os-release; then
     apt-get -y update
     apt-get -y install nfs-common

From a2b56e58494d1c670e959862015975dea3c82498 Mon Sep 17 00:00:00 2001
From: Alex Heye
Date: Fri, 29 Jul 2022 21:50:37 +0000
Subject: [PATCH 41/45] Filter for deployment name in TCP connections widget

---
 modules/monitoring/dashboard/dashboards/HPC.json.tpl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/monitoring/dashboard/dashboards/HPC.json.tpl b/modules/monitoring/dashboard/dashboards/HPC.json.tpl
index 9a9c668f50..489beb1958 100644
--- a/modules/monitoring/dashboard/dashboards/HPC.json.tpl
+++ b/modules/monitoring/dashboard/dashboards/HPC.json.tpl
@@ -513,7 +513,7 @@
               "crossSeriesReducer": "REDUCE_NONE",
               "perSeriesAligner": "ALIGN_MEAN"
             },
-            "filter": "metric.type=\"agent.googleapis.com/network/tcp_connections\""
+            "filter": "metric.type=\"agent.googleapis.com/network/tcp_connections\" metadata.user_labels.\"ghpc_deployment\"=\"${deployment_name}\""
           },
           "unitOverride": "1"
         }

From 6116e00b7867a42352242ef16145e70af96e6e20 Mon Sep 17 00:00:00 2001
From: Alex Heye
Date: Mon, 1 Aug 2022 16:41:57 +0000
Subject: [PATCH 42/45] Remove deprecated interpolation-only expression

The nfs-server module referred to a for-loop variable with an
interpolation-only expression, which has been deprecated since
terraform 0.12.14. The terraform linter caught this after a recent
update in the builder.
---
 community/modules/file-system/nfs-server/outputs.tf    | 2 +-
 tools/validate_configs/test_configs/2-nfs-servers.yaml | 4 ++++
 tools/validate_configs/test_configs/README.md          | 5 +++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/community/modules/file-system/nfs-server/outputs.tf b/community/modules/file-system/nfs-server/outputs.tf
index 81633dd2ec..c0f16179be 100644
--- a/community/modules/file-system/nfs-server/outputs.tf
+++ b/community/modules/file-system/nfs-server/outputs.tf
@@ -18,7 +18,7 @@ output "network_storage" {
   description = "export of all desired folder directories"
   value = [for mount in var.local_mounts : {
     remote_mount  = "/exports${mount}"
-    local_mount   = "${mount}"
+    local_mount   = mount
     fs_type       = "nfs"
     mount_options = "defaults,hard,intr"
     server_ip     = google_compute_instance.compute_instance.network_interface[0].network_ip
diff --git a/tools/validate_configs/test_configs/2-nfs-servers.yaml b/tools/validate_configs/test_configs/2-nfs-servers.yaml
index 471cde499e..f0fb6066ff 100644
--- a/tools/validate_configs/test_configs/2-nfs-servers.yaml
+++ b/tools/validate_configs/test_configs/2-nfs-servers.yaml
@@ -33,12 +33,16 @@ deployment_groups:
     kind: terraform
     id: homefs
     use: [network1]
+    outputs: [network_storage]
     settings:
       local_mounts: ["/home"]
+      auto_delete_disk: true

   - source: ./community/modules/file-system/nfs-server
     kind: terraform
     id: appsfs
     use: [network1]
+    outputs: [network_storage]
     settings:
       local_mounts: ["/apps"]
+      auto_delete_disk: true
diff --git a/tools/validate_configs/test_configs/README.md b/tools/validate_configs/test_configs/README.md
index 0b68b684a5..2b29a0dca5 100644
--- a/tools/validate_configs/test_configs/README.md
+++ b/tools/validate_configs/test_configs/README.md
@@ -8,6 +8,11 @@ verify a local `ghpc` build.

 ## Blueprint Descriptions

+**2-nfs-servers.yaml**: Creates 2 NFS servers with different local mount points,
+but otherwise the same variables. This test exists to ensure there will be no
+naming collisions when more than one NFS server is created in a project with
+the same deployment name.
+
 **hpc-cluster-simple.yaml**: Creates a simple cluster with a single compute VM,
 filestore as a /home directory and a network. This has been used as a demo
 blueprint when presenting the toolkit.
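The `outputs: [network_storage]` additions above expose each nfs-server's exported mounts at the deployment level, and the same `network_storage` output (remote_mount, local_mount, fs_type, mount_options, server_ip) is what other modules consume via `use` inside a blueprint. A rough sketch of that wiring follows; the consuming `vm-instance` module, the module ids, and the `network1` reference are illustrative assumptions, not part of this patch:

```yaml
  - source: ./community/modules/file-system/nfs-server
    kind: terraform
    id: homefs
    use: [network1]
    settings:
      local_mounts: ["/home"]

  # Listing homefs under `use` passes its exported network_storage to this
  # module, which can then mount /home from the NFS server.
  - source: ./modules/compute/vm-instance
    kind: terraform
    id: compute-vm
    use: [network1, homefs]
```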
From e2f32ad14de2be715518485dddf791c162cc6c4c Mon Sep 17 00:00:00 2001 From: Alex Heye Date: Mon, 1 Aug 2022 20:25:37 +0000 Subject: [PATCH 43/45] Update version to 1.3.0 --- cmd/root.go | 2 +- .../compute/SchedMD-slurm-on-gcp-partition/versions.tf | 2 +- .../modules/database/slurm-cloudsql-federation/versions.tf | 4 ++-- community/modules/file-system/nfs-server/versions.tf | 2 +- community/modules/project/service-enablement/versions.tf | 2 +- .../scheduler/SchedMD-slurm-on-gcp-controller/versions.tf | 2 +- .../scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf | 2 +- .../modules/scheduler/cloud-batch-login-node/versions.tf | 2 +- community/modules/scheduler/htcondor-configure/versions.tf | 2 +- community/modules/scripts/wait-for-startup/versions.tf | 2 +- modules/compute/vm-instance/versions.tf | 4 ++-- modules/file-system/filestore/versions.tf | 4 ++-- modules/monitoring/dashboard/versions.tf | 2 +- modules/network/pre-existing-vpc/versions.tf | 2 +- modules/scripts/startup-script/versions.tf | 2 +- 15 files changed, 18 insertions(+), 18 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 45745714d8..67073d912e 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -34,7 +34,7 @@ HPC deployments on the Google Cloud Platform.`, log.Fatalf("cmd.Help function failed: %s", err) } }, - Version: "v1.2.0", + Version: "v1.3.0", } ) diff --git a/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf b/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf index 3c43a282b6..ed0c14c6cb 100644 --- a/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf +++ b/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.3.0" } required_version = ">= 0.14.0" diff --git a/community/modules/database/slurm-cloudsql-federation/versions.tf b/community/modules/database/slurm-cloudsql-federation/versions.tf index 9c15840fa9..1ac1480e46 100644 --- a/community/modules/database/slurm-cloudsql-federation/versions.tf +++ b/community/modules/database/slurm-cloudsql-federation/versions.tf @@ -30,10 +30,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.3.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.3.0" } required_version = ">= 0.13.0" diff --git a/community/modules/file-system/nfs-server/versions.tf b/community/modules/file-system/nfs-server/versions.tf index bb521cecf8..36b6144abc 100644 --- a/community/modules/file-system/nfs-server/versions.tf +++ b/community/modules/file-system/nfs-server/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.3.0" } required_version = ">= 0.14.0" diff --git a/community/modules/project/service-enablement/versions.tf b/community/modules/project/service-enablement/versions.tf index 8ca5088432..16d9699e8f 100644 --- a/community/modules/project/service-enablement/versions.tf +++ b/community/modules/project/service-enablement/versions.tf @@ -22,7 +22,7 
@@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.3.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf index 9f19fea41f..36750c362e 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.3.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf index 664438f369..1eacbe1e96 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.3.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/cloud-batch-login-node/versions.tf b/community/modules/scheduler/cloud-batch-login-node/versions.tf index 2cef41ddfd..ee89adf9c7 100644 --- a/community/modules/scheduler/cloud-batch-login-node/versions.tf +++ b/community/modules/scheduler/cloud-batch-login-node/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:cloud-batch-login-node/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:cloud-batch-login-node/v1.3.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/htcondor-configure/versions.tf b/community/modules/scheduler/htcondor-configure/versions.tf index 4efef45073..8f044df3e8 100644 --- a/community/modules/scheduler/htcondor-configure/versions.tf +++ b/community/modules/scheduler/htcondor-configure/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.3.0" } required_version = ">= 0.13.0" diff --git a/community/modules/scripts/wait-for-startup/versions.tf b/community/modules/scripts/wait-for-startup/versions.tf index 2bae52ebca..8364495d6a 100644 --- a/community/modules/scripts/wait-for-startup/versions.tf +++ b/community/modules/scripts/wait-for-startup/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.3.0" } required_version = ">= 0.14.0" diff --git a/modules/compute/vm-instance/versions.tf b/modules/compute/vm-instance/versions.tf index f791feaa2d..503e7e908b 100644 --- a/modules/compute/vm-instance/versions.tf +++ b/modules/compute/vm-instance/versions.tf @@ -27,10 +27,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.3.0" } 
provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.3.0" } required_version = ">= 0.14.0" diff --git a/modules/file-system/filestore/versions.tf b/modules/file-system/filestore/versions.tf index 8de9e822f9..5e3b0feefb 100644 --- a/modules/file-system/filestore/versions.tf +++ b/modules/file-system/filestore/versions.tf @@ -26,10 +26,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.3.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.3.0" } required_version = ">= 0.14.0" diff --git a/modules/monitoring/dashboard/versions.tf b/modules/monitoring/dashboard/versions.tf index 3e2d80b5ba..49d8cae75c 100644 --- a/modules/monitoring/dashboard/versions.tf +++ b/modules/monitoring/dashboard/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.3.0" } required_version = ">= 0.14.0" diff --git a/modules/network/pre-existing-vpc/versions.tf b/modules/network/pre-existing-vpc/versions.tf index dec2d29c0e..0310cf4f00 100644 --- a/modules/network/pre-existing-vpc/versions.tf +++ b/modules/network/pre-existing-vpc/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.3.0" } required_version = ">= 0.14.0" diff --git a/modules/scripts/startup-script/versions.tf b/modules/scripts/startup-script/versions.tf index 6b869ec30c..cea3ab22b0 100644 --- a/modules/scripts/startup-script/versions.tf +++ b/modules/scripts/startup-script/versions.tf @@ -30,7 +30,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.2.0" + module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.3.0" } required_version = ">= 0.14.0" From 81278122fc6ca71a7e10e96d313615acd7cb2ed5 Mon Sep 17 00:00:00 2001 From: Carlos Boneti Date: Mon, 8 Aug 2022 15:22:49 -0700 Subject: [PATCH 44/45] Fixing formatting in go files to pass weekly build. Changes to be committed: modified: ghpc.go modified: pkg/config/config.go modified: pkg/modulewriter/tfwriter.go --- ghpc.go | 2 +- pkg/config/config.go | 2 +- pkg/modulewriter/tfwriter.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ghpc.go b/ghpc.go index 029ccf3aa0..b4443c2642 100644 --- a/ghpc.go +++ b/ghpc.go @@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - https://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/pkg/config/config.go b/pkg/config/config.go index e5d2091feb..af7a68e1b4 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -524,7 +524,7 @@ func ConvertMapToCty(iMap map[string]interface{}) (map[string]cty.Value, error) // corresponding entry in the origin. All other cty.Values are unmodified. 
// ERROR: if (somehow) the cty.String cannot be converted to a Go string // ERROR: rely on HCL TraverseAbs to bubble up "diagnostics" when the global -// variable being resolved does not exist in b.Vars +// variable being resolved does not exist in b.Vars func ResolveVariables( ctyMap map[string]cty.Value, origin map[string]cty.Value, diff --git a/pkg/modulewriter/tfwriter.go b/pkg/modulewriter/tfwriter.go index f884f6c053..29fc5d5032 100644 --- a/pkg/modulewriter/tfwriter.go +++ b/pkg/modulewriter/tfwriter.go @@ -371,7 +371,7 @@ func printTerraformInstructions(grpPath string) { // group in the provided deployment directory // depGroup: The deployment group that is being written // globalVars: The top-level variables, needed for writing terraform.tfvars and -// variables.tf +// variables.tf // groupDir: The path to the directory the resource group will be created in func (w TFWriter) writeDeploymentGroup( depGroup config.DeploymentGroup, From a99ddb5cf1c4bf8df2d16675ecfd480eeaf20ab7 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Wed, 10 Aug 2022 18:44:31 -0700 Subject: [PATCH 45/45] Roll back release version patch --- cmd/root.go | 2 +- .../compute/SchedMD-slurm-on-gcp-partition/versions.tf | 2 +- .../modules/database/slurm-cloudsql-federation/versions.tf | 4 ++-- community/modules/file-system/nfs-server/versions.tf | 2 +- community/modules/project/service-enablement/versions.tf | 2 +- .../scheduler/SchedMD-slurm-on-gcp-controller/versions.tf | 2 +- .../scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf | 2 +- .../modules/scheduler/cloud-batch-login-node/versions.tf | 2 +- community/modules/scheduler/htcondor-configure/versions.tf | 2 +- community/modules/scripts/wait-for-startup/versions.tf | 2 +- modules/compute/vm-instance/versions.tf | 4 ++-- modules/file-system/filestore/versions.tf | 4 ++-- modules/monitoring/dashboard/versions.tf | 2 +- modules/network/pre-existing-vpc/versions.tf | 2 +- modules/scripts/startup-script/versions.tf | 2 +- 15 files changed, 18 insertions(+), 18 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 67073d912e..afc7be6416 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -34,7 +34,7 @@ HPC deployments on the Google Cloud Platform.`, log.Fatalf("cmd.Help function failed: %s", err) } }, - Version: "v1.3.0", + Version: "v1.2.1", } ) diff --git a/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf b/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf index ed0c14c6cb..0ae83900a6 100644 --- a/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf +++ b/community/modules/compute/SchedMD-slurm-on-gcp-partition/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-partition/v1.2.1" } required_version = ">= 0.14.0" diff --git a/community/modules/database/slurm-cloudsql-federation/versions.tf b/community/modules/database/slurm-cloudsql-federation/versions.tf index 1ac1480e46..5782d929a0 100644 --- a/community/modules/database/slurm-cloudsql-federation/versions.tf +++ b/community/modules/database/slurm-cloudsql-federation/versions.tf @@ -30,10 +30,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.2.1" } provider_meta "google-beta" { - module_name = 
"blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.2.1" } required_version = ">= 0.13.0" diff --git a/community/modules/file-system/nfs-server/versions.tf b/community/modules/file-system/nfs-server/versions.tf index 36b6144abc..8de1616d25 100644 --- a/community/modules/file-system/nfs-server/versions.tf +++ b/community/modules/file-system/nfs-server/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.2.1" } required_version = ">= 0.14.0" diff --git a/community/modules/project/service-enablement/versions.tf b/community/modules/project/service-enablement/versions.tf index 16d9699e8f..f5d03aba8c 100644 --- a/community/modules/project/service-enablement/versions.tf +++ b/community/modules/project/service-enablement/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.2.1" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf index 36750c362e..23edf6d087 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.2.1" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf index 1eacbe1e96..57dd460e84 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.2.1" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/cloud-batch-login-node/versions.tf b/community/modules/scheduler/cloud-batch-login-node/versions.tf index ee89adf9c7..929ce06e7a 100644 --- a/community/modules/scheduler/cloud-batch-login-node/versions.tf +++ b/community/modules/scheduler/cloud-batch-login-node/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:cloud-batch-login-node/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:cloud-batch-login-node/v1.2.1" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/htcondor-configure/versions.tf b/community/modules/scheduler/htcondor-configure/versions.tf index 8f044df3e8..a572164ca8 100644 --- a/community/modules/scheduler/htcondor-configure/versions.tf +++ b/community/modules/scheduler/htcondor-configure/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.2.1" 
} required_version = ">= 0.13.0" diff --git a/community/modules/scripts/wait-for-startup/versions.tf b/community/modules/scripts/wait-for-startup/versions.tf index 8364495d6a..d2599aedf9 100644 --- a/community/modules/scripts/wait-for-startup/versions.tf +++ b/community/modules/scripts/wait-for-startup/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.2.1" } required_version = ">= 0.14.0" diff --git a/modules/compute/vm-instance/versions.tf b/modules/compute/vm-instance/versions.tf index 503e7e908b..23c3da0346 100644 --- a/modules/compute/vm-instance/versions.tf +++ b/modules/compute/vm-instance/versions.tf @@ -27,10 +27,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.2.1" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.2.1" } required_version = ">= 0.14.0" diff --git a/modules/file-system/filestore/versions.tf b/modules/file-system/filestore/versions.tf index 5e3b0feefb..46eda85e59 100644 --- a/modules/file-system/filestore/versions.tf +++ b/modules/file-system/filestore/versions.tf @@ -26,10 +26,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.2.1" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.2.1" } required_version = ">= 0.14.0" diff --git a/modules/monitoring/dashboard/versions.tf b/modules/monitoring/dashboard/versions.tf index 49d8cae75c..586bc45e3d 100644 --- a/modules/monitoring/dashboard/versions.tf +++ b/modules/monitoring/dashboard/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.2.1" } required_version = ">= 0.14.0" diff --git a/modules/network/pre-existing-vpc/versions.tf b/modules/network/pre-existing-vpc/versions.tf index 0310cf4f00..3a6883cf95 100644 --- a/modules/network/pre-existing-vpc/versions.tf +++ b/modules/network/pre-existing-vpc/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.2.1" } required_version = ">= 0.14.0" diff --git a/modules/scripts/startup-script/versions.tf b/modules/scripts/startup-script/versions.tf index cea3ab22b0..983fc01845 100644 --- a/modules/scripts/startup-script/versions.tf +++ b/modules/scripts/startup-script/versions.tf @@ -30,7 +30,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.3.0" + module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.2.1" } required_version = ">= 0.14.0"