diff --git a/Makefile b/Makefile index 2b19fde9c5..51079dad71 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,7 @@ MIN_GOLANG_VERSION=1.18 # for building ghpc terraform-format packer-format \ check-tflint check-pre-commit +SHELL=/bin/bash -o pipefail ENG = ./cmd/... ./pkg/... TERRAFORM_FOLDERS=$(shell find ./modules ./community/modules ./tools -type f -name "*.tf" -not -path '*/\.*' -exec dirname "{}" \; | sort -u) PACKER_FOLDERS=$(shell find ./modules ./community/modules ./tools -type f -name "*.pkr.hcl" -not -path '*/\.*' -exec dirname "{}" \; | sort -u) diff --git a/cmd/create.go b/cmd/create.go index 530d285779..5035f6bb72 100644 --- a/cmd/create.go +++ b/cmd/create.go @@ -23,7 +23,7 @@ import ( "hpc-toolkit/pkg/config" "hpc-toolkit/pkg/modulewriter" "log" - "os" + "path/filepath" "strings" "github.com/spf13/cobra" @@ -77,19 +77,27 @@ var ( func runCreateCmd(cmd *cobra.Command, args []string) { dc := expandOrDie(args[0]) - if err := modulewriter.WriteDeployment(dc, outputDir, overwriteDeployment); err != nil { - var target *modulewriter.OverwriteDeniedError - if errors.As(err, &target) { - fmt.Printf("\n%s\n", err.Error()) - os.Exit(1) - } else { - log.Fatal(err) - } - } + deplName, err := dc.Config.DeploymentName() + cobra.CheckErr(err) + deplDir := filepath.Join(outputDir, deplName) + cobra.CheckErr(modulewriter.WriteDeployment(dc, deplDir, overwriteDeployment)) + + fmt.Println("To deploy your infrastructure please run:") + fmt.Println() + fmt.Printf("./ghpc deploy %s\n", deplDir) + fmt.Println() + printAdvancedInstructionsMessage(deplDir) +} + +func printAdvancedInstructionsMessage(deplDir string) { + fmt.Println("Find instructions for cleanly destroying infrastructure and advanced manual") + fmt.Println("deployment instructions at:") + fmt.Println() + fmt.Printf("%s\n", modulewriter.InstructionsPath(deplDir)) } func expandOrDie(path string) config.DeploymentConfig { - dc, err := config.NewDeploymentConfig(path) + dc, ctx, err := config.NewDeploymentConfig(path) if err != nil { log.Fatal(err) } @@ -113,12 +121,30 @@ func expandOrDie(path string) config.DeploymentConfig { // Expand the blueprint if err := dc.ExpandConfig(); err != nil { - log.Fatal(err) + log.Fatal(renderError(err, ctx)) } return dc } +func renderError(err error, ctx config.YamlCtx) string { + var be config.BpError + if errors.As(err, &be) { + if pos, ok := ctx.Pos(be.Path); ok { + return renderRichError(be.Err, pos, ctx) + } + } + return err.Error() +} + +func renderRichError(err error, pos config.Pos, ctx config.YamlCtx) string { + return fmt.Sprintf(` +Error: %s +on line %d, column %d: +%d: %s +`, err, pos.Line, pos.Column, pos.Line, ctx.Lines[pos.Line-1]) +} + func setCLIVariables(bp *config.Blueprint, s []string) error { for _, cliVar := range s { arr := strings.SplitN(cliVar, "=", 2) diff --git a/cmd/create_test.go b/cmd/create_test.go index 3c4a0ae6be..9dc0ce78cd 100644 --- a/cmd/create_test.go +++ b/cmd/create_test.go @@ -15,6 +15,7 @@ package cmd import ( + "errors" "hpc-toolkit/pkg/config" "github.com/zclconf/go-cty/cty" @@ -128,3 +129,31 @@ func (s *MySuite) TestValidationLevels(c *C) { c.Check(setValidationLevel(&bp, "INVALID"), NotNil) } + +func (s *MySuite) TestRenderError(c *C) { + { // simple + err := errors.New("arbuz") + got := renderError(err, config.YamlCtx{}) + c.Check(got, Equals, "arbuz") + } + { // has pos, but context is missing + ctx := config.NewYamlCtx([]byte(``)) + pth := config.Root.Vars.Dot("kale") + err := config.BpError{Path: pth, Err: errors.New("arbuz")} + got := renderError(err, 
ctx) + c.Check(got, Equals, "vars.kale: arbuz") + } + { // has pos, has context + ctx := config.NewYamlCtx([]byte(` +vars: + kale: dos`)) + pth := config.Root.Vars.Dot("kale") + err := config.BpError{Path: pth, Err: errors.New("arbuz")} + got := renderError(err, ctx) + c.Check(got, Equals, ` +Error: arbuz +on line 3, column 9: +3: kale: dos +`) + } +} diff --git a/cmd/deploy.go b/cmd/deploy.go index 635e4291dd..7e6cf544f9 100644 --- a/cmd/deploy.go +++ b/cmd/deploy.go @@ -18,6 +18,7 @@ package cmd import ( "fmt" "hpc-toolkit/pkg/config" + "hpc-toolkit/pkg/modulewriter" "hpc-toolkit/pkg/shell" "log" "path/filepath" @@ -48,7 +49,7 @@ var ( Args: cobra.MatchAll(cobra.ExactArgs(1), checkDir), ValidArgsFunction: matchDirs, PreRunE: parseDeployArgs, - RunE: runDeployCmd, + Run: runDeployCmd, SilenceUsage: true, } ) @@ -72,40 +73,33 @@ func getApplyBehavior(autoApprove bool) shell.ApplyBehavior { return shell.PromptBeforeApply } -func runDeployCmd(cmd *cobra.Command, args []string) error { +func runDeployCmd(cmd *cobra.Command, args []string) { expandedBlueprintFile := filepath.Join(artifactsDir, expandedBlueprintFilename) - dc, err := config.NewDeploymentConfig(expandedBlueprintFile) - if err != nil { - return err - } - - if err := shell.ValidateDeploymentDirectory(dc.Config.DeploymentGroups, deploymentRoot); err != nil { - return err - } + dc, _, err := config.NewDeploymentConfig(expandedBlueprintFile) + cobra.CheckErr(err) + cobra.CheckErr(shell.ValidateDeploymentDirectory(dc.Config.DeploymentGroups, deploymentRoot)) for _, group := range dc.Config.DeploymentGroups { groupDir := filepath.Join(deploymentRoot, string(group.Name)) - if err = shell.ImportInputs(groupDir, artifactsDir, expandedBlueprintFile); err != nil { - return err - } + cobra.CheckErr(shell.ImportInputs(groupDir, artifactsDir, expandedBlueprintFile)) var err error switch group.Kind { case config.PackerKind: // Packer groups are enforced to have length 1 - moduleDir := filepath.Join(groupDir, string(group.Modules[0].ID)) + subPath, e := modulewriter.DeploymentSource(group.Modules[0]) + cobra.CheckErr(e) + moduleDir := filepath.Join(groupDir, subPath) err = deployPackerGroup(moduleDir) case config.TerraformKind: err = deployTerraformGroup(groupDir) default: err = fmt.Errorf("group %s is an unsupported kind %s", groupDir, group.Kind.String()) } - if err != nil { - return err - } - + cobra.CheckErr(err) } - return nil + fmt.Println("\n###############################") + printAdvancedInstructionsMessage(deploymentRoot) } func deployPackerGroup(moduleDir string) error { diff --git a/cmd/destroy.go b/cmd/destroy.go index ddbfb7660a..fecc042a27 100644 --- a/cmd/destroy.go +++ b/cmd/destroy.go @@ -64,7 +64,7 @@ func parseDestroyArgs(cmd *cobra.Command, args []string) error { func runDestroyCmd(cmd *cobra.Command, args []string) error { expandedBlueprintFile := filepath.Join(artifactsDir, expandedBlueprintFilename) - dc, err := config.NewDeploymentConfig(expandedBlueprintFile) + dc, _, err := config.NewDeploymentConfig(expandedBlueprintFile) if err != nil { return err } diff --git a/cmd/export.go b/cmd/export.go index f5be025c08..4311f342d7 100644 --- a/cmd/export.go +++ b/cmd/export.go @@ -87,7 +87,7 @@ func runExportCmd(cmd *cobra.Command, args []string) error { } expandedBlueprintFile := filepath.Join(artifactsDir, expandedBlueprintFilename) - dc, err := config.NewDeploymentConfig(expandedBlueprintFile) + dc, _, err := config.NewDeploymentConfig(expandedBlueprintFile) if err != nil { return err } diff --git a/cmd/import.go 
b/cmd/import.go index 274438eebc..cf93fa65a6 100644 --- a/cmd/import.go +++ b/cmd/import.go @@ -51,7 +51,7 @@ func runImportCmd(cmd *cobra.Command, args []string) error { } expandedBlueprintFile := filepath.Join(artifactsDir, expandedBlueprintFilename) - dc, err := config.NewDeploymentConfig(expandedBlueprintFile) + dc, _, err := config.NewDeploymentConfig(expandedBlueprintFile) if err != nil { return err } diff --git a/cmd/root.go b/cmd/root.go index edc2ccc82a..1fd2ffd9ad 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -50,13 +50,16 @@ HPC deployments on the Google Cloud Platform.`, log.Fatalf("cmd.Help function failed: %s", err) } }, - Version: "v1.19.1", + Version: "v1.20.0", Annotations: annotation, } ) // Execute the root command func Execute() error { + // Don't prefix messages with data & time to improve readability. + // See https://pkg.go.dev/log#pkg-constants + log.SetFlags(0) mismatch, branch, hash, dir := checkGitHashMismatch() if mismatch { @@ -125,6 +128,9 @@ func hpcToolkitRepo() (repo *git.Repository, dir string, err error) { // found. If it's the hpc-toolkit repo, return it. // repo := new(git.Repository) dir, err = os.Getwd() + if err != nil { + return nil, "", err + } subdir := filepath.Dir(dir) o := git.PlainOpenOptions{DetectDotGit: true} repo, err = git.PlainOpenWithOptions(dir, &o) @@ -168,8 +174,5 @@ func hpcToolkitRepo() (repo *git.Repository, dir string, err error) { func isHpcToolkitRepo(r git.Repository) bool { h := plumbing.NewHash(GitInitialHash) _, err := r.CommitObject(h) - if err == nil { - return true - } - return false + return err == nil } diff --git a/cmd/root_test.go b/cmd/root_test.go index 08177873e1..3101b5dbb3 100644 --- a/cmd/root_test.go +++ b/cmd/root_test.go @@ -199,7 +199,7 @@ func checkPathsEqual(c *C, a, b string) { if err != nil { c.Fatal(err) } - b, err = filepath.EvalSymlinks(a) + b, err = filepath.EvalSymlinks(b) if err != nil { c.Fatal(err) } @@ -241,6 +241,9 @@ func initTestRepo(path string) (repo *git.Repository, initHash plumbing.Hash, er } initHash, err = commit("Init") + if err != nil { + return + } _, err = commit("Last") return } diff --git a/community/examples/hpc-slurm-chromedesktop.yaml b/community/examples/hpc-slurm-chromedesktop.yaml index b3a0ded462..959edfa77b 100644 --- a/community/examples/hpc-slurm-chromedesktop.yaml +++ b/community/examples/hpc-slurm-chromedesktop.yaml @@ -54,7 +54,7 @@ deployment_groups: disable_public_ips: false instance_image: family: slurm-gcp-5-7-debian-11 - project: projects/schedmd-slurm-public/global/images/family + project: schedmd-slurm-public guest_accelerator: - type: nvidia-tesla-t4-vws count: 1 diff --git a/community/examples/hpc-slurm-ubuntu2004.yaml b/community/examples/hpc-slurm-ubuntu2004.yaml index 8cb8abc9da..10754d2731 100644 --- a/community/examples/hpc-slurm-ubuntu2004.yaml +++ b/community/examples/hpc-slurm-ubuntu2004.yaml @@ -25,7 +25,7 @@ vars: # Please refer to the following link for the latest images: # https://github.com/SchedMD/slurm-gcp/blob/master/docs/images.md#supported-operating-systems family: slurm-gcp-5-7-ubuntu-2004-lts - project: projects/schedmd-slurm-public/global/images/family + project: schedmd-slurm-public deployment_groups: diff --git a/community/examples/htc-htcondor.yaml b/community/examples/htc-htcondor.yaml index f5460480da..5c3d477efa 100644 --- a/community/examples/htc-htcondor.yaml +++ b/community/examples/htc-htcondor.yaml @@ -17,15 +17,17 @@ blueprint_name: htc-htcondor vars: project_id: ## Set GCP Project ID Here ## - deployment_name: htcondor-001 
+ deployment_name: htcondor-pool region: us-central1 zone: us-central1-c + disk_size_gb: 100 + new_image_family: htcondor-10x # Documentation for each of the modules used below can be found at # https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md deployment_groups: -- group: htcondor +- group: primary modules: - id: network1 source: modules/network/vpc @@ -35,6 +37,28 @@ deployment_groups: - id: htcondor_install source: community/modules/scripts/htcondor-install + - id: htcondor_install_script + source: modules/scripts/startup-script + settings: + runners: + - $(htcondor_install.install_htcondor_runner) + - $(htcondor_install.install_autoscaler_deps_runner) + +- group: packer + modules: + - id: custom-image + source: modules/packer/custom-image + kind: packer + use: + - network1 + - htcondor_install_script + settings: + disk_size: $(vars.disk_size_gb) + source_image_family: hpc-rocky-linux-8 + image_family: $(vars.new_image_family) + +- group: pool + modules: - id: htcondor_configure source: community/modules/scheduler/htcondor-configure use: @@ -44,7 +68,6 @@ deployment_groups: source: modules/scripts/startup-script settings: runners: - - $(htcondor_install.install_htcondor_runner) - $(htcondor_configure.central_manager_runner) - id: htcondor_cm @@ -54,6 +77,9 @@ deployment_groups: - htcondor_startup_central_manager settings: name_prefix: cm + instance_image: + project: $(vars.project_id) + family: $(vars.new_image_family) add_deployment_name_before_prefix: true machine_type: c2-standard-4 disable_public_ips: true @@ -79,7 +105,6 @@ deployment_groups: source: modules/scripts/startup-script settings: runners: - - $(htcondor_install.install_htcondor_runner) - $(htcondor_configure.execute_point_runner) # the HTCondor modules support up to 2 execute points per blueprint @@ -91,6 +116,10 @@ deployment_groups: - network1 - htcondor_startup_execute_point settings: + instance_image: + project: $(vars.project_id) + family: $(vars.new_image_family) + min_idle: 2 service_account: email: $(htcondor_configure.execute_point_service_account) scopes: @@ -103,6 +132,9 @@ deployment_groups: - htcondor_startup_execute_point settings: spot: true + instance_image: + project: $(vars.project_id) + family: $(vars.new_image_family) service_account: email: $(htcondor_configure.execute_point_service_account) scopes: @@ -112,8 +144,6 @@ deployment_groups: source: modules/scripts/startup-script settings: runners: - - $(htcondor_install.install_htcondor_runner) - - $(htcondor_install.install_autoscaler_deps_runner) - $(htcondor_install.install_autoscaler_runner) - $(htcondor_configure.access_point_runner) - $(htcondor_execute_point.configure_autoscaler_runner) @@ -139,6 +169,9 @@ deployment_groups: - htcondor_startup_access_point settings: name_prefix: ap + instance_image: + project: $(vars.project_id) + family: $(vars.new_image_family) add_deployment_name_before_prefix: true machine_type: c2-standard-4 service_account: diff --git a/community/examples/intel/README.md b/community/examples/intel/README.md index a17223f30f..128f797ad3 100644 --- a/community/examples/intel/README.md +++ b/community/examples/intel/README.md @@ -185,42 +185,28 @@ terraform -chdir=hpc-intel-select/primary destroy ## DAOS Cluster The [pfs-daos.yaml](pfs-daos.yaml) blueprint describes an environment with -- A [managed instance group][mig] with four DAOS server instances -- A [managed instance group][mig] with two DAOS client instances +- Two DAOS server instances +- Two DAOS client instances -For more information, 
please refer to the [Google Cloud DAOS repo on GitHub][google-cloud-daos]. - -> **_NOTE:_** The [pre-deployment steps in the google-cloud-daos/README.md][pre-deployment] must be completed prior to running this HPC Toolkit example. - -[mig]: https://cloud.google.com/compute/docs/instance-groups -[google-cloud-daos]: https://github.com/daos-stack/google-cloud-daos -[pre-deployment]: https://github.com/daos-stack/google-cloud-daos#pre-deployment-steps +The [pfs-daos.yaml](pfs-daos.yaml) blueprint uses a Packer template and Terraform modules from the [Google Cloud DAOS][google-cloud-daos] repository. Identify a project to work in and substitute its unique id wherever you see `<>` in the instructions below. ### Initial Setup for DAOS Cluster -Before provisioning any infrastructure in this project you should follow the -Toolkit guidance to enable [APIs][apis] and establish minimum resource -[quotas][quotas]. In particular, the following APIs should be enabled +Before provisioning the DAOS cluster you must follow the steps listed in the [Google Cloud DAOS Pre-deployment Guide][pre-deployment_guide]. -- [compute.googleapis.com](https://cloud.google.com/compute/docs/reference/rest/v1#service:-compute.googleapis.com) (Google Compute Engine) -- [secretmanager.googleapis.com](https://cloud.google.com/secret-manager/docs/reference/rest#service:-secretmanager.googleapis.com) (Secret manager, for secure mode) +Skip the "Build DAOS Images" step at the end of the [Pre-deployment Guide][pre-deployment_guide]. The [pfs-daos.yaml](pfs-daos.yaml) blueprint will build the images as part of the deployment. -[apis]: ../../../README.md#enable-gcp-apis -[quotas]: ../../../README.md#gcp-quotas +The Pre-deployment Guide provides instructions for enabling service accounts, APIs, establishing minimum resource quotas and other necessary steps to prepare your project. -The following available quota is required in the region used by the cluster: - -- C2 CPUs: 32 (16 per client node) -- N2 CPUs: 144 (36 per server node) -- PD-SSD: 120GB (20GB per client and server) -- Local SSD: 4 \* 16 \* 375 = 24,000GB (6TB per server) +[google-cloud-daos]: https://github.com/daos-stack/google-cloud-daos +[pre-deployment_guide]: https://github.com/daos-stack/google-cloud-daos/blob/main/docs/pre-deployment_guide.md ### Deploy the DAOS Cluster -Use `ghpc` to provision the blueprint +After completing the steps in the [Pre-deployment Guide][pre-deployment_guide] use `ghpc` to provision the blueprint ```text ghpc create community/examples/intel/pfs-daos.yaml \ @@ -228,18 +214,15 @@ ghpc create community/examples/intel/pfs-daos.yaml \ [--backend-config bucket=] ``` -This will create a set of directories containing Terraform modules and Packer -templates. - -The `--backend-config` option is not required but recommended. It will save the terraform state in a pre-existing [Google Cloud Storage bucket][bucket]. For more information see [Setting up a remote terraform state][backend]. - -Follow `ghpc` instructions to deploy the environment +This will create the deployment directory containing Terraform modules and +Packer templates. The `--backend-config` option is not required but recommended. +It will save the terraform state in a pre-existing [Google Cloud Storage +bucket][bucket]. For more information see [Setting up a remote terraform +state][backend]. 
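For reference, the `--backend-config` option amounts to configuring a [GCS backend][backend] for each Terraform deployment group. A minimal sketch of such a backend block, assuming the bucket already exists and using a placeholder bucket name:

```hcl
terraform {
  backend "gcs" {
    # Placeholder name; substitute a pre-existing bucket that you own.
    bucket = "my-terraform-state-bucket"
  }
}
```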
Use `ghpc deploy` to provision your DAOS storage cluster: - ```shell - terraform -chdir=pfs-daos/primary init - terraform -chdir=pfs-daos/primary validate - terraform -chdir=pfs-daos/primary apply - ``` +```text +ghpc deploy pfs-daos --auto-approve +``` [backend]: ../../../examples/README.md#optional-setting-up-a-remote-terraform-state [bucket]: https://cloud.google.com/storage/docs/creating-buckets @@ -366,9 +349,8 @@ The `cont1` container is now mounted on `${HOME}/daos/cont1` Create a 20GiB file which will be stored in the DAOS filesystem. ```bash -pushd ${HOME}/daos/cont1 time LD_PRELOAD=/usr/lib64/libioil.so \ -dd if=/dev/zero of=./test20GiB.img iflag=fullblock bs=1G count=20 +dd if=/dev/zero of="${HOME}/daos/cont1/test20GiB.img" iflag=fullblock bs=1G count=20 ``` See the [File System](https://docs.daos.io/v2.2/user/filesystem/) section of the DAOS User Guide for more information about DFuse. @@ -396,7 +378,7 @@ See the [DFuse (DAOS FUSE)](https://docs.daos.io/v2.2/user/filesystem/?h=dfuse#d Delete the remaining infrastructure ```shell -terraform -chdir=pfs-daos/primary destroy +ghpc destroy pfs-daos --auto-approve ``` ## DAOS Server with Slurm cluster @@ -409,39 +391,35 @@ The blueprint uses modules from - [community/modules/scheduler/SchedMD-slurm-on-gcp-login-node][SchedMD-slurm-on-gcp-login-node] - [community/modules/compute/SchedMD-slurm-on-gcp-partition][SchedMD-slurm-on-gcp-partition] -> **_NOTE:_** The [pre-deployment steps in the google-cloud-daos/README.md][pre-deployment] must be completed prior to running this HPC Toolkit example. - -[mig]: https://cloud.google.com/compute/docs/instance-groups -[google-cloud-daos]: https://github.com/daos-stack/google-cloud-daos -[pre-deployment]: https://github.com/daos-stack/google-cloud-daos#pre-deployment-steps -[apis]: ../../../README.md#enable-gcp-apis -[SchedMD-slurm-on-gcp-controller]: ../../modules/scheduler/SchedMD-slurm-on-gcp-controller -[SchedMD-slurm-on-gcp-login-node]: ../../modules/scheduler/SchedMD-slurm-on-gcp-login-node -[SchedMD-slurm-on-gcp-partition]: ../../modules/compute/SchedMD-slurm-on-gcp-partition +The blueprint also uses a Packer template from the [Google Cloud DAOS][google-cloud-daos] repository. Identify a project to work in and substitute its unique id wherever you see `<>` in the instructions below. ### Initial Setup for the DAOS/Slurm cluster -Before provisioning any infrastructure in this project you should follow the -Toolkit guidance to enable [APIs][apis] and establish minimum resource -[quotas][quotas]. In particular, the following APIs should be enabled +Before provisioning the DAOS cluster you must follow the steps listed in the [Google Cloud DAOS Pre-deployment Guide][pre-deployment_guide]. -- [compute.googleapis.com](https://cloud.google.com/compute/docs/reference/rest/v1#service:-compute.googleapis.com) (Google Compute Engine) -- [secretmanager.googleapis.com](https://cloud.google.com/secret-manager/docs/reference/rest#service:-secretmanager.googleapis.com) (Secret manager, for secure mode) +Skip the "Build DAOS Images" step at the end of the [Pre-deployment Guide][pre-deployment_guide]. The [hpc-slurm-daos.yaml](hpc-slurm-daos.yaml) blueprint will build the DAOS server image as part of the deployment. + +The Pre-deployment Guide provides instructions for enabling service accounts, APIs, establishing minimum resource quotas and other necessary steps to prepare your project for DAOS server deployment. 
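As a rough illustration of the API prerequisite (the Pre-deployment Guide remains the authoritative list), the two services named elsewhere in this document could also be enabled with a Terraform sketch like the following; `var.project_id` is an assumed input:

```hcl
# Illustrative only: the Pre-deployment Guide covers the full project setup.
resource "google_project_service" "daos_prereqs" {
  for_each = toset([
    "compute.googleapis.com",       # Google Compute Engine
    "secretmanager.googleapis.com", # Secret Manager (secure mode)
  ])
  project = var.project_id
  service = each.key
}
```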
+ +[google-cloud-daos]: https://github.com/daos-stack/google-cloud-daos +[pre-deployment_guide]: https://github.com/daos-stack/google-cloud-daos/blob/main/docs/pre-deployment_guide.md +[packer-template]: https://github.com/daos-stack/google-cloud-daos/blob/main/images/daos.pkr.hcl [apis]: ../../../README.md#enable-gcp-apis -[quotas]: ../../../README.md#gcp-quotas +[SchedMD-slurm-on-gcp-controller]: ../../modules/scheduler/SchedMD-slurm-on-gcp-controller +[SchedMD-slurm-on-gcp-login-node]: ../../modules/scheduler/SchedMD-slurm-on-gcp-login-node +[SchedMD-slurm-on-gcp-partition]: ../../modules/compute/SchedMD-slurm-on-gcp-partition -And the following available quota is required in the region used by the cluster: +Follow the Toolkit guidance to enable [APIs][apis] and establish minimum resource [quotas][quotas] for Slurm. -For DAOS: -- N2 CPUs: 64 (16 per server node) -- PD-SSD: 80GB (20GB per server) -- Local SSD: 4 \* 4 \* 375 = 6,000GB (1.5TB per server) +[apis]: ../../../README.md#enable-gcp-apis +[quotas]: ../../../README.md#gcp-quotas + +The following available quota is required in the region used by Slurm: -For Slurm: - Filestore: 2560GB - C2 CPUs: 6000 (fully-scaled "compute" partition) - This quota is not necessary at initial deployment, but will be required to @@ -465,11 +443,9 @@ The `--backend-config` option is not required but recommended. It will save the Follow `ghpc` instructions to deploy the environment - ```shell - terraform -chdir=daos-slurm/primary init - terraform -chdir=daos-slurm/primary validate - terraform -chdir=daos-slurm/primary apply - ``` +```text +ghpc deploy daos-slurm --auto-approve +``` [backend]: ../../../examples/README.md#optional-setting-up-a-remote-terraform-state [bucket]: https://cloud.google.com/storage/docs/creating-buckets @@ -609,5 +585,5 @@ have been shutdown and deleted by the Slurm autoscaler. Delete the remaining infrastructure with `terraform`: ```shell -terraform -chdir=daos-slurm/primary destroy +ghpc destroy daos-slurm --auto-approve ``` diff --git a/community/examples/intel/hpc-slurm-daos.yaml b/community/examples/intel/hpc-slurm-daos.yaml index 1cdeaa4760..46b4f02dd7 100644 --- a/community/examples/intel/hpc-slurm-daos.yaml +++ b/community/examples/intel/hpc-slurm-daos.yaml @@ -21,10 +21,14 @@ vars: deployment_name: daos-slurm region: us-central1 zone: us-central1-c + server_image_family: daos-server-hpc-rocky-8 # Documentation for each of the modules used below can be found at # https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md +# Note: this blueprint assumes the existence of a default global network and +# subnetwork in the region chosen above + deployment_groups: - group: primary modules: @@ -37,18 +41,42 @@ deployment_groups: settings: local_mount: "/home" - # This module creates a DAOS server. Server images MUST be created before running this. 
- # https://github.com/daos-stack/google-cloud-daos/tree/main/images +- group: daos-server-image + modules: + # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.1/images + - id: daos-server-image + source: github.com/daos-stack/google-cloud-daos//images?ref=v0.4.1&depth=1 + kind: packer + settings: + daos_version: 2.2.0 + daos_repo_base_url: https://packages.daos.io + daos_packages_repo_file: EL8/packages/x86_64/daos_packages.repo + use_iap: true + enable_oslogin: false + machine_type: n2-standard-32 + source_image_family: hpc-rocky-linux-8 + source_image_project_id: cloud-hpc-image-public + image_guest_os_features: ["GVNIC"] + disk_size: "20" + state_timeout: "10m" + scopes: ["https://www.googleapis.com/auth/cloud-platform"] + use_internal_ip: true + omit_external_ip: false + daos_install_type: server + image_family: $(vars.server_image_family) + +- group: cluster + modules: # more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server - id: daos - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.4.0 + source: github.com/daos-stack/google-cloud-daos//terraform/modules/daos_server?ref=v0.4.1&depth=1 use: [network1] settings: labels: {ghpc_role: file-system} # The default DAOS settings are optimized for TCO # The following will tune this system for best perf machine_type: "n2-standard-16" - os_disk_size_gb: 20 + os_family: $(vars.server_image_family) daos_disk_count: 4 daos_scm_size: 45 pools: diff --git a/community/examples/intel/pfs-daos.yaml b/community/examples/intel/pfs-daos.yaml index 00973524ba..648aba9403 100644 --- a/community/examples/intel/pfs-daos.yaml +++ b/community/examples/intel/pfs-daos.yaml @@ -21,32 +21,85 @@ vars: deployment_name: pfs-daos region: us-central1 zone: us-central1-c + server_image_family: daos-server-hpc-rocky-8 + client_image_family: daos-client-hpc-rocky-8 # Documentation for each of the modules used below can be found at # https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md +# Note: this blueprint assumes the existence of a default global network and +# subnetwork in the region chosen above + deployment_groups: - group: primary modules: - id: network1 source: modules/network/pre-existing-vpc - # This module creates a DAOS server. Server images MUST be created before running this. 
- # https://github.com/daos-stack/google-cloud-daos/tree/main/images - # more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server +- group: daos-server-image + modules: + # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.1/images + - id: daos-server-image + source: github.com/daos-stack/google-cloud-daos//images?ref=v0.4.1&depth=1 + kind: packer + settings: + daos_version: 2.2.0 + daos_repo_base_url: https://packages.daos.io + daos_packages_repo_file: EL8/packages/x86_64/daos_packages.repo + use_iap: true + enable_oslogin: false + machine_type: n2-standard-32 + source_image_family: hpc-rocky-linux-8 + source_image_project_id: cloud-hpc-image-public + image_guest_os_features: ["GVNIC"] + disk_size: "20" + state_timeout: "10m" + scopes: ["https://www.googleapis.com/auth/cloud-platform"] + use_internal_ip: true + omit_external_ip: false + daos_install_type: server + image_family: $(vars.server_image_family) + +- group: daos-client-image + modules: + # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.1/images + - id: daos-client-image + source: github.com/daos-stack/google-cloud-daos//images?ref=v0.4.1&depth=1 + kind: packer + settings: + daos_version: 2.2.0 + daos_repo_base_url: https://packages.daos.io + daos_packages_repo_file: EL8/packages/x86_64/daos_packages.repo + use_iap: true + enable_oslogin: false + machine_type: n2-standard-32 + source_image_family: hpc-rocky-linux-8 + source_image_project_id: cloud-hpc-image-public + image_guest_os_features: ["GVNIC"] + disk_size: "20" + state_timeout: "10m" + scopes: ["https://www.googleapis.com/auth/cloud-platform"] + use_internal_ip: true + omit_external_ip: false + daos_install_type: client + image_family: $(vars.client_image_family) + +- group: daos-cluster + modules: + # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.1/terraform/modules/daos_server - id: daos-server - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.4.0 + source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.4.1&depth=1 use: [network1] settings: number_of_instances: 2 labels: {ghpc_role: file-system} + os_family: $(vars.server_image_family) - # This module creates DAOS clients. Client images MUST be created before running this. - # https://github.com/daos-stack/google-cloud-daos/tree/main/images - # more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_client + # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.1/terraform/modules/daos_client - id: daos-client - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.4.0 + source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.4.1&depth=1 use: [network1, daos-server] settings: number_of_instances: 2 labels: {ghpc_role: compute} + os_family: $(vars.client_image_family) diff --git a/community/examples/storage-gke.yaml b/community/examples/storage-gke.yaml new file mode 100644 index 0000000000..bc2f93ba64 --- /dev/null +++ b/community/examples/storage-gke.yaml @@ -0,0 +1,78 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +blueprint_name: storage-gke + +vars: + project_id: ## Set GCP Project ID Here ## + deployment_name: storage-gke-01 + region: us-central1 + zone: us-central1-c + + # Cidr block containing the IP of the machine calling terraform. + # The following line must be updated for this example to work. + authorized_cidr: /32 + +deployment_groups: +- group: primary + modules: + - id: network1 + source: modules/network/vpc + settings: + subnetwork_name: gke-subnet + secondary_ranges: + gke-subnet: + - range_name: pods + ip_cidr_range: 10.4.0.0/14 + - range_name: services + ip_cidr_range: 10.0.32.0/20 + + - id: gke_cluster + source: community/modules/scheduler/gke-cluster + use: [network1] + settings: + enable_filestore_csi: true + enable_private_endpoint: false # Allows for access from authorized public IPs + master_authorized_networks: + - display_name: deployment-machine + cidr_block: $(vars.authorized_cidr) + outputs: [instructions] + + - id: debug_pool + source: community/modules/compute/gke-node-pool + use: [gke_cluster] + settings: + zones: [$(vars.zone)] + machine_type: n2-standard-2 + + - id: sharedfs + source: modules/file-system/filestore + use: [network1] + settings: {local_mount: /shared} + + - id: sharedfs-pv + source: community/modules/file-system/gke-persistent-volume + use: [gke_cluster, sharedfs] + + - id: job-template + source: community/modules/compute/gke-job-template + use: [debug_pool, sharedfs-pv] + settings: + image: bash + command: + - bash + - -c + - echo \$(date) >> /shared/timestamp.log; cat /shared/timestamp.log + node_count: 3 + outputs: [instructions] diff --git a/community/front-end/ofe/requirements.txt b/community/front-end/ofe/requirements.txt index 0fe12c2c0e..ba99fa8c1e 100644 --- a/community/front-end/ofe/requirements.txt +++ b/community/front-end/ofe/requirements.txt @@ -1,64 +1,64 @@ -archspec==0.2.0 -argcomplete==3.0.5 -asgiref==3.6.0 -astroid==2.15.1 +archspec==0.2.1 +argcomplete==3.1.1 +asgiref==3.7.2 +astroid==2.15.5 backports.zoneinfo==0.2.1 cachetools==5.3.1 -certifi==2022.12.7 +certifi==2023.5.7 cffi==1.15.1 cfgv==3.3.1 charset-normalizer==3.1.0 click==8.1.3 -cryptography==41.0.0 +cryptography==41.0.1 decorator==5.1.1 defusedxml==0.7.1 dill==0.3.6 distlib==0.3.6 # django-revproxy==0.11.0 released but not yet in pypi git+https://github.com/jazzband/django-revproxy.git@d2234005135dc0771b7c4e0bb0465664ccfa5787 -Django==4.1.9 +Django==4.2.3 django-allauth==0.54.0 -django-extensions==3.2.1 +django-extensions==3.2.3 djangorestframework==3.14.0 -filelock==3.10.7 -google-api-core==2.11.0 -google-api-python-client==2.83.0 -google-auth==2.17.1 +filelock==3.12.2 +google-api-core==2.11.1 +google-api-python-client==2.90.0 +google-auth==2.20.0 google-auth-httplib2==0.1.0 -google-cloud-billing==1.10.1 +google-cloud-billing==1.11.0 google-cloud-core==2.3.2 -google-cloud-pubsub==2.15.2 +google-cloud-pubsub==2.17.1 google-cloud-storage==2.9.0 google-crc32c==1.5.0 -google-resumable-media==2.4.1 -googleapis-common-protos==1.59.0 +google-resumable-media==2.5.0 +googleapis-common-protos==1.59.1 grafana-api==1.0.3 grpc-google-iam-v1==0.12.6 -grpcio==1.53.0 
-grpcio-status==1.53.0 +grpcio==1.56.0 +grpcio-status==1.56.0 h11==0.14.0 httplib2==0.22.0 -identify==2.5.22 +identify==2.5.24 idna==3.4 isort==5.12.0 lazy-object-proxy==1.9.0 -libcst==0.4.9 +libcst==1.0.1 mccabe==0.7.0 mypy-extensions==1.0.0 -nodeenv==1.7.0 +nodeenv==1.8.0 oauthlib==3.2.2 -platformdirs==3.2.0 -pre-commit==3.2.1 -proto-plus==1.22.2 -protobuf==4.22.3 +platformdirs==3.8.0 +pre-commit==3.3.3 +proto-plus==1.22.3 +protobuf==4.23.3 pyasn1==0.5.0 pyasn1-modules==0.3.0 pycparser==2.21 -PyJWT==2.6.0 -pylint==2.17.1 +PyJWT==2.7.0 +pylint==2.17.4 pylint-django==2.5.3 -pylint-plugin-utils==0.7 -pyparsing==3.0.9 +pylint-plugin-utils==0.8.2 +pyparsing==3.1.0 python3-openid==3.2.0 pytz==2023.3 PyYAML==6.0 @@ -67,18 +67,18 @@ requests-oauthlib==1.3.1 retry==0.9.2 rsa==4.9 semantic-version==2.10.0 -setuptools-rust==1.5.2 +setuptools-rust==1.6.0 six==1.16.0 sqlparse==0.4.4 toml==0.10.2 tomli==2.0.1 tomlkit==0.11.8 typing-inspect==0.9.0 -typing_extensions==4.5.0 +typing_extensions==4.6.3 uritemplate==4.1.1 -urllib3==2.0.2 -uvicorn==0.21.1 -virtualenv==20.21.0 +urllib3==1.26.16 +uvicorn==0.22.0 +virtualenv==20.23.1 wrapt==1.15.0 xmltodict==0.13.0 -yq==3.1.1 +yq==3.2.2 diff --git a/community/front-end/ofe/tf/README.md b/community/front-end/ofe/tf/README.md index f0c320cd96..faa58140ae 100644 --- a/community/front-end/ofe/tf/README.md +++ b/community/front-end/ofe/tf/README.md @@ -18,24 +18,24 @@ limitations under the License. | Name | Version | |------|---------| | [terraform](#requirement\_terraform) | >= 0.13 | -| [google](#requirement\_google) | ~> 3.0 | -| [google-beta](#requirement\_google-beta) | ~> 3.0 | -| [null](#requirement\_null) | >= 1.0 | +| [google](#requirement\_google) | ~> 4.0 | +| [google-beta](#requirement\_google-beta) | ~> 4.0 | +| [null](#requirement\_null) | ~> 3.0 | ## Providers | Name | Version | |------|---------| -| [google](#provider\_google) | ~> 3.0 | -| [null](#provider\_null) | >= 1.0 | +| [google](#provider\_google) | ~> 4.0 | +| [null](#provider\_null) | ~> 3.0 | ## Modules | Name | Source | Version | |------|--------|---------| -| [control\_bucket](#module\_control\_bucket) | terraform-google-modules/cloud-storage/google | ~> 2.2 | +| [control\_bucket](#module\_control\_bucket) | terraform-google-modules/cloud-storage/google | ~> 4.0 | | [network](#module\_network) | ./network | n/a | -| [pubsub](#module\_pubsub) | terraform-google-modules/pubsub/google | ~> 1.8 | +| [pubsub](#module\_pubsub) | terraform-google-modules/pubsub/google | ~> 5.0 | | [service\_account](#module\_service\_account) | terraform-google-modules/service-accounts/google | ~> 4.1 | ## Resources diff --git a/community/front-end/ofe/tf/main.tf b/community/front-end/ofe/tf/main.tf index 2efd9f7689..5954117432 100644 --- a/community/front-end/ofe/tf/main.tf +++ b/community/front-end/ofe/tf/main.tf @@ -64,7 +64,7 @@ module "service_account" { module "control_bucket" { source = "terraform-google-modules/cloud-storage/google" - version = "~> 2.2" + version = "~> 4.0" project_id = var.project_id names = ["storage"] @@ -109,7 +109,7 @@ resource "google_storage_bucket_object" "config_file" { module "pubsub" { source = "terraform-google-modules/pubsub/google" - version = "~> 1.8" + version = "~> 5.0" topic = var.deployment_name project_id = var.project_id diff --git a/community/front-end/ofe/tf/network/README.md b/community/front-end/ofe/tf/network/README.md index 94e1b6be47..ab79cd8e4e 100644 --- a/community/front-end/ofe/tf/network/README.md +++ b/community/front-end/ofe/tf/network/README.md @@ 
-18,13 +18,13 @@ limitations under the License. | Name | Version | |------|---------| | [terraform](#requirement\_terraform) | >= 0.13 | -| [google](#requirement\_google) | ~> 3.0 | +| [google](#requirement\_google) | ~> 4.0 | ## Providers | Name | Version | |------|---------| -| [google](#provider\_google) | ~> 3.0 | +| [google](#provider\_google) | ~> 4.0 | ## Modules diff --git a/community/front-end/ofe/tf/network/versions.tf b/community/front-end/ofe/tf/network/versions.tf index 6fdc70dca1..1e39af0ca1 100644 --- a/community/front-end/ofe/tf/network/versions.tf +++ b/community/front-end/ofe/tf/network/versions.tf @@ -20,7 +20,7 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 3.0" + version = "~> 4.0" } } } diff --git a/community/front-end/ofe/tf/versions.tf b/community/front-end/ofe/tf/versions.tf index 6151b7f184..e2b36c9849 100644 --- a/community/front-end/ofe/tf/versions.tf +++ b/community/front-end/ofe/tf/versions.tf @@ -20,14 +20,15 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 3.0" + version = "~> 4.0" } google-beta = { source = "hashicorp/google-beta" - version = "~> 3.0" + version = "~> 4.0" } null = { - version = ">= 1.0" + source = "hashicorp/null" + version = "~> 3.0" } } } diff --git a/community/modules/compute/gke-job-template/README.md b/community/modules/compute/gke-job-template/README.md index 706042f1b6..f3a0cfc8ca 100644 --- a/community/modules/compute/gke-job-template/README.md +++ b/community/modules/compute/gke-job-template/README.md @@ -95,6 +95,7 @@ No modules. | [node\_count](#input\_node\_count) | How many nodes the job should run in parallel. | `number` | `1` | no | | [node\_pool\_name](#input\_node\_pool\_name) | A list of node pool names on which to run the job. Can be populated via `use` feild. | `list(string)` | `[]` | no | | [node\_selectors](#input\_node\_selectors) | A list of node selectors to use to place the job. |
<pre>list(object({<br>    key = string<br>    value = string<br>  }))</pre> | `[]` | no |
+| [persistent\_volume\_claims](#input\_persistent\_volume\_claims) | A list of objects that describes a k8s PVC that is to be used and mounted on the job. Generally supplied by the gke-persistent-volume module. | <pre>list(object({<br>    name = string<br>    mount_path = string<br>    mount_options = string<br>  }))</pre>
| `[]` | no | | [random\_name\_sufix](#input\_random\_name\_sufix) | Appends a random suffix to the job name to avoid clashes. | `bool` | `true` | no | | [requested\_cpu\_per\_pod](#input\_requested\_cpu\_per\_pod) | The requested cpu per pod. If null, allocatable\_cpu\_per\_node will be used to claim whole nodes. If provided will override allocatable\_cpu\_per\_node. | `number` | `-1` | no | | [restart\_policy](#input\_restart\_policy) | Job restart policy. Only a RestartPolicy equal to `Never` or `OnFailure` is allowed. | `string` | `"Never"` | no | diff --git a/community/modules/compute/gke-job-template/main.tf b/community/modules/compute/gke-job-template/main.tf index 025b344aed..b7b719915e 100644 --- a/community/modules/compute/gke-job-template/main.tf +++ b/community/modules/compute/gke-job-template/main.tf @@ -38,36 +38,46 @@ locals { ) ) millicpu = floor(local.cpu_request * 1000) - should_request_cpu = local.millicpu >= 0 + cpu_request_string = local.millicpu >= 0 ? "${local.millicpu}m" : null full_node_request = local.min_allocatable_cpu >= 0 && var.requested_cpu_per_pod < 0 - should_request_gpu = alltrue(var.has_gpu) # arbitrarily, user can edit in template. # May come from node pool in future. - gpu_limit = 1 + gpu_limit_string = alltrue(var.has_gpu) ? "1" : null + + volumes = [for v in var.persistent_volume_claims : + { + name = "vol-${v.name}" + mount_path = v.mount_path + claim_name = v.name + } + ] suffix = var.random_name_sufix ? "-${random_id.resource_name_suffix.hex}" : "" + machine_family_node_selector = var.machine_family != null ? [{ + key = "cloud.google.com/machine-family" + value = var.machine_family + }] : [] + node_selectors = concat(local.machine_family_node_selector, var.node_selectors) job_template_contents = templatefile( "${path.module}/templates/gke-job-base.yaml.tftpl", { - name = var.name - suffix = local.suffix - image = var.image - command = var.command - node_count = var.node_count - machine_family = var.machine_family - node_pool_names = var.node_pool_name - node_selectors = var.node_selectors - should_request_cpu = local.should_request_cpu - full_node_request = local.full_node_request - millicpu_request = "${local.millicpu}m" - should_request_gpu = local.should_request_gpu - gpu_limit = local.gpu_limit - restart_policy = var.restart_policy - backoff_limit = var.backoff_limit - tolerations = distinct(var.tolerations) - labels = local.labels + name = var.name + suffix = local.suffix + image = var.image + command = var.command + node_count = var.node_count + node_pool_names = var.node_pool_name + node_selectors = local.node_selectors + full_node_request = local.full_node_request + cpu_request = local.cpu_request_string + gpu_limit = local.gpu_limit_string + restart_policy = var.restart_policy + backoff_limit = var.backoff_limit + tolerations = distinct(var.tolerations) + labels = local.labels + volumes = local.volumes } ) diff --git a/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl b/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl index 4a8f21d079..8df480e7f2 100644 --- a/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl +++ b/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl @@ -25,13 +25,12 @@ spec: - ${node_pool} %{~ endfor ~} %{~ endif ~} + %{~ if length(node_selectors) > 0 ~} nodeSelector: - %{~ if machine_family != null ~} - cloud.google.com/machine-family: ${machine_family} - %{~ endif ~} - %{~ for key, val in node_selectors ~} - ${key}: 
${val} + %{~ for selector in node_selectors ~} + ${selector.key}: ${selector.value} %{~ endfor ~} + %{~ endif ~} tolerations: %{~ for toleration in tolerations ~} - key: ${toleration.key} @@ -43,21 +42,36 @@ spec: - name: ${name}-container image: ${image} command: [%{~ for s in command ~}"${s}",%{~ endfor ~}] - %{~ if should_request_cpu || should_request_gpu ~} + %{~ if gpu_limit != null || cpu_request != null ~} resources: - %{~ if should_request_gpu ~} + %{~ if gpu_limit != null ~} limits: # GPUs should only be specified as limits # https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/ nvidia.com/gpu: ${gpu_limit} %{~ endif ~} - %{~ if should_request_cpu ~} + %{~ if cpu_request != null ~} requests: %{~ if full_node_request ~} # cpu request attempts full node per pod %{~ endif ~} - cpu: ${millicpu_request} + cpu: ${cpu_request} %{~ endif ~} %{~ endif ~} + %{~ if length(volumes) > 0 ~} + volumeMounts: + %{~ for v in volumes ~} + - name: ${v.name} + mountPath: ${v.mount_path} + %{~ endfor ~} + %{~ endif ~} + %{~ if length(volumes) > 0 ~} + volumes: + %{~ for v in volumes ~} + - name: ${v.name} + persistentVolumeClaim: + claimName: ${v.claim_name} + %{~ endfor ~} + %{~ endif ~} restartPolicy: ${restart_policy} backoffLimit: ${backoff_limit} diff --git a/community/modules/compute/gke-job-template/variables.tf b/community/modules/compute/gke-job-template/variables.tf index 5bd34d2c1d..55fac4fee7 100644 --- a/community/modules/compute/gke-job-template/variables.tf +++ b/community/modules/compute/gke-job-template/variables.tf @@ -113,6 +113,16 @@ variable "random_name_sufix" { default = true } +variable "persistent_volume_claims" { + description = "A list of objects that describes a k8s PVC that is to be used and mounted on the job. Generally supplied by the gke-persistent-volume module." + type = list(object({ + name = string + mount_path = string + mount_options = string + })) + default = [] +} + variable "labels" { description = "Labels to add to the GKE job template. Key-value pairs." type = map(string) diff --git a/community/modules/compute/gke-node-pool/README.md b/community/modules/compute/gke-node-pool/README.md index ea8d952c4d..be38d67101 100644 --- a/community/modules/compute/gke-node-pool/README.md +++ b/community/modules/compute/gke-node-pool/README.md @@ -174,22 +174,30 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [auto\_upgrade](#input\_auto\_upgrade) | Whether the nodes will be automatically upgraded. | `bool` | `false` | no | +| [autoscaling\_total\_max\_nodes](#input\_autoscaling\_total\_max\_nodes) | Total maximum number of nodes in the NodePool. | `number` | `1000` | no | +| [autoscaling\_total\_min\_nodes](#input\_autoscaling\_total\_min\_nodes) | Total minimum number of nodes in the NodePool. | `number` | `0` | no | | [cluster\_id](#input\_cluster\_id) | projects/{{project}}/locations/{{location}}/clusters/{{cluster}} | `string` | n/a | yes | | [compact\_placement](#input\_compact\_placement) | Places node pool's nodes in a closer physical proximity in order to reduce network latency between nodes. | `bool` | `false` | no | | [disk\_size\_gb](#input\_disk\_size\_gb) | Size of disk for each node. | `number` | `100` | no | | [disk\_type](#input\_disk\_type) | Disk type for each node. | `string` | `"pd-standard"` | no | +| [enable\_gcfs](#input\_enable\_gcfs) | Enable the Google Container Filesystem (GCFS). 
See [restrictions](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/container_cluster#gcfs_config). | `bool` | `false` | no | | [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
<pre>list(object({<br>    type = string<br>    count = number<br>    gpu_partition_size = string<br>    gpu_sharing_config = list(object({<br>      gpu_sharing_strategy = string<br>      max_shared_clients_per_gpu = number<br>    }))<br>  }))</pre>
| `null` | no | | [image\_type](#input\_image\_type) | The default image type used by NAP once a new node pool is being created. Use either COS\_CONTAINERD or UBUNTU\_CONTAINERD. | `string` | `"COS_CONTAINERD"` | no | | [labels](#input\_labels) | GCE resource labels to be applied to resources. Key-value pairs. | `map(string)` | n/a | yes | | [machine\_type](#input\_machine\_type) | The name of a Google Compute Engine machine type. | `string` | `"c2-standard-60"` | no | | [name](#input\_name) | The name of the node pool. If left blank, will default to the machine type. | `string` | `null` | no | | [project\_id](#input\_project\_id) | The project ID to host the cluster in. | `string` | n/a | yes | -| [service\_account](#input\_service\_account) | Service account to use with the system node pool |
<pre>object({<br>    email = string,<br>    scopes = set(string)<br>  })</pre> | <pre>{<br>  "email": null,<br>  "scopes": [<br>    "https://www.googleapis.com/auth/cloud-platform"<br>  ]<br>}</pre>
| no | +| [service\_account](#input\_service\_account) | DEPRECATED: use service\_account\_email and scopes. |
<pre>object({<br>    email = string,<br>    scopes = set(string)<br>  })</pre>
| `null` | no | +| [service\_account\_email](#input\_service\_account\_email) | Service account e-mail address to use with the node pool | `string` | `null` | no | +| [service\_account\_scopes](#input\_service\_account\_scopes) | Scopes to use with the node pool. | `set(string)` |
<pre>[<br>  "https://www.googleapis.com/auth/cloud-platform"<br>]</pre>
| no | | [spot](#input\_spot) | Provision VMs using discounted Spot pricing, allowing for preemption | `bool` | `false` | no | +| [static\_node\_count](#input\_static\_node\_count) | The static number of nodes in the node pool. If set, autoscaling will be disabled. | `number` | `null` | no | | [taints](#input\_taints) | Taints to be applied to the system node pool. |
<pre>list(object({<br>    key = string<br>    value = any<br>    effect = string<br>  }))</pre> | <pre>[<br>  {<br>    "effect": "NO_SCHEDULE",<br>    "key": "user-workload",<br>    "value": true<br>  }<br>]</pre>
| no | | [threads\_per\_core](#input\_threads\_per\_core) | Sets the number of threads per physical core. By setting threads\_per\_core
to 2, Simultaneous Multithreading (SMT) is enabled extending the total number<br>of virtual cores. For example, a machine of type c2-standard-60 will have 60<br>virtual cores with threads\_per\_core equal to 2. With threads\_per\_core equal<br>to 1 (SMT turned off), only the 30 physical cores will be available on the VM.<br><br>The default value of \"0\" will turn off SMT for supported machine types, and<br>will fall back to GCE defaults for unsupported machine types (t2d, shared-core<br>instances, or instances with less than 2 vCPU).<br><br>Disabling SMT can be more performant in many HPC workloads, therefore it is<br>disabled by default where compatible.<br><br>null = SMT configuration will use the GCE defaults for the machine type<br>0 = SMT will be disabled where compatible (default)<br>1 = SMT will always be disabled (will fail on incompatible machine types)<br>
2 = SMT will always be enabled (will fail on incompatible machine types) | `number` | `0` | no | -| [total\_max\_nodes](#input\_total\_max\_nodes) | Total maximum number of nodes in the NodePool. | `number` | `1000` | no | -| [total\_min\_nodes](#input\_total\_min\_nodes) | Total minimum number of nodes in the NodePool. | `number` | `0` | no | +| [timeout\_create](#input\_timeout\_create) | Timeout for creating a node pool | `string` | `null` | no | +| [timeout\_update](#input\_timeout\_update) | Timeout for updating a node pool | `string` | `null` | no | +| [total\_max\_nodes](#input\_total\_max\_nodes) | DEPRECATED: Use autoscaling\_total\_max\_nodes. | `number` | `null` | no | +| [total\_min\_nodes](#input\_total\_min\_nodes) | DEPRECATED: Use autoscaling\_total\_min\_nodes. | `number` | `null` | no | | [zones](#input\_zones) | A list of zones to be used. Zones must be in region of cluster. If null, cluster zones will be inherited. Note `zones` not `zone`; does not work with `zone` deployment variable. | `list(string)` | `null` | no | ## Outputs diff --git a/community/modules/compute/gke-node-pool/main.tf b/community/modules/compute/gke-node-pool/main.tf index 5dbff2f8ce..142dcfa208 100644 --- a/community/modules/compute/gke-node-pool/main.tf +++ b/community/modules/compute/gke-node-pool/main.tf @@ -20,7 +20,7 @@ locals { } locals { - sa_email = var.service_account.email != null ? var.service_account.email : data.google_compute_default_service_account.default_sa.email + sa_email = var.service_account_email != null ? var.service_account_email : data.google_compute_default_service_account.default_sa.email has_gpu = var.guest_accelerator != null || contains(["a2", "g2"], local.machine_family) gpu_taint = local.has_gpu ? [{ @@ -28,6 +28,9 @@ locals { value = "present" effect = "NO_SCHEDULE" }] : [] + + autoscale_set = var.autoscaling_total_min_nodes != 0 || var.autoscaling_total_max_nodes != 1000 + static_node_set = var.static_node_count != null } data "google_compute_default_service_account" "default_sa" { @@ -40,10 +43,15 @@ resource "google_container_node_pool" "node_pool" { name = var.name == null ? var.machine_type : var.name cluster = var.cluster_id node_locations = var.zones - autoscaling { - total_min_node_count = var.total_min_nodes - total_max_node_count = var.total_max_nodes - location_policy = "ANY" + + node_count = var.static_node_count + dynamic "autoscaling" { + for_each = local.static_node_set ? [] : [1] + content { + total_min_node_count = var.autoscaling_total_min_nodes + total_max_node_count = var.autoscaling_total_max_nodes + location_policy = "ANY" + } } management { @@ -68,8 +76,8 @@ resource "google_container_node_pool" "node_pool" { disk_size_gb = var.disk_size_gb disk_type = var.disk_type resource_labels = local.labels - service_account = var.service_account.email - oauth_scopes = var.service_account.scopes + service_account = var.service_account_email + oauth_scopes = var.service_account_scopes machine_type = var.machine_type spot = var.spot taint = concat(var.taints, local.gpu_taint) @@ -81,6 +89,13 @@ resource "google_container_node_pool" "node_pool" { enable_integrity_monitoring = true } + dynamic "gcfs_config" { + for_each = var.enable_gcfs ? 
[1] : [] + content { + enabled = true + } + } + gvnic { enabled = true } @@ -109,10 +124,19 @@ resource "google_container_node_pool" "node_pool" { } } + timeouts { + create = var.timeout_create + update = var.timeout_update + } + lifecycle { ignore_changes = [ node_config[0].labels, ] + precondition { + condition = !local.static_node_set || !local.autoscale_set + error_message = "static_node_count cannot be set with either autoscaling_total_min_nodes or autoscaling_total_max_nodes." + } } } diff --git a/community/modules/compute/gke-node-pool/variables.tf b/community/modules/compute/gke-node-pool/variables.tf index 343fbc002f..e7c164878b 100644 --- a/community/modules/compute/gke-node-pool/variables.tf +++ b/community/modules/compute/gke-node-pool/variables.tf @@ -54,6 +54,12 @@ variable "disk_type" { default = "pd-standard" } +variable "enable_gcfs" { + description = "Enable the Google Container Filesystem (GCFS). See [restrictions](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/container_cluster#gcfs_config)." + type = bool + default = false +} + variable "guest_accelerator" { description = "List of the type and count of accelerator cards attached to the instance." type = list(object({ @@ -74,19 +80,24 @@ variable "image_type" { default = "COS_CONTAINERD" } -# TODO -variable "total_min_nodes" { +variable "autoscaling_total_min_nodes" { description = "Total minimum number of nodes in the NodePool." type = number default = 0 } -variable "total_max_nodes" { +variable "autoscaling_total_max_nodes" { description = "Total maximum number of nodes in the NodePool." type = number default = 1000 } +variable "static_node_count" { + description = "The static number of nodes in the node pool. If set, autoscaling will be disabled." + type = number + default = null +} + variable "auto_upgrade" { description = "Whether the nodes will be automatically upgraded." type = bool @@ -134,16 +145,16 @@ variable "compact_placement" { default = false } -variable "service_account" { - description = "Service account to use with the system node pool" - type = object({ - email = string, - scopes = set(string) - }) - default = { - email = null - scopes = ["https://www.googleapis.com/auth/cloud-platform"] - } +variable "service_account_email" { + description = "Service account e-mail address to use with the node pool" + type = string + default = null +} + +variable "service_account_scopes" { + description = "Scopes to to use with the node pool." + type = set(string) + default = ["https://www.googleapis.com/auth/cloud-platform"] } variable "taints" { @@ -164,3 +175,50 @@ variable "labels" { description = "GCE resource labels to be applied to resources. Key-value pairs." type = map(string) } + +variable "timeout_create" { + description = "Timeout for creating a node pool" + type = string + default = null +} + +variable "timeout_update" { + description = "Timeout for updating a node pool" + type = string + default = null +} + +# Deprecated + +variable "total_min_nodes" { + description = "DEPRECATED: Use autoscaling_total_min_nodes." + type = number + default = null + validation { + condition = var.total_min_nodes == null + error_message = "total_min_nodes was renamed to autoscaling_total_min_nodes and is deprecated; use autoscaling_total_min_nodes" + } +} + +variable "total_max_nodes" { + description = "DEPRECATED: Use autoscaling_total_max_nodes." 
+ type = number + default = null + validation { + condition = var.total_max_nodes == null + error_message = "total_max_nodes was renamed to autoscaling_total_max_nodes and is deprecated; use autoscaling_total_max_nodes" + } +} + +variable "service_account" { + description = "DEPRECATED: use service_account_email and scopes." + type = object({ + email = string, + scopes = set(string) + }) + default = null + validation { + condition = var.service_account == null + error_message = "service_account is deprecated and replaced with service_account_email and scopes." + } +} diff --git a/community/modules/compute/gke-node-pool/versions.tf b/community/modules/compute/gke-node-pool/versions.tf index 045878e228..b6d8925661 100644 --- a/community/modules/compute/gke-node-pool/versions.tf +++ b/community/modules/compute/gke-node-pool/versions.tf @@ -26,6 +26,6 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:gke-node-pool/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:gke-node-pool/v1.20.0" } } diff --git a/community/modules/compute/htcondor-execute-point/README.md b/community/modules/compute/htcondor-execute-point/README.md index 723fe0bafd..68d327df4f 100644 --- a/community/modules/compute/htcondor-execute-point/README.md +++ b/community/modules/compute/htcondor-execute-point/README.md @@ -182,7 +182,7 @@ No resources. | [deployment\_name](#input\_deployment\_name) | HPC Toolkit deployment name. HTCondor cloud resource names will include this value. | `string` | n/a | yes | | [disk\_size\_gb](#input\_disk\_size\_gb) | Boot disk size in GB | `number` | `100` | no | | [enable\_oslogin](#input\_enable\_oslogin) | Enable or Disable OS Login with "ENABLE" or "DISABLE". Set to "INHERIT" to inherit project OS Login setting. | `string` | `"ENABLE"` | no | -| [image](#input\_image) | HTCondor execute point VM image |
object({
family = string,
project = string
})
|
{
"family": "hpc-centos-7",
"project": "cloud-hpc-image-public"
}
| no | +| [instance\_image](#input\_instance\_image) | HTCondor execute point VM image |
object({
family = string,
project = string
})
|
{
"family": "hpc-rocky-linux-8",
"project": "cloud-hpc-image-public"
}
| no | | [labels](#input\_labels) | Labels to add to HTCondor execute points | `map(string)` | n/a | yes | | [machine\_type](#input\_machine\_type) | Machine type to use for HTCondor execute points | `string` | `"n2-standard-4"` | no | | [max\_size](#input\_max\_size) | Maximum size of the HTCondor execute point pool. | `number` | `100` | no |
diff --git a/community/modules/compute/htcondor-execute-point/main.tf b/community/modules/compute/htcondor-execute-point/main.tf
index 37c0b5e5b1..091e329a8f 100644
--- a/community/modules/compute/htcondor-execute-point/main.tf
+++ b/community/modules/compute/htcondor-execute-point/main.tf
@@ -63,8 +63,8 @@ module "execute_point_instance_template" {
   preemptible    = var.spot
   startup_script = var.startup_script
   metadata       = local.metadata
-  source_image_family  = var.image.family
-  source_image_project = var.image.project
+  source_image_family  = var.instance_image.family
+  source_image_project = var.instance_image.project
 }

 module "mig" {
diff --git a/community/modules/compute/htcondor-execute-point/variables.tf b/community/modules/compute/htcondor-execute-point/variables.tf
index 683c89f4d6..f1a21bb2c7 100644
--- a/community/modules/compute/htcondor-execute-point/variables.tf
+++ b/community/modules/compute/htcondor-execute-point/variables.tf
@@ -65,14 +65,14 @@ variable "network_storage" {
   default = []
 }

-variable "image" {
+variable "instance_image" {
   description = "HTCondor execute point VM image"
   type = object({
     family  = string,
     project = string
   })
   default = {
-    family  = "hpc-centos-7"
+    family  = "hpc-rocky-linux-8"
     project = "cloud-hpc-image-public"
   }
 }
diff --git a/community/modules/compute/pbspro-execution/README.md b/community/modules/compute/pbspro-execution/README.md
index 4d0952accb..d01233508c 100644
--- a/community/modules/compute/pbspro-execution/README.md
+++ b/community/modules/compute/pbspro-execution/README.md
@@ -74,7 +74,7 @@ No providers.
 | Name | Source | Version |
 |------|--------|---------|
-| [execution\_startup\_script](#module\_execution\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 34bb7250 |
+| [execution\_startup\_script](#module\_execution\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 185837b5 |
 | [pbs\_execution](#module\_pbs\_execution) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/compute/vm-instance | 6c6b9e0a |
 | [pbs\_install](#module\_pbs\_install) | github.com/GoogleCloudPlatform/hpc-toolkit//community/modules/scripts/pbspro-install | 6c6b9e0a |
diff --git a/community/modules/compute/pbspro-execution/main.tf b/community/modules/compute/pbspro-execution/main.tf
index a15cb90583..4d7c6a6d88 100644
--- a/community/modules/compute/pbspro-execution/main.tf
+++ b/community/modules/compute/pbspro-execution/main.tf
@@ -53,7 +53,7 @@ module "pbs_install" {
 }

 module "execution_startup_script" {
-  source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=34bb7250"
+  source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=185837b5"

   deployment_name = var.deployment_name
   project_id      = var.project_id
diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/README.md b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/README.md
index f455751776..e6f582ba70 100644
--- a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/README.md
+++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/README.md
@@ -119,7 +119,7 @@ No modules.
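Blueprints that previously set `image` on `htcondor-execute-point` must now set `instance_image`, per the rename above. A minimal hedged sketch of the updated settings (the module `id` is illustrative and any `use` wiring is omitted; the family/project values are the new defaults from this diff):

```yaml
  - id: execute_point
    source: community/modules/compute/htcondor-execute-point
    settings:
      # Formerly the `image` variable; shown with the new default values.
      instance_image:
        family: hpc-rocky-linux-8
        project: cloud-hpc-image-public
```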
| Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [access\_config](#input\_access\_config) | Access configurations, i.e. IPs via which the node group instances can be accessed over the internet. |
list(object({
network_tier = string
}))
| `[]` | no | -| [additional\_disks](#input\_additional\_disks) | Configurations of additional disks to be included on the partition nodes. |
list(object({
disk_name = string
device_name = string
disk_size_gb = number
disk_type = string
disk_labels = map(string)
auto_delete = bool
boot = bool
}))
| `[]` | no | +| [additional\_disks](#input\_additional\_disks) | Configurations of additional disks to be included on the partition nodes. (do not use "disk\_type: local-ssd"; known issue being addressed) |
list(object({
disk_name = string
device_name = string
disk_size_gb = number
disk_type = string
disk_labels = map(string)
auto_delete = bool
boot = bool
}))
| `[]` | no | | [bandwidth\_tier](#input\_bandwidth\_tier) | Configures the network interface card and the maximum egress bandwidth for VMs.
- Setting `platform_default` respects the Google Cloud Platform API default values for networking.
- Setting `virtio_enabled` explicitly selects the VirtioNet network adapter.
- Setting `gvnic_enabled` selects the gVNIC network adapter (without Tier 1 high bandwidth).
- Setting `tier_1_enabled` selects both the gVNIC adapter and Tier 1 high bandwidth networking.
- Note: both gVNIC and Tier 1 networking require a VM image with gVNIC support as well as specific VM families and shapes.
- See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"platform_default"` | no | | [can\_ip\_forward](#input\_can\_ip\_forward) | Enable IP forwarding, for NAT instances for example. | `bool` | `false` | no | | [disable\_public\_ips](#input\_disable\_public\_ips) | If set to false, the node group VMs will have a random public IP assigned to them. Ignored if access\_config is set. | `bool` | `true` | no | @@ -134,7 +134,7 @@ No modules. | [enable\_spot\_vm](#input\_enable\_spot\_vm) | Enable the partition to use spot VMs (https://cloud.google.com/spot-vms). | `bool` | `false` | no | | [gpu](#input\_gpu) | GPU information. Type and count of GPU to attach to the instance template. See
https://cloud.google.com/compute/docs/gpus for more details.
- type : the GPU type, e.g. nvidia-tesla-t4, nvidia-a100-80gb, nvidia-tesla-a100, etc
- count : number of GPUs

If both 'var.gpu' and 'var.guest\_accelerator' are set, 'var.gpu' will be used. |
object({
count = number,
type = string
})
| `null` | no | | [guest\_accelerator](#input\_guest\_accelerator) | Alternative method of providing 'var.gpu' with a consistent naming scheme to
other HPC Toolkit modules.

If both 'var.gpu' and 'var.guest\_accelerator' are set, 'var.gpu' will be used. |
list(object({
type = string,
count = number
}))
| `null` | no | -| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the node group VM instances. This
value is overridden if any of `source_image`, `source_image_family` or
`source_image_project` are set.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-5-7-hpc-centos-7",
"project": "projects/schedmd-slurm-public/global/images/family"
}
| no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the node group VM instances. This
value is overridden if any of `source_image`, `source_image_family` or
`source_image_project` are set.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-5-7-hpc-centos-7",
"project": "schedmd-slurm-public"
}
| no | | [instance\_template](#input\_instance\_template) | Self link to a custom instance template. If set, other VM definition
variables such as machine\_type and instance\_image will be ignored in favor
of the provided instance template.

For more information on creating custom images for the instance template
that comply with Slurm on GCP see the "Slurm on GCP Custom Images" section
in docs/vm-images.md. | `string` | `null` | no | | [labels](#input\_labels) | Labels to add to partition compute instances. Key-value pairs. | `map(string)` | `{}` | no | | [machine\_type](#input\_machine\_type) | Compute Platform machine type to use for this partition compute nodes. | `string` | `"c2-standard-60"` | no | diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/main.tf b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/main.tf index c70e26a501..601f64df15 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/main.tf @@ -20,14 +20,6 @@ locals { } locals { - - # Handle VM image format from 2 sources, prioritize source_image* variables - # over instance_image - source_image_input_used = var.source_image != "" || var.source_image_family != "" || var.source_image_project != "" - source_image = local.source_image_input_used ? var.source_image : lookup(var.instance_image, "name", "") - source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "") - source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "") - enable_public_ip_access_config = var.disable_public_ips ? [] : [{ nat_ip = null, network_tier = null }] access_config = length(var.access_config) == 0 ? local.enable_public_ip_access_config : var.access_config @@ -70,9 +62,9 @@ locals { on_host_maintenance = var.on_host_maintenance preemptible = var.preemptible shielded_instance_config = var.shielded_instance_config - source_image_family = local.source_image_family - source_image_project = local.source_image_project - source_image = local.source_image + source_image_family = local.source_image_family # requires source_image_logic.tf + source_image_project = local.source_image_project_normalized # requires source_image_logic.tf + source_image = local.source_image # requires source_image_logic.tf tags = var.tags access_config = local.access_config service_account = var.service_account != null ? var.service_account : { diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/source_image_logic.tf b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/source_image_logic.tf new file mode 100644 index 0000000000..8e5a1ea5ec --- /dev/null +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/source_image_logic.tf @@ -0,0 +1,29 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +locals { + # Handle VM image format from 2 sources, prioritize source_image* variables + # over instance_image + source_image_input_used = var.source_image != "" || var.source_image_family != "" || var.source_image_project != "" + source_image = local.source_image_input_used ? var.source_image : lookup(var.instance_image, "name", "") + source_image_family = local.source_image_input_used ? 
var.source_image_family : lookup(var.instance_image, "family", "") + source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "") + source_image_project_normalized = ( + local.source_image != "" || length(regexall("/", local.source_image_project)) > 0 + ? local.source_image_project + : "projects/${local.source_image_project}/global/images/family" + ) +} diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf index fd8bd310bf..64703bb0dc 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf @@ -15,7 +15,7 @@ */ # Most variables have been sourced and modified from the SchedMD/slurm-gcp -# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 variable "project_id" { description = "Project in which the HPC deployment will be created." @@ -98,7 +98,7 @@ variable "instance_image" { type = map(string) default = { family = "slurm-gcp-5-7-hpc-centos-7" - project = "projects/schedmd-slurm-public/global/images/family" + project = "schedmd-slurm-public" } validation { @@ -166,7 +166,7 @@ variable "disk_labels" { } variable "additional_disks" { - description = "Configurations of additional disks to be included on the partition nodes." + description = "Configurations of additional disks to be included on the partition nodes. (do not use \"disk_type: local-ssd\"; known issue being addressed)" type = list(object({ disk_name = string device_name = string diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/versions.tf b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/versions.tf index 1a49d06801..ad2ac86f04 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/versions.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-node-group/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-node-group/v1.20.0" } required_version = ">= 0.13.0" } diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/README.md b/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/README.md index 4476baf44e..cad8fc0697 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/README.md @@ -69,7 +69,7 @@ No providers. 
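The `source_image_logic.tf` added above lets `instance_image.project` be a bare project ID, which the module normalizes into a full image-family path when an image family is used. A hedged blueprint sketch of the simplified form (the module `id` is illustrative; values are the defaults from this diff):

```yaml
  - id: node_group
    source: community/modules/compute/schedmd-slurm-gcp-v5-node-group
    settings:
      instance_image:
        family: slurm-gcp-5-7-hpc-centos-7
        # A bare project ID now suffices; the module normalizes it to
        # "projects/schedmd-slurm-public/global/images/family" internally.
        project: schedmd-slurm-public
```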
| Name | Source | Version | |------|--------|---------| -| [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | 5.7.3 | +| [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | 5.7.4 | ## Resources diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/main.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/main.tf index 9c5c707f50..554a4da771 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/main.tf @@ -16,8 +16,11 @@ */ locals { - # Default to value in partition_conf if both set "Default" - partition_conf = merge(var.is_default == true ? { "Default" : "YES" } : {}, var.partition_conf) + # Default to value in partition_conf if both set the same key + partition_conf = merge({ + "Default" = var.is_default ? "YES" : null, + "SuspendTime" = "INFINITE" + }, var.partition_conf) # Since deployment name may be used to create a cluster name, we remove any invalid character from the beginning # Also, slurm imposed a lot of restrictions to this name, so we format it to an acceptable string @@ -26,7 +29,7 @@ locals { } module "slurm_partition" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=5.7.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=5.7.4" slurm_cluster_name = local.slurm_cluster_name enable_job_exclusive = var.exclusive diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/variables.tf index f4fbe5116b..cfe375d34f 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/variables.tf @@ -15,7 +15,7 @@ */ # Most variables have been sourced and modified from the SchedMD/slurm-gcp -# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 variable "deployment_name" { description = "Name of the deployment." diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md b/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md index 866c63ab9a..7c91f2d941 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md @@ -146,7 +146,7 @@ limitations under the License. 
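Because Terraform's `merge()` gives precedence to later arguments, any key supplied in `partition_conf` overrides the module defaults merged above (`Default` and `SuspendTime: INFINITE`). A hedged sketch of an override (the module `id` and values are illustrative, not taken from this diff):

```yaml
  - id: dynamic_partition
    source: community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic
    settings:
      # Keys set here win over the merged defaults, e.g. this
      # replaces the module-supplied SuspendTime=INFINITE.
      partition_conf:
        SuspendTime: "300"
```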
| Name | Source | Version | |------|--------|---------| -| [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | 5.7.3 | +| [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | 5.7.4 | ## Resources diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf index b34ee29a9c..4e60ba24d2 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf @@ -38,7 +38,7 @@ data "google_compute_zones" "available" { } module "slurm_partition" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=5.7.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=5.7.4" slurm_cluster_name = local.slurm_cluster_name partition_nodes = var.node_groups diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf index 5ba1e8a7dd..8c8fadbe92 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf @@ -15,7 +15,7 @@ */ # Most variables have been sourced and modified from the SchedMD/slurm-gcp -# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 variable "deployment_name" { description = "Name of the deployment." diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/versions.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition/versions.tf index 319382700d..7905f526f5 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/versions.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-partition/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-partition/v1.20.0" } required_version = ">= 0.13.0" } diff --git a/community/modules/database/slurm-cloudsql-federation/versions.tf b/community/modules/database/slurm-cloudsql-federation/versions.tf index 2a247f4e0b..25f1c7a332 100644 --- a/community/modules/database/slurm-cloudsql-federation/versions.tf +++ b/community/modules/database/slurm-cloudsql-federation/versions.tf @@ -30,10 +30,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.20.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.20.0" } required_version = ">= 0.13.0" diff --git a/community/modules/file-system/Intel-DAOS/README.md b/community/modules/file-system/Intel-DAOS/README.md index 4875f2c656..f4b5370740 100644 --- a/community/modules/file-system/Intel-DAOS/README.md +++ b/community/modules/file-system/Intel-DAOS/README.md @@ -2,99 +2,64 @@ This module allows creating an instance of Distributed Asynchronous Object Storage ([DAOS](https://docs.daos.io/)) on Google Cloud Platform 
([GCP](https://cloud.google.com/)). -For more information, please refer to the [Google Cloud DAOS repo on GitHub](https://github.com/daos-stack/google-cloud-daos). +> **_NOTE:_** +> DAOS on GCP does not require an HPC Toolkit wrapper. +> Terraform modules are sourced directly from GitHub. +> These modules will not work as [local or embedded modules](../../../../modules/README.md#embedded-modules). -For more information on this and other network storage options in the Cloud HPC -Toolkit, see the extended [Network Storage documentation](../../../../docs/network_storage.md). +Terraform modules for DAOS servers and clients are located in the [Google Cloud DAOS repo on GitHub](https://github.com/daos-stack/google-cloud-daos). -> **_NOTE:_** DAOS on GCP does not require an HPC Toolkit wrapper and, therefore, sources directly from GitHub. It will not work as a [local or embedded module](../../../../modules/README.md#embedded-modules). +DAOS Terraform module parameters can be found in the README.md files in each module directory. -## Examples - -Working examples of a DAOS deployment and how it can be used in conjunction with Slurm [can be found in the community examples folder](../../../examples/intel/). - -A full list of server module parameters can be found at [the DAOS Server module README](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server). - -### DAOS Server Images - -In order to use the DAOS server terraform module a DAOS server image must be created as instructed in the *images* directory [here](https://github.com/daos-stack/google-cloud-daos/tree/main/images). - -DAOS server images must be built from the same tagged version of the [google-cloud-daos](https://github.com/daos-stack/google-cloud-daos) repository that is specified in the `source:` attribute for modules used in the [community examples](../../../examples/intel/). - -For example, in the following snippet taken from the [community/example/intel/pfs-daos.yml](../../../examples/intel/pfs-daos.yaml) the `source:` attribute specifies v0.3.0 of the daos_server terraform module +- [DAOS Server module](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server#readme) +- [DAOS Client module](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_client#readme) -```yaml - - id: daos-server - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.3.0 - use: [network1] - settings: - number_of_instances: 2 - labels: {ghpc_role: file-system} -``` - -In order to use the daos_server module v0.3.0 , you need to - -1. Clone the [google-cloud-daos](https://github.com/daos-stack/google-cloud-daos) repo and check out v0.3.0 -2. Follow the instructions in the images/README.md directory to build a DAOS server image - -## Recommended settings - -By default, the DAOS system is created with 4 servers will be configured for best cost per GB (TCO, see below), the system will be formated at the server side using [`dmg format`](https://github.com/daos-stack/google-cloud-daos/tree/develop/terraform/examples/daos_cluster#format-storage) but no pool or containers will be created. +For more information on this and other network storage options in the Cloud HPC Toolkit, see the extended [Network Storage documentation](../../../../docs/network_storage.md). 
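For example, a blueprint sources the DAOS server module directly from that repository. This sketch is adapted from the example previously inlined in this README; the `ref` tag and settings are illustrative and should match a released tag of google-cloud-daos:

```yaml
  - id: daos-server
    source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.3.0
    use: [network1]
    settings:
      number_of_instances: 2
      labels: {ghpc_role: file-system}
```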
+## Examples -The following settings will configure this [system for TCO](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/examples/daos_cluster#the-terraformtfvarstcoexample-file) (default): +The [community examples folder](../../../examples/intel/) contains two example blueprints for deploying DAOS. -```yaml - - id: daos-server - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.3.0 - use: [network1] - settings: - labels: {ghpc_role: file-system} - number_of_instances : 4 # number of DAOS server instances - machine_type : "n2-custom-36-215040" - os_disk_size_gb : 20 - daos_disk_count : 16 - daos_scm_size : 180 -``` +- [community/examples/intel/pfs-daos.yml](../../../examples/intel/pfs-daos.yml) + Blueprint for deploying a DAOS cluster consisting of servers and clients. + After deploying this example the DAOS storage system will be formatted but no pools or containers will exist. + The instructions in the [community/examples/intel/README.md](../../../examples/intel/README.md#create-a-daos-pool-and-container) describe how to -The following settings will configure this system for [best performance](https://github.com/daos-stack/google-cloud-daos/tree/develop/terraform/examples/daos_cluster#the-terraformtfvarsperfexample-file): + - Deploy a DAOS cluster + - Manage storage (create a [pool](https://docs.daos.io/v2.2/overview/storage/?h=container#daos-pool) and a [container](https://docs.daos.io/v2.2/overview/storage/?h=container#daos-container)) + - Mount a container on a client + - Store a large file in a DAOS container -```yaml - - id: daos-server - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.3.0 - use: [network1] - settings: - labels: {ghpc_role: file-system} - # The default DAOS settings are optimized for TCO - # The following will tune this system for best perf - machine_type : "n2-standard-16" - os_disk_size_gb : 20 - daos_disk_count : 4 - daos_scm_size : 45 -``` +- [community/examples/intel/hpc-slurm-daos.yaml](../../../examples/intel/hpc-slurm-daos.yaml) + Blueprint for deploying a Slurm cluster and DAOS storage with 4 servers. + The Slurm compute nodes are configured as DAOS clients and have the ability to use the DAOS filesystem. + The instructions in the [community/examples/intel/README.md](../../../examples/intel/README.md#deploy-the-daosslurm-cluster) describe how to deploy the Slurm cluster and run a job which uses the DAOS file system. ## Support Content in the [google-cloud-daos](https://github.com/daos-stack/google-cloud-daos) repository is licensed under the [Apache License Version 2.0](https://github.com/daos-stack/google-cloud-daos/blob/main/LICENSE) open-source license. -[DAOS](https://github.com/daos-stack/daos) is being distributed under the BSD-2-Clause-Patent open-source license. +[DAOS](https://github.com/daos-stack/daos) is distributed under the BSD-2-Clause-Patent open-source license. + +Intel Corporation provides two options for technical support: -Intel Corporation provides several ways for the users to get technical support: +1. Community Support -1. Community support is available to everybody through Jira and via the DAOS channel for the Google Cloud users on Slack. + Community support is available to anyone through Jira and via the DAOS channel for Google Cloud users on Slack. 
- To access Jira, please follow these steps: + JIRA: https://daosio.atlassian.net/jira/software/c/projects/DAOS/issues/ - - Navigate to https://daosio.atlassian.net/jira/software/c/projects/DAOS/issues/ + - An Atlassian account is not needed for read only access to Jira. + - An Atlassian account is required to create and update tickets. + To create an account follow the steps at https://support.atlassian.com/atlassian-account/docs/create-an-atlassian-account. - - You will need to request access to DAOS Jira to be able to create and update tickets. An Atlassian account is required for this type of access. Read-only access is available without an account. - - If you do not have an Atlassian account, follow the steps at https://support.atlassian.com/atlassian-account/docs/create-an-atlassian-account/ to create one. + Slack: https://daos-stack.slack.com/archives/C03GLTLHA59 - To access the Slack channel for DAOS on Google Cloud, please follow this link https://daos-stack.slack.com/archives/C03GLTLHA59 + Community support is provided on a best-effort basis. - > This type of support is provided on a best-effort basis, and it does not have any SLA attached. +2. Commercial L3 Support -2. Commercial L3 support is available on an on-demand basis. Please get in touch with Intel Corporation to obtain more information. + Commercial L3 support is available on an on-demand basis. - - You may inquire about the L3 support via the Slack channel (https://daos-stack.slack.com/archives/C03GLTLHA59) + Contact Intel Corporation to obtain more information about Commercial L3 support. -[here](https://github.com/daos-stack/google-cloud-daos/tree/main/images) + You may inquire about L3 support via the [Slack channel](https://daos-stack.slack.com/archives/C03GLTLHA59). diff --git a/community/modules/file-system/cloud-storage-bucket/versions.tf b/community/modules/file-system/cloud-storage-bucket/versions.tf index c502ae4337..72f75aa07f 100644 --- a/community/modules/file-system/cloud-storage-bucket/versions.tf +++ b/community/modules/file-system/cloud-storage-bucket/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:cloud-storage-bucket/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:cloud-storage-bucket/v1.20.0" } required_version = ">= 0.14.0" } diff --git a/community/modules/file-system/gke-persistent-volume/README.md b/community/modules/file-system/gke-persistent-volume/README.md new file mode 100644 index 0000000000..d4810a5940 --- /dev/null +++ b/community/modules/file-system/gke-persistent-volume/README.md @@ -0,0 +1,132 @@ +## Description + +This module creates Kubernetes Persistent Volumes (PV) and Persistent Volume +Claims (PVC) that can be used by a [gke-job-template]. + +Currently, the `gke-persistent-volume` module only works with Filestore. Each +`gke-persistent-volume` can only use a single Filestore. If multiple Filestores +are used then multiple `gke-persistent-volume` modules are needed. + +> **_NOTE:_** This is an experimental module and the functionality and +> documentation will likely be updated in the near future. This module has only +> been tested in limited capacity. + +### Example + +The following example creates a Filestore and then uses the +`gke-persistent-volume` module to use the Filestore as shared storage in a +`gke-job-template`. 
+
+```yaml
+  - id: gke_cluster
+    source: community/modules/scheduler/gke-cluster
+    use: [network1]
+    settings:
+      master_authorized_networks:
+      - display_name: deployment-machine
+        cidr_block: <your-ip-address>/32
+
+  - id: datafs
+    source: modules/file-system/filestore
+    use: [network1]
+    settings: { local_mount: /data }
+
+  - id: datafs-pv
+    source: community/modules/file-system/gke-persistent-volume
+    use: [datafs, gke_cluster]
+
+  - id: job-template
+    source: community/modules/compute/gke-job-template
+    use: [datafs-pv, compute_pool]
+```
+
+### Authorized Network
+
+Since the `gke-persistent-volume` module is making calls to the Kubernetes API
+to create Kubernetes entities, the machine performing the deployment must be
+authorized to connect to the Kubernetes API. You can add the
+`master_authorized_networks` settings block, as shown in the example above, with
+the IP address of the machine performing the deployment. This will ensure that
+the deploying machine can connect to the cluster.
+
+### Connecting Via Use
+
+The diagram below shows the valid `use` relationships for the GKE HPC Toolkit
+modules. For example, the `gke-persistent-volume` module can `use` a
+`gke-cluster` module and a `filestore` module, as shown in the example above.
+
+```mermaid
+graph TD;
+  vpc-->|OneToMany|gke-cluster;
+  gke-cluster-->|OneToMany|gke-node-pool;
+  gke-node-pool-->|ManyToMany|gke-job-template;
+  gke-cluster-->|OneToMany|gke-persistent-volume;
+  gke-persistent-volume-->|ManyToMany|gke-job-template;
+  vpc-->|OneToMany|filestore;
+  filestore-->|OneToOne|gke-persistent-volume;
+```
+
+## License
+
+
+Copyright 2023 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+## Requirements
+
+| Name | Version |
+|------|---------|
+| [terraform](#requirement\_terraform) | >= 1.0 |
+| [google](#requirement\_google) | >= 4.42 |
+| [kubectl](#requirement\_kubectl) | >= 1.7.0 |
+| [local](#requirement\_local) | >= 2.0.0 |
+
+## Providers
+
+| Name | Version |
+|------|---------|
+| [google](#provider\_google) | >= 4.42 |
+| [kubectl](#provider\_kubectl) | >= 1.7.0 |
+| [local](#provider\_local) | >= 2.0.0 |
+
+## Modules
+
+No modules. 
+ +## Resources + +| Name | Type | +|------|------| +| [kubectl_manifest.filestore_pv](https://registry.terraform.io/providers/gavinbunney/kubectl/latest/docs/resources/manifest) | resource | +| [kubectl_manifest.filestore_pvc](https://registry.terraform.io/providers/gavinbunney/kubectl/latest/docs/resources/manifest) | resource | +| [local_file.debug_file](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | +| [google_client_config.default](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/client_config) | data source | +| [google_container_cluster.gke_cluster](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/container_cluster) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [capacity\_gb](#input\_capacity\_gb) | The storage capacity with which to create the persistent volume. | `number` | n/a | yes | +| [cluster\_id](#input\_cluster\_id) | An identifier for the GKE cluster in the format `projects/{{project}}/locations/{{location}}/clusters/{{cluster}}` | `string` | n/a | yes | +| [filestore\_id](#input\_filestore\_id) | An identifier for a filestore with the format `projects/{{project}}/locations/{{location}}/instances/{{name}}`. | `string` | n/a | yes | +| [labels](#input\_labels) | GCE resource labels to be applied to resources. Key-value pairs. | `map(string)` | n/a | yes | +| [network\_storage](#input\_network\_storage) | Network attached storage mount to be configured. |
object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
})
| n/a | yes | + +## Outputs + +| Name | Description | +|------|-------------| +| [persistent\_volume\_claims](#output\_persistent\_volume\_claims) | An object that describes a k8s PVC created by this module. | + diff --git a/community/modules/file-system/gke-persistent-volume/main.tf b/community/modules/file-system/gke-persistent-volume/main.tf new file mode 100644 index 0000000000..9e09d691f7 --- /dev/null +++ b/community/modules/file-system/gke-persistent-volume/main.tf @@ -0,0 +1,86 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +locals { + # This label allows for billing report tracking based on module. + labels = merge(var.labels, { ghpc_module = "gke-persistent-volume" }) +} + +locals { + location = split("/", var.filestore_id)[3] + filestore_name = split("/", var.filestore_id)[5] + filestore_share_name = trimprefix(var.network_storage.remote_mount, "/") + + pv_name = "${local.filestore_name}-pv" + pvc_name = "${local.filestore_name}-pvc" + + filestore_pv_contents = templatefile( + "${path.module}/templates/filestore-pv.yaml.tftpl", + { + pv_name = local.pv_name + capacity = "${var.capacity_gb}Gi" + location = local.location + filestore_name = local.filestore_name + share_name = local.filestore_share_name + ip_address = var.network_storage.server_ip + labels = local.labels + } + ) + + filestore_pvc_contents = templatefile( + "${path.module}/templates/filestore-pvc.yaml.tftpl", + { + pv_name = local.pv_name + capacity = "${var.capacity_gb}Gi" + pvc_name = local.pvc_name + labels = local.labels + } + ) + + cluster_name = split("/", var.cluster_id)[5] + cluster_location = split("/", var.cluster_id)[3] +} + +resource "local_file" "debug_file" { + content = <<-EOF + ${local.filestore_pv_contents} + ${local.filestore_pvc_contents} + EOF + filename = "${path.root}/pv-pvc-debug-file-${local.filestore_name}.yaml" +} + +data "google_container_cluster" "gke_cluster" { + name = local.cluster_name + location = local.cluster_location +} + +data "google_client_config" "default" {} + +provider "kubectl" { + host = "https://${data.google_container_cluster.gke_cluster.endpoint}" + cluster_ca_certificate = base64decode(data.google_container_cluster.gke_cluster.master_auth[0].cluster_ca_certificate) + token = data.google_client_config.default.access_token + load_config_file = false +} + +resource "kubectl_manifest" "filestore_pv" { + yaml_body = local.filestore_pv_contents +} + +resource "kubectl_manifest" "filestore_pvc" { + yaml_body = local.filestore_pvc_contents + depends_on = [kubectl_manifest.filestore_pv] +} diff --git a/community/modules/file-system/gke-persistent-volume/outputs.tf b/community/modules/file-system/gke-persistent-volume/outputs.tf new file mode 100644 index 0000000000..6cdbf388a3 --- /dev/null +++ b/community/modules/file-system/gke-persistent-volume/outputs.tf @@ -0,0 +1,25 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +output "persistent_volume_claims" { + description = "An object that describes a k8s PVC created by this module." + value = { + name = local.pvc_name + mount_path = var.network_storage.local_mount + mount_options = var.network_storage.mount_options + } + depends_on = [kubectl_manifest.filestore_pvc] +} diff --git a/community/modules/file-system/gke-persistent-volume/templates/filestore-pv.yaml.tftpl b/community/modules/file-system/gke-persistent-volume/templates/filestore-pv.yaml.tftpl new file mode 100644 index 0000000000..cfda33978c --- /dev/null +++ b/community/modules/file-system/gke-persistent-volume/templates/filestore-pv.yaml.tftpl @@ -0,0 +1,23 @@ +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: ${pv_name} + labels: + %{~ for key, val in labels ~} + ${key}: ${val} + %{~ endfor ~} +spec: + storageClassName: "" + capacity: + storage: ${capacity} + accessModes: + - ReadWriteMany + persistentVolumeReclaimPolicy: Retain + volumeMode: Filesystem + csi: + driver: filestore.csi.storage.gke.io + volumeHandle: "modeInstance/${location}/${filestore_name}/${share_name}" + volumeAttributes: + ip: ${ip_address} + volume: ${share_name} diff --git a/community/modules/file-system/gke-persistent-volume/templates/filestore-pvc.yaml.tftpl b/community/modules/file-system/gke-persistent-volume/templates/filestore-pvc.yaml.tftpl new file mode 100644 index 0000000000..5bcd735807 --- /dev/null +++ b/community/modules/file-system/gke-persistent-volume/templates/filestore-pvc.yaml.tftpl @@ -0,0 +1,17 @@ +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: ${pvc_name} + labels: + %{~ for key, val in labels ~} + ${key}: ${val} + %{~ endfor ~} +spec: + accessModes: + - ReadWriteMany + storageClassName: "" + volumeName: ${pv_name} + resources: + requests: + storage: ${capacity} diff --git a/community/modules/file-system/gke-persistent-volume/variables.tf b/community/modules/file-system/gke-persistent-volume/variables.tf new file mode 100644 index 0000000000..235affff0d --- /dev/null +++ b/community/modules/file-system/gke-persistent-volume/variables.tf @@ -0,0 +1,52 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +variable "cluster_id" { + description = "An identifier for the GKE cluster in the format `projects/{{project}}/locations/{{location}}/clusters/{{cluster}}`" + type = string +} + +variable "network_storage" { + description = "Network attached storage mount to be configured." 
+ type = object({ + server_ip = string, + remote_mount = string, + local_mount = string, + fs_type = string, + mount_options = string, + client_install_runner = map(string) + mount_runner = map(string) + }) +} + +variable "filestore_id" { + description = "An identifier for a filestore with the format `projects/{{project}}/locations/{{location}}/instances/{{name}}`." + type = string + validation { + condition = length(split("/", var.filestore_id)) == 6 + error_message = "filestore_id must be in the format of 'projects/{{project}}/locations/{{location}}/instances/{{name}}'." + } +} + +variable "capacity_gb" { + description = "The storage capacity with which to create the persistent volume." + type = number +} + +variable "labels" { + description = "GCE resource labels to be applied to resources. Key-value pairs." + type = map(string) +} diff --git a/community/modules/file-system/gke-persistent-volume/versions.tf b/community/modules/file-system/gke-persistent-volume/versions.tf new file mode 100644 index 0000000000..c3c0003c5c --- /dev/null +++ b/community/modules/file-system/gke-persistent-volume/versions.tf @@ -0,0 +1,34 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +terraform { + required_version = ">= 1.0" + required_providers { + google = { + source = "hashicorp/google" + version = ">= 4.42" + } + kubectl = { + source = "gavinbunney/kubectl" + version = ">= 1.7.0" + } + local = { + source = "hashicorp/local" + version = ">= 2.0.0" + } + } + provider_meta "google" { + module_name = "blueprints/terraform/hpc-toolkit:gke-persistent-volume/v1.20.0" + } +} diff --git a/community/modules/file-system/nfs-server/versions.tf b/community/modules/file-system/nfs-server/versions.tf index 645d41dc84..9230a6d48c 100644 --- a/community/modules/file-system/nfs-server/versions.tf +++ b/community/modules/file-system/nfs-server/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.20.0" } required_version = ">= 0.14.0" diff --git a/community/modules/project/service-enablement/versions.tf b/community/modules/project/service-enablement/versions.tf index 2d01317135..0b9170fe1a 100644 --- a/community/modules/project/service-enablement/versions.tf +++ b/community/modules/project/service-enablement/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.20.0" } required_version = ">= 0.14.0" diff --git a/community/modules/remote-desktop/chrome-remote-desktop/README.md b/community/modules/remote-desktop/chrome-remote-desktop/README.md index dce66471fa..941e7a813c 100644 --- a/community/modules/remote-desktop/chrome-remote-desktop/README.md +++ b/community/modules/remote-desktop/chrome-remote-desktop/README.md @@ -63,7 +63,7 @@ No providers. 
| Name | Source | Version | |------|--------|---------| -| [client\_startup\_script](#module\_client\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 34bb7250 | +| [client\_startup\_script](#module\_client\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 185837b5 | | [instances](#module\_instances) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/compute/vm-instance | 69848ab | ## Resources diff --git a/community/modules/remote-desktop/chrome-remote-desktop/main.tf b/community/modules/remote-desktop/chrome-remote-desktop/main.tf index 9c5fa761a6..93ac14bd08 100644 --- a/community/modules/remote-desktop/chrome-remote-desktop/main.tf +++ b/community/modules/remote-desktop/chrome-remote-desktop/main.tf @@ -55,7 +55,7 @@ locals { } module "client_startup_script" { - source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=34bb7250" + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=185837b5" deployment_name = var.deployment_name project_id = var.project_id diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf index 02d5ebd34b..ec48f280bb 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf @@ -16,7 +16,7 @@ terraform { provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.20.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf index c2d62da384..50fe9e61e3 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf @@ -16,7 +16,7 @@ terraform { provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.20.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/gke-cluster/README.md b/community/modules/scheduler/gke-cluster/README.md index 2b29505398..6e9811c105 100644 --- a/community/modules/scheduler/gke-cluster/README.md +++ b/community/modules/scheduler/gke-cluster/README.md @@ -130,13 +130,18 @@ No modules. | [project\_id](#input\_project\_id) | The project ID to host the cluster in. | `string` | n/a | yes | | [region](#input\_region) | The region to host the cluster in. | `string` | n/a | yes | | [release\_channel](#input\_release\_channel) | The release channel of this cluster. Accepted values are `UNSPECIFIED`, `RAPID`, `REGULAR` and `STABLE`. | `string` | `"UNSPECIFIED"` | no | -| [service\_account](#input\_service\_account) | Service account to use with the system node pool |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/cloud-platform"
]
}
| no | +| [service\_account](#input\_service\_account) | DEPRECATED: use service\_account\_email and service\_account\_scopes. |
object({
email = string,
scopes = set(string)
})
| `null` | no | +| [service\_account\_email](#input\_service\_account\_email) | Service account e-mail address to use with the system node pool | `string` | `null` | no | +| [service\_account\_scopes](#input\_service\_account\_scopes) | Scopes to use with the system node pool. | `set(string)` |
[
"https://www.googleapis.com/auth/cloud-platform"
]
| no | | [services\_ip\_range\_name](#input\_services\_ip\_range\_name) | The name of the secondary subnet range to use for services. | `string` | `"services"` | no | | [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | The self link of the subnetwork to host the cluster in. | `string` | n/a | yes | +| [system\_node\_pool\_enabled](#input\_system\_node\_pool\_enabled) | Create a system node pool. | `bool` | `true` | no | | [system\_node\_pool\_machine\_type](#input\_system\_node\_pool\_machine\_type) | Machine type for the system node pool. | `string` | `"e2-standard-4"` | no | | [system\_node\_pool\_name](#input\_system\_node\_pool\_name) | Name of the system node pool. | `string` | `"system"` | no | | [system\_node\_pool\_node\_count](#input\_system\_node\_pool\_node\_count) | The total min and max nodes to be maintained in the system node pool. |
object({
total_min_nodes = number
total_max_nodes = number
})
|
{
"total_max_nodes": 10,
"total_min_nodes": 2
}
| no | | [system\_node\_pool\_taints](#input\_system\_node\_pool\_taints) | Taints to be applied to the system node pool. |
list(object({
key = string
value = any
effect = string
}))
|
[
{
"effect": "NO_SCHEDULE",
"key": "components.gke.io/gke-managed-components",
"value": true
}
]
| no | +| [timeout\_create](#input\_timeout\_create) | Timeout for creating a node pool | `string` | `null` | no | +| [timeout\_update](#input\_timeout\_update) | Timeout for updating a node pool | `string` | `null` | no |

## Outputs

diff --git a/community/modules/scheduler/gke-cluster/main.tf b/community/modules/scheduler/gke-cluster/main.tf
index d31db4fa13..c03d2874d2 100644
--- a/community/modules/scheduler/gke-cluster/main.tf
+++ b/community/modules/scheduler/gke-cluster/main.tf
@@ -29,7 +29,7 @@ locals {
     security_group = var.authenticator_security_group
   }]

-  sa_email = var.service_account.email != null ? var.service_account.email : data.google_compute_default_service_account.default_sa.email
+  sa_email = var.service_account_email != null ? var.service_account_email : data.google_compute_default_service_account.default_sa.email
 }

 data "google_compute_default_service_account" "default_sa" {
@@ -154,6 +154,11 @@ resource "google_container_cluster" "gke_cluster" {
     }
   }

+  timeouts {
+    create = var.timeout_create
+    update = var.timeout_update
+  }
+
   lifecycle {
     # Ignore all changes to the default node pool. It's being removed after creation.
     ignore_changes = [
@@ -169,6 +174,7 @@ resource "google_container_cluster" "gke_cluster" {
 # having to destroy the entire cluster.
 resource "google_container_node_pool" "system_node_pools" {
   provider = google-beta
+  count    = var.system_node_pool_enabled ? 1 : 0

   project = var.project_id
   name    = var.system_node_pool_name
@@ -190,8 +196,8 @@ resource "google_container_node_pool" "system_node_pools" {
   node_config {
     resource_labels = local.labels
-    service_account = var.service_account.email
-    oauth_scopes    = var.service_account.scopes
+    service_account = var.service_account_email
+    oauth_scopes    = var.service_account_scopes
     machine_type    = var.system_node_pool_machine_type
     taint           = var.system_node_pool_taints
diff --git a/community/modules/scheduler/gke-cluster/variables.tf b/community/modules/scheduler/gke-cluster/variables.tf
index 1fc8ecef8a..a4f284e2ec 100644
--- a/community/modules/scheduler/gke-cluster/variables.tf
+++ b/community/modules/scheduler/gke-cluster/variables.tf
@@ -127,6 +127,12 @@ variable "enable_persistent_disk_csi" {
   default = true
 }

+variable "system_node_pool_enabled" {
+  description = "Create a system node pool."
+  type        = bool
+  default     = true
+}
+
 variable "system_node_pool_name" {
   description = "Name of the system node pool."
   type        = string
@@ -198,16 +204,16 @@ variable "master_authorized_networks" {
   default = []
 }

-variable "service_account" {
-  description = "Service account to use with the system node pool"
-  type = object({
-    email  = string,
-    scopes = set(string)
-  })
-  default = {
-    email  = null
-    scopes = ["https://www.googleapis.com/auth/cloud-platform"]
-  }
+variable "service_account_email" {
+  description = "Service account e-mail address to use with the system node pool"
+  type        = string
+  default     = null
+}
+
+variable "service_account_scopes" {
+  description = "Scopes to use with the system node pool."
+  type        = set(string)
+  default     = ["https://www.googleapis.com/auth/cloud-platform"]
 }

 variable "autoscaling_profile" {
@@ -232,3 +238,29 @@ variable "labels" {
   description = "GCE resource labels to be applied to resources. Key-value pairs."
  type        = map(string)
 }
+
+variable "timeout_create" {
+  description = "Timeout for creating a node pool"
+  type        = string
+  default     = null
+}
+
+variable "timeout_update" {
+  description = "Timeout for updating a node pool"
+  type        = string
+  default     = null
+}
+
+# Deprecated
+variable "service_account" {
+  description = "DEPRECATED: use service_account_email and service_account_scopes."
+  type = object({
+    email  = string,
+    scopes = set(string)
+  })
+  default = null
+  validation {
+    condition     = var.service_account == null
+    error_message = "service_account is deprecated and replaced with service_account_email and service_account_scopes."
+  }
+}
diff --git a/community/modules/scheduler/gke-cluster/versions.tf b/community/modules/scheduler/gke-cluster/versions.tf
index a763888558..fb7b6bf7b6 100644
--- a/community/modules/scheduler/gke-cluster/versions.tf
+++ b/community/modules/scheduler/gke-cluster/versions.tf
@@ -26,6 +26,6 @@ terraform {
     }
   }
   provider_meta "google" {
-    module_name = "blueprints/terraform/hpc-toolkit:gke-cluster/v1.19.1"
+    module_name = "blueprints/terraform/hpc-toolkit:gke-cluster/v1.20.0"
   }
 }
diff --git a/community/modules/scheduler/htcondor-configure/files/htcondor_configure.yml b/community/modules/scheduler/htcondor-configure/files/htcondor_configure.yml
index 1b69a8149b..faa8b348a7 100644
--- a/community/modules/scheduler/htcondor-configure/files/htcondor_configure.yml
+++ b/community/modules/scheduler/htcondor-configure/files/htcondor_configure.yml
@@ -41,7 +41,6 @@
     - name: Set HTCondor Pool password (token signing key)
       ansible.builtin.shell: |
         set -e -o pipefail
-        export CLOUDSDK_PYTHON=/usr/bin/python
         POOL_PASSWORD=$(gcloud secrets versions access latest --secret={{ password_id }})
         echo -n "$POOL_PASSWORD" | sh -c "condor_store_cred add -c -i -"
       args:
diff --git a/community/modules/scheduler/htcondor-configure/versions.tf b/community/modules/scheduler/htcondor-configure/versions.tf
index 0f755aeeb1..6827118448 100644
--- a/community/modules/scheduler/htcondor-configure/versions.tf
+++ b/community/modules/scheduler/htcondor-configure/versions.tf
@@ -26,7 +26,7 @@ terraform {
     }
   }
   provider_meta "google" {
-    module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.19.1"
+    module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.20.0"
   }

   required_version = ">= 0.13.0"
diff --git a/community/modules/scheduler/pbspro-client/README.md b/community/modules/scheduler/pbspro-client/README.md
index ee1ec11597..4e90e39b12 100644
--- a/community/modules/scheduler/pbspro-client/README.md
+++ b/community/modules/scheduler/pbspro-client/README.md
@@ -74,7 +74,7 @@ No providers. 
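Blueprints that still pass the deprecated `service_account` object to `gke-cluster` (or `gke-node-pool`, which carries the same deprecation) will now fail the validation above; the split variables replace it. A hedged sketch (the module `id` and e-mail address are placeholders):

```yaml
  - id: gke_cluster
    source: community/modules/scheduler/gke-cluster
    settings:
      # Replaces the deprecated service_account = { email = ..., scopes = [...] } object.
      service_account_email: gke-system-pool@my-project.iam.gserviceaccount.com
      service_account_scopes:
      - https://www.googleapis.com/auth/cloud-platform
```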
| Name | Source | Version | |------|--------|---------| -| [client\_startup\_script](#module\_client\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 34bb7250 | +| [client\_startup\_script](#module\_client\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 185837b5 | | [pbs\_client](#module\_pbs\_client) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/compute/vm-instance | 6c6b9e0a | | [pbs\_install](#module\_pbs\_install) | github.com/GoogleCloudPlatform/hpc-toolkit//community/modules/scripts/pbspro-install | 6c6b9e0a | diff --git a/community/modules/scheduler/pbspro-client/main.tf b/community/modules/scheduler/pbspro-client/main.tf index 5b42f5e2a1..75db8bfaa9 100644 --- a/community/modules/scheduler/pbspro-client/main.tf +++ b/community/modules/scheduler/pbspro-client/main.tf @@ -43,7 +43,7 @@ module "pbs_install" { } module "client_startup_script" { - source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=34bb7250" + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=185837b5" deployment_name = var.deployment_name project_id = var.project_id diff --git a/community/modules/scheduler/pbspro-server/README.md b/community/modules/scheduler/pbspro-server/README.md index afc0b82965..7d29a5f456 100644 --- a/community/modules/scheduler/pbspro-server/README.md +++ b/community/modules/scheduler/pbspro-server/README.md @@ -72,7 +72,7 @@ No providers. | [pbs\_install](#module\_pbs\_install) | github.com/GoogleCloudPlatform/hpc-toolkit//community/modules/scripts/pbspro-install | 6c6b9e0a | | [pbs\_qmgr](#module\_pbs\_qmgr) | github.com/GoogleCloudPlatform/hpc-toolkit//community/modules/scripts/pbspro-qmgr | 6c6b9e0a | | [pbs\_server](#module\_pbs\_server) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/compute/vm-instance | 6c6b9e0a | -| [server\_startup\_script](#module\_server\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 34bb7250 | +| [server\_startup\_script](#module\_server\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 185837b5 | ## Resources diff --git a/community/modules/scheduler/pbspro-server/main.tf b/community/modules/scheduler/pbspro-server/main.tf index a0603f4a93..34d8767bd6 100644 --- a/community/modules/scheduler/pbspro-server/main.tf +++ b/community/modules/scheduler/pbspro-server/main.tf @@ -55,7 +55,7 @@ module "pbs_qmgr" { } module "server_startup_script" { - source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=34bb7250" + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=185837b5" deployment_name = var.deployment_name project_id = var.project_id diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md index f94bfd4135..ba42bd22e0 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md @@ -17,14 +17,14 @@ controller for optimal performance at different scales. 
> > ```shell > # Install Python3 and run -> pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.3/scripts/requirements.txt +> pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.4/scripts/requirements.txt > ``` -[SchedMD/slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 -[slurm\_controller\_instance]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/terraform/slurm_cluster/modules/slurm_controller_instance -[slurm\_instance\_template]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/terraform/slurm_cluster/modules/slurm_instance_template +[SchedMD/slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 +[slurm\_controller\_instance]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/terraform/slurm_cluster/modules/slurm_controller_instance +[slurm\_instance\_template]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/terraform/slurm_cluster/modules/slurm_instance_template [slurm-ug]: https://goo.gle/slurm-gcp-user-guide. -[requirements.txt]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/scripts/requirements.txt +[requirements.txt]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/scripts/requirements.txt [enable\_cleanup\_compute]: #input\_enable\_cleanup\_compute [enable\_cleanup\_subscriptions]: #input\_enable\_cleanup\_subscriptions [enable\_reconfigure]: #input\_enable\_reconfigure @@ -94,12 +94,12 @@ This option has some additional requirements: development environment deploying the cluster. One can use following commands: ```bash - pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.3/scripts/requirements.txt + pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.4/scripts/requirements.txt ``` For more information, see the [description][optdeps] of this module. -[optdeps]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/terraform/slurm_cluster#optional +[optdeps]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/terraform/slurm_cluster#optional ## Custom Images @@ -163,8 +163,8 @@ limitations under the License. | Name | Source | Version | |------|--------|---------| -| [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance | 5.7.3 | -| [slurm\_controller\_template](#module\_slurm\_controller\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 5.7.3 | +| [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance | 5.7.4 | +| [slurm\_controller\_template](#module\_slurm\_controller\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 5.7.4 | ## Resources @@ -205,7 +205,7 @@ limitations under the License. | [epilog\_scripts](#input\_epilog\_scripts) | List of scripts to be used for Epilog. Programs for the slurmd to execute
on every node when a user's job completes.
See https://slurm.schedmd.com/slurm.conf.html#OPT_Epilog. |
list(object({
filename = string
content = string
}))
| `[]` | no | | [gpu](#input\_gpu) | GPU information. Type and count of GPU to attach to the instance template. See
https://cloud.google.com/compute/docs/gpus for more details.
- type : the GPU type, e.g. nvidia-tesla-t4, nvidia-a100-80gb, nvidia-tesla-a100, etc
- count : number of GPUs

If both 'var.gpu' and 'var.guest\_accelerator' are set, 'var.gpu' will be used. |
object({
type = string
count = number
})
| `null` | no | | [guest\_accelerator](#input\_guest\_accelerator) | Alternative method of providing 'var.gpu' with a consistent naming scheme to
other HPC Toolkit modules.

If both 'var.gpu' and 'var.guest\_accelerator' are set, 'var.gpu' will be used. |
list(object({
type = string,
count = number
}))
| `null` | no | -| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm controller VM instance. This
value is overridden if any of `source_image`, `source_image_family` or
`source_image_project` are set.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-5-7-hpc-centos-7",
"project": "projects/schedmd-slurm-public/global/images/family"
}
| no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm controller VM instance. This
value is overridden if any of `source_image`, `source_image_family` or
`source_image_project` are set.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-5-7-hpc-centos-7",
"project": "schedmd-slurm-public"
}
| no | | [instance\_template](#input\_instance\_template) | Self link to a custom instance template. If set, other VM definition
variables such as machine\_type and instance\_image will be ignored in favor
of the provided instance template.

For more information on creating custom images for the instance template
that comply with Slurm on GCP see the "Slurm on GCP Custom Images" section
in docs/vm-images.md. | `string` | `null` | no | | [labels](#input\_labels) | Labels, provided as a map. | `map(string)` | `{}` | no | | [login\_startup\_scripts\_timeout](#input\_login\_startup\_scripts\_timeout) | The timeout (seconds) applied to the login startup script. If
any script exceeds this timeout, then the instance setup process is considered
failed and handled accordingly.

NOTE: When set to 0, the timeout is considered infinite and thus disabled. | `number` | `300` | no | diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf index b907ef54b2..73ef3e2453 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf @@ -36,13 +36,6 @@ locals { enable_public_ip_access_config = var.disable_controller_public_ips ? [] : [{ nat_ip = null, network_tier = null }] access_config = length(var.access_config) == 0 ? local.enable_public_ip_access_config : var.access_config - # Handle VM image format from 2 sources, prioritize source_image* variables - # over instance_image - source_image_input_used = var.source_image != "" || var.source_image_family != "" || var.source_image_project != "" - source_image = local.source_image_input_used ? var.source_image : lookup(var.instance_image, "name", "") - source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "") - source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "") - additional_disks = [ for ad in var.additional_disks : { disk_name = ad.disk_name @@ -61,7 +54,7 @@ data "google_compute_default_service_account" "default" { } module "slurm_controller_instance" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance?ref=5.7.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance?ref=5.7.4" access_config = local.access_config slurm_cluster_name = local.slurm_cluster_name @@ -97,7 +90,7 @@ module "slurm_controller_instance" { } module "slurm_controller_template" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=5.7.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=5.7.4" additional_disks = local.additional_disks can_ip_forward = var.can_ip_forward @@ -121,9 +114,9 @@ module "slurm_controller_template" { region = var.region shielded_instance_config = var.shielded_instance_config slurm_instance_role = "controller" - source_image_family = local.source_image_family - source_image_project = local.source_image_project - source_image = local.source_image + source_image_family = local.source_image_family # requires source_image_logic.tf + source_image_project = local.source_image_project_normalized # requires source_image_logic.tf + source_image = local.source_image # requires source_image_logic.tf network = var.network_self_link == null ? "" : var.network_self_link subnetwork_project = var.subnetwork_project == null ? "" : var.subnetwork_project subnetwork = var.subnetwork_self_link == null ? "" : var.subnetwork_self_link diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/source_image_logic.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/source_image_logic.tf new file mode 100644 index 0000000000..8e5a1ea5ec --- /dev/null +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/source_image_logic.tf @@ -0,0 +1,29 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +locals { + # Handle VM image format from 2 sources, prioritize source_image* variables + # over instance_image + source_image_input_used = var.source_image != "" || var.source_image_family != "" || var.source_image_project != "" + source_image = local.source_image_input_used ? var.source_image : lookup(var.instance_image, "name", "") + source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "") + source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "") + source_image_project_normalized = ( + local.source_image != "" || length(regexall("/", local.source_image_project)) > 0 + ? local.source_image_project + : "projects/${local.source_image_project}/global/images/family" + ) +} diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf index 064359b0e3..fb20beb52b 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf @@ -15,7 +15,7 @@ */ # Most variables have been sourced and modified from the SchedMD/slurm-gcp -# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 variable "access_config" { description = "Access configurations, i.e. IPs via which the VM instance can be accessed via the Internet." @@ -540,7 +540,7 @@ variable "instance_image" { type = map(string) default = { family = "slurm-gcp-5-7-hpc-centos-7" - project = "projects/schedmd-slurm-public/global/images/family" + project = "schedmd-slurm-public" } validation { diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/versions.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/versions.tf index b9ddedc830..5c2986b93f 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/versions.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-controller/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-controller/v1.20.0" } required_version = ">= 0.14.0" } diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md index c40a761c75..d79fa9cc32 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md @@ -38,7 +38,7 @@ manually. This will require addition configuration and verification of permissions. For more information see the [hybrid.md] documentation on [slurm-gcp]. 
-[slurm-controller-hybrid]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/terraform/slurm_cluster/modules/slurm_controller_hybrid +[slurm-controller-hybrid]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/terraform/slurm_cluster/modules/slurm_controller_hybrid > **_NOTE:_** The hybrid module requires the following dependencies to be > installed on the system deploying the module: @@ -58,15 +58,15 @@ permissions. For more information see the [hybrid.md] documentation on [pyyaml]: https://pypi.org/project/PyYAML/ [google-api-python-client]: https://pypi.org/project/google-api-python-client/ [google-cloud-pubsub]: https://pypi.org/project/google-cloud-pubsub/ -[requirements.txt]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/scripts/requirements.txt +[requirements.txt]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/scripts/requirements.txt ### Manual Configuration This module *does not* complete the installation of hybrid partitions on your slurm cluster. After deploying, you must follow the steps listed out in the [hybrid.md] documentation under [manual steps]. -[hybrid.md]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/docs/hybrid.md -[manual steps]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/docs/hybrid.md#manual-configurations +[hybrid.md]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/docs/hybrid.md +[manual steps]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/docs/hybrid.md#manual-configurations ### Example Usage The hybrid module can be added to a blueprint as follows: @@ -146,10 +146,10 @@ strongly advise only using versions 21 or 22 when using this module. Attempting to use this module with any version older than 21 may lead to unexpected results. -[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 [pre-existing-network-storage]: ../../../../modules/file-system/pre-existing-network-storage/ [schedmd-slurm-gcp-v5-partition]: ../../compute/schedmd-slurm-gcp-v5-partition/ -[packer templates]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/packer +[packer templates]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/packer ## License @@ -181,7 +181,7 @@ No providers. 
| Name | Source | Version | |------|--------|---------| -| [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_hybrid | 5.7.3 | +| [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_hybrid | 5.7.4 | ## Resources diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/main.tf index b4f1c6d781..787e558726 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/main.tf @@ -28,7 +28,7 @@ locals { } module "slurm_controller_instance" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_hybrid?ref=5.7.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_hybrid?ref=5.7.4" project_id = var.project_id slurm_cluster_name = local.slurm_cluster_name diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md index e2431a0f2e..421a9e8ee6 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md @@ -5,9 +5,9 @@ This module creates a login node for a Slurm cluster based on the terraform modules. The login node is used in conjunction with the [Slurm controller](../schedmd-slurm-gcp-v5-controller/README.md). -[SchedMD/slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 -[slurm\_login\_instance]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/terraform/slurm_cluster/modules/slurm_login_instance -[slurm\_instance\_template]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/terraform/slurm_cluster/modules/slurm_instance_template +[SchedMD/slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 +[slurm\_login\_instance]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/terraform/slurm_cluster/modules/slurm_login_instance +[slurm\_instance\_template]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/terraform/slurm_cluster/modules/slurm_instance_template ### Example @@ -49,8 +49,8 @@ The HPC Toolkit team maintains the wrapper around the [slurm-on-gcp] terraform modules. For support with the underlying modules, see the instructions in the [slurm-gcp README][slurm-gcp-readme]. -[slurm-on-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 -[slurm-gcp-readme]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3#slurm-on-google-cloud-platform +[slurm-on-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 +[slurm-gcp-readme]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4#slurm-on-google-cloud-platform ## License @@ -85,8 +85,8 @@ limitations under the License. 
| Name | Source | Version | |------|--------|---------| -| [slurm\_login\_instance](#module\_slurm\_login\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance | 5.7.3 | -| [slurm\_login\_template](#module\_slurm\_login\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 5.7.3 | +| [slurm\_login\_instance](#module\_slurm\_login\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance | 5.7.4 | +| [slurm\_login\_template](#module\_slurm\_login\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 5.7.4 | ## Resources @@ -114,7 +114,7 @@ limitations under the License. | [enable\_shielded\_vm](#input\_enable\_shielded\_vm) | Enable the Shielded VM configuration. Note: the instance image must support option. | `bool` | `false` | no | | [gpu](#input\_gpu) | GPU information. Type and count of GPU to attach to the instance template. See
https://cloud.google.com/compute/docs/gpus more details.
- type : the GPU type, e.g. nvidia-tesla-t4, nvidia-a100-80gb, nvidia-tesla-a100, etc
- count : number of GPUs

If both 'var.gpu' and 'var.guest\_accelerator' are set, 'var.gpu' will be used. |
object({
type = string
count = number
})
| `null` | no | | [guest\_accelerator](#input\_guest\_accelerator) | Alternative method of providing 'var.gpu' with a consistent naming scheme to
other HPC Toolkit modules.

If both 'var.gpu' and 'var.guest\_accelerator' are set, 'var.gpu' will be used. |
list(object({
type = string,
count = number
}))
| `null` | no | -| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm login node VM instances. This
value is overridden if any of `source_image`, `source_image_family` or
`source_image_project` are set.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-5-7-hpc-centos-7",
"project": "projects/schedmd-slurm-public/global/images/family"
}
| no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm login node VM instances. This
value is overridden if any of `source_image`, `source_image_family` or
`source_image_project` are set.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-5-7-hpc-centos-7",
"project": "schedmd-slurm-public"
}
| no | | [instance\_template](#input\_instance\_template) | Self link to a custom instance template. If set, other VM definition
variables such as machine\_type and instance\_image will be ignored in favor
of the provided instance template.

For more information on creating custom images for the instance template
that comply with Slurm on GCP see the "Slurm on GCP Custom Images" section
in docs/vm-images.md. | `string` | `null` | no | | [labels](#input\_labels) | Labels, provided as a map. | `map(string)` | `{}` | no | | [machine\_type](#input\_machine\_type) | Machine type to create. | `string` | `"n2-standard-2"` | no | diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf index 3e5bd0fd85..da2fb1b759 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf @@ -32,13 +32,6 @@ locals { enable_public_ip_access_config = var.disable_login_public_ips ? [] : [{ nat_ip = null, network_tier = null }] access_config = length(var.access_config) == 0 ? local.enable_public_ip_access_config : var.access_config - # Handle VM image format from 2 sources, prioritize source_image* variables - # over instance_image - source_image_input_used = var.source_image != "" || var.source_image_family != "" || var.source_image_project != "" - source_image = local.source_image_input_used ? var.source_image : lookup(var.instance_image, "name", "") - source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "") - source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "") - additional_disks = [ for ad in var.additional_disks : { disk_name = ad.disk_name @@ -57,7 +50,7 @@ data "google_compute_default_service_account" "default" { } module "slurm_login_template" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=5.7.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=5.7.4" additional_disks = local.additional_disks can_ip_forward = var.can_ip_forward @@ -81,9 +74,9 @@ module "slurm_login_template" { region = var.region shielded_instance_config = var.shielded_instance_config slurm_instance_role = "login" - source_image_family = local.source_image_family - source_image_project = local.source_image_project - source_image = local.source_image + source_image_family = local.source_image_family # requires source_image_logic.tf + source_image_project = local.source_image_project_normalized # requires source_image_logic.tf + source_image = local.source_image # requires source_image_logic.tf network = var.network_self_link == null ? "" : var.network_self_link subnetwork_project = var.subnetwork_project == null ? "" : var.subnetwork_project subnetwork = var.subnetwork_self_link == null ? "" : var.subnetwork_self_link @@ -95,7 +88,7 @@ module "slurm_login_template" { } module "slurm_login_instance" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance?ref=5.7.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance?ref=5.7.4" access_config = local.access_config slurm_cluster_name = local.slurm_cluster_name diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf new file mode 100644 index 0000000000..8e5a1ea5ec --- /dev/null +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf @@ -0,0 +1,29 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +locals { + # Handle VM image format from 2 sources, prioritize source_image* variables + # over instance_image + source_image_input_used = var.source_image != "" || var.source_image_family != "" || var.source_image_project != "" + source_image = local.source_image_input_used ? var.source_image : lookup(var.instance_image, "name", "") + source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "") + source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "") + source_image_project_normalized = ( + local.source_image != "" || length(regexall("/", local.source_image_project)) > 0 + ? local.source_image_project + : "projects/${local.source_image_project}/global/images/family" + ) +} diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf index dcafbcaada..44ed037c5c 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf @@ -15,7 +15,7 @@ */ # Most variables have been sourced and modified from the SchedMD/slurm-gcp -# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 variable "project_id" { type = string @@ -298,7 +298,7 @@ variable "instance_image" { type = map(string) default = { family = "slurm-gcp-5-7-hpc-centos-7" - project = "projects/schedmd-slurm-public/global/images/family" + project = "schedmd-slurm-public" } validation { diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/versions.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/versions.tf index afdf62ab5c..d195c77a28 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/versions.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-login/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-login/v1.20.0" } required_version = ">= 0.14.0" } diff --git a/community/modules/scripts/htcondor-install/README.md b/community/modules/scripts/htcondor-install/README.md index 7025bf93b4..7ba8d16fd1 100644 --- a/community/modules/scripts/htcondor-install/README.md +++ b/community/modules/scripts/htcondor-install/README.md @@ -1,12 +1,14 @@ ## Description This module creates a Toolkit runner that will install HTCondor on RedHat 7 or -derivative operating systems such as the CentOS 7 release in the [HPC VM -Image][hpcvmimage]. It should also function on RedHat or Rocky Linux releases 8 -and 9, however it is not yet supported. Please report any [issues] on these -platforms. +8 and its derivative operating systems. These include the CentOS 7 and Rocky +Linux 8 releases of the [HPC VM Image][hpcvmimage]. It may also function on +RedHat 9 and derivatives; however, it is not yet supported.
Please report any +[issues] on these 3 distributions or open a [discussion] to request support on +Debian or Ubuntu distributions. [issues]: https://github.com/GoogleCloudPlatform/hpc-toolkit/issues +[discussion]: https://github.com/GoogleCloudPlatform/hpc-toolkit/discussions It also exports a list of Google Cloud APIs which must be enabled prior to provisioning an HTCondor Pool. @@ -124,6 +126,7 @@ No resources. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [condor\_version](#input\_condor\_version) | Yum/DNF-compatible version string; leave unset to default to 10.x series (examples: "10.5.1", "10.*") | `string` | `"10.*"` | no | | [enable\_docker](#input\_enable\_docker) | Install and enable docker daemon alongside HTCondor | `bool` | `true` | no | ## Outputs diff --git a/community/modules/scripts/htcondor-install/files/install-htcondor.yaml b/community/modules/scripts/htcondor-install/files/install-htcondor.yaml index 70c0055cb4..7aecf62b9a 100644 --- a/community/modules/scripts/htcondor-install/files/install-htcondor.yaml +++ b/community/modules/scripts/htcondor-install/files/install-htcondor.yaml @@ -23,9 +23,12 @@ vars: enable_docker: true become: true + module_defaults: + ansible.builtin.yum: + lock_timeout: 300 tasks: - name: Enable EPEL repository - ansible.builtin.package: + ansible.builtin.yum: name: - epel-release - name: Enable HTCondor Feature Release repository @@ -49,8 +52,8 @@ repo_gpgcheck: true priority: "90" - name: Install HTCondor - ansible.builtin.package: - name: condor + ansible.builtin.yum: + name: condor-{{ condor_version | default("10.*") | string }} state: present - name: Ensure token directory ansible.builtin.file: @@ -70,7 +73,7 @@ gpgcheck: yes gpgkey: https://download.docker.com/linux/centos/gpg - name: Install Docker - ansible.builtin.package: + ansible.builtin.yum: name: - docker-ce - docker-ce-cli diff --git a/community/modules/scripts/htcondor-install/main.tf b/community/modules/scripts/htcondor-install/main.tf index 6659a1965a..afd7430dd9 100644 --- a/community/modules/scripts/htcondor-install/main.tf +++ b/community/modules/scripts/htcondor-install/main.tf @@ -19,7 +19,10 @@ locals { "type" = "ansible-local" "source" = "${path.module}/files/install-htcondor.yaml" "destination" = "install-htcondor.yaml" - "args" = "-e enable_docker=${var.enable_docker}" + "args" = join(" ", [ + "-e enable_docker=${var.enable_docker}", + "-e condor_version=${var.condor_version}", + ]) } runner_install_autoscaler_deps = { diff --git a/community/modules/scripts/htcondor-install/variables.tf b/community/modules/scripts/htcondor-install/variables.tf index 2ccd9d1156..0334d62d52 100644 --- a/community/modules/scripts/htcondor-install/variables.tf +++ b/community/modules/scripts/htcondor-install/variables.tf @@ -19,3 +19,9 @@ variable "enable_docker" { type = bool default = true } + +variable "condor_version" { + description = "Yum/DNF-compatible version string; leave unset to default to 10.x series (examples: \"10.5.1\", \"10.*\")" + type = string + default = "10.*" +} diff --git a/community/modules/scripts/wait-for-startup/versions.tf b/community/modules/scripts/wait-for-startup/versions.tf index be1799aca6..3b8ac805b8 100644 --- a/community/modules/scripts/wait-for-startup/versions.tf +++ b/community/modules/scripts/wait-for-startup/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.19.1" + module_name =
"blueprints/terraform/hpc-toolkit:wait-for-startup/v1.20.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scripts/windows-startup-script/README.md b/community/modules/scripts/windows-startup-script/README.md new file mode 100644 index 0000000000..d5e4585bd6 --- /dev/null +++ b/community/modules/scripts/windows-startup-script/README.md @@ -0,0 +1,104 @@ +## Description + +This module contains a set of scripts to be used in customizing Windows VMs at +boot or during image building. Please note that the installation of NVIDIA GPU +drivers takes, at minimum, 30-60 minutes. It is therefore recommended to build +a custom image and reuse it as shown below, rather than install GPU drivers at +boot time. + +> NOTE: the output `windows_startup_ps1` must be passed explicitly as shown +> below when used with Packer modules. This is due to a limitation in the `use` +> keyword and inputs of type `list` in Packer modules; this does not impact +> Terraform modules + +### NVIDIA Drivers and CUDA Toolkit + +Many Google Cloud VM families include or can have NVIDIA GPUs attached to them. +This module supports GPU applications by enabling you to easily install +a compatible release of NVIDIA drivers and of the CUDA Toolkit. The script is +the [solution recommended by our documentation][docs] and is [directly sourced +from GitHub][script-src]. + +[docs]: https://cloud.google.com/compute/docs/gpus/install-drivers-gpu#windows +[script-src]: https://github.com/GoogleCloudPlatform/compute-gpu-installation/blob/24dac3004360e0696c49560f2da2cd60fcb80107/windows/install_gpu_driver.ps1 + +```yaml +- group: primary + modules: + - id: network1 + source: modules/network/vpc + settings: + enable_iap_rdp_ingress: true + enable_iap_winrm_ingress: true + + - id: windows_startup + source: community/modules/scripts/windows-startup-script + settings: + install_nvidia_driver: true + +- group: packer + modules: + - id: image + source: modules/packer/custom-image + kind: packer + use: + - network1 + - windows_startup + settings: + source_image_family: windows-2016 + machine_type: n1-standard-8 + accelerator_count: 1 + accelerator_type: nvidia-tesla-t4 + disk_size: 75 + disk_type: pd-ssd + omit_external_ip: false + state_timeout: 15m +``` + +## License + + +Copyright 2023 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.14.0 | + +## Providers + +No providers. + +## Modules + +No modules. + +## Resources + +No resources. 
+ +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [install\_nvidia\_driver](#input\_install\_nvidia\_driver) | Generate a PowerShell script that installs NVIDIA GPU drivers and the CUDA Toolkit | `bool` | `false` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [windows\_startup\_ps1](#output\_windows\_startup\_ps1) | A string list of scripts selected by this module | + diff --git a/community/modules/scripts/windows-startup-script/files/install_gpu_driver.ps1 b/community/modules/scripts/windows-startup-script/files/install_gpu_driver.ps1 new file mode 100644 index 0000000000..89d4da0644 --- /dev/null +++ b/community/modules/scripts/windows-startup-script/files/install_gpu_driver.ps1 @@ -0,0 +1,111 @@ +#Requires -RunAsAdministrator + +<# + # Copyright 2021 Google Inc. + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. +#> + +# Determine which management interface to use +# +# Get-WmiObject is deprecated and removed in Powershell 6.0+ +# https://learn.microsoft.com/en-us/powershell/scripting/whats-new/differences-from-windows-powershell?view=powershell-7#cmdlets-removed-from-powershell +# +# We maintain backwards compatibility with older versions of Powershell by using Get-WmiObject if available +function Get-Mgmt-Command { + $Command = 'Get-CimInstance' + if (Get-Command Get-WmiObject 2>&1>$null) { + $Command = 'Get-WmiObject' + } + return $Command +} + +# Check if the GPU exists with Windows Management Instrumentation, returning the device ID if it exists +function Find-GPU { + $MgmtCommand = Get-Mgmt-Command + try { + $Command = "(${MgmtCommand} -query ""select DeviceID from Win32_PNPEntity Where (deviceid Like '%PCI\\VEN_10DE%') and (PNPClass = 'Display' or Name = '3D Video Controller')"" | Select-Object DeviceID -ExpandProperty DeviceID).substring(13,8)" + $dev_id = Invoke-Expression -Command $Command + return $dev_id + } + catch { + Write-Output "There doesn't seem to be a GPU unit connected to your system." + return "" + } +} + +# Check if the Driver is already installed +function Check-Driver { + try { + &'nvidia-smi.exe' + Write-Output 'Driver is already installed.'
+ Exit + } + catch { + Write-Output 'Driver is not installed, proceeding with installation' + } +} + +# Install the driver +function Install-Driver { + + # Check if the GPU exists and if the driver is already installed + $gpu_dev_id = Find-GPU + + # Set the correct URL, filename, and arguments to the installer + $url = 'https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_531.14_windows.exe'; + $file_dir = 'C:\NVIDIA-Driver\cuda_12.1.1_531.14_windows.exe'; + $install_args = '/s /n'; + $os_name = Invoke-Expression -Command 'systeminfo | findstr /B /C:"OS Name"' + if ($os_name.Contains("Microsoft Windows Server 2016 Datacenter")) { + # Windows Server 2016 needs an older version of the installer to work properly + $url = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_522.06_windows.exe" + $file_dir = "C:\NVIDIA-Driver\cuda_11.8.0_522.06_windows.exe" + # Windows 2016 also requires manual setting of TLS version + [Net.ServicePointManager]::SecurityProtocol = 'Tls12' + } + if ("DEV_102D".Equals($gpu_dev_id)) { + # K80 GPUs must use an older driver/CUDA version + $url = 'https://developer.download.nvidia.com/compute/cuda/11.4.0/network_installers/cuda_11.4.0_win10_network.exe'; + $file_dir = 'C:\NVIDIA-Driver\cuda_11.4.0_win10_network.exe'; + } + if ("DEV_27B8".Equals($gpu_dev_id)) { + # The latest CUDA bundle (12.1.1) does not support L4 GPUs, so this script + # only installs the driver (version 528.89). There is a different installer + # for Windows server 2016/2019/2022 and Windows 10/11, so use systeminfo + # to determine which installer to use. + $install_args = '/s /noeula /noreboot'; + if ($os_name.Contains("Server")) { + $url = 'https://us.download.nvidia.com/tesla/528.89/528.89-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe'; + $file_dir = 'C:\NVIDIA-Driver\528.89-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe'; + } else { + $url = 'https://us.download.nvidia.com/tesla/528.89/528.89-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe'; + $file_dir = 'C:\NVIDIA-Driver\528.89-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe'; + } + } + Check-Driver + + # Create the folder for the driver download + if (!(Test-Path -Path 'C:\NVIDIA-Driver')) { + New-Item -Path 'C:\' -Name 'NVIDIA-Driver' -ItemType 'directory' | Out-Null + } + + # Download the file to a specified directory + Invoke-WebRequest $url -OutFile $file_dir + + # Install the file with the specified path from earlier as well as the RunAs admin option + Start-Process -FilePath $file_dir -ArgumentList $install_args -Wait +} + +# Run the functions +Install-Driver diff --git a/community/modules/scripts/windows-startup-script/main.tf b/community/modules/scripts/windows-startup-script/main.tf new file mode 100644 index 0000000000..7974559ee6 --- /dev/null +++ b/community/modules/scripts/windows-startup-script/main.tf @@ -0,0 +1,23 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +locals { + nvidia_ps1 = var.install_nvidia_driver ? [file("${path.module}/files/install_gpu_driver.ps1")] : [] + + # anticipate concat multiple solutions over time + startup_ps1 = local.nvidia_ps1 +} diff --git a/community/modules/scripts/windows-startup-script/outputs.tf b/community/modules/scripts/windows-startup-script/outputs.tf new file mode 100644 index 0000000000..006ea312ad --- /dev/null +++ b/community/modules/scripts/windows-startup-script/outputs.tf @@ -0,0 +1,20 @@ +/** + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +output "windows_startup_ps1" { + description = "A string list of scripts selected by this module" + value = local.startup_ps1 +} diff --git a/community/modules/scripts/windows-startup-script/variables.tf b/community/modules/scripts/windows-startup-script/variables.tf new file mode 100644 index 0000000000..53b3b21045 --- /dev/null +++ b/community/modules/scripts/windows-startup-script/variables.tf @@ -0,0 +1,21 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +variable "install_nvidia_driver" { + description = "Generate a PowerShell script that installs NVIDIA GPU drivers and the CUDA Toolkit" + type = bool + default = false +} diff --git a/community/modules/scripts/windows-startup-script/versions.tf b/community/modules/scripts/windows-startup-script/versions.tf new file mode 100644 index 0000000000..145b1fbfda --- /dev/null +++ b/community/modules/scripts/windows-startup-script/versions.tf @@ -0,0 +1,23 @@ +/** + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +terraform { + provider_meta "google" { + module_name = "blueprints/terraform/hpc-toolkit:windows-startup-script/v1.20.0" + } + + required_version = ">= 0.14.0" +} diff --git a/docs/hybrid-slurm-cluster/demo-with-cloud-controller-instructions.md b/docs/hybrid-slurm-cluster/demo-with-cloud-controller-instructions.md index 16fc7b7a61..67178fa5db 100644 --- a/docs/hybrid-slurm-cluster/demo-with-cloud-controller-instructions.md +++ b/docs/hybrid-slurm-cluster/demo-with-cloud-controller-instructions.md @@ -22,7 +22,7 @@ for use with an on-premise slurm-cluster. > further testing is done, documentation on applying the hybrid module to > on-premise slurm clusters will be added and expanded. -[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 ## Definitions diff --git a/docs/hybrid-slurm-cluster/deploy-instructions.md b/docs/hybrid-slurm-cluster/deploy-instructions.md index cc7be13fe1..781d4588ed 100644 --- a/docs/hybrid-slurm-cluster/deploy-instructions.md +++ b/docs/hybrid-slurm-cluster/deploy-instructions.md @@ -260,8 +260,8 @@ sudo systemctl restart slurmctld If the restart did not succeed, the logs at `/var/log/slurm/slurmctld.log` should point you in the right direction. -[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 -[slurm-gcp-hybrid]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/docs/hybrid.md +[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 +[slurm-gcp-hybrid]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/docs/hybrid.md [demo-with-cloud-controller-instructions.md]: ./demo-with-cloud-controller-instructions.md ## Validate the Hybrid Cluster diff --git a/docs/hybrid-slurm-cluster/on-prem-instructions.md b/docs/hybrid-slurm-cluster/on-prem-instructions.md index d982c2be2a..1c35bc8246 100644 --- a/docs/hybrid-slurm-cluster/on-prem-instructions.md +++ b/docs/hybrid-slurm-cluster/on-prem-instructions.md @@ -39,9 +39,9 @@ detail, as well as how to customize many of these assumptions to fit your needs. deployments in their [hybrid.md] documentation. 
[hybridmodule]: ../../community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md -[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 [slurm\_controller\_hybrid]: https://github.com/SchedMD/slurm-gcp/tree/master/terraform/slurm_cluster/modules/slurm_controller_hybrid -[hybrid.md]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/docs/hybrid.md +[hybrid.md]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/docs/hybrid.md ### NFS Mounts @@ -235,12 +235,12 @@ image created with slurm 21.08.8: partition_name: compute ``` -[slurmgcppacker]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/packer -[example.pkrvars.hcl]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/packer/example.pkrvars.hcl -[slurmversion]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/packer/variables.pkr.hcl#L97 -[`service_account_scopes`]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/packer/variables.pkr.hcl#L166 -[`munge_user`]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/ansible/roles/munge/defaults/main.yml#L17 -[`slurm_user`]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/ansible/roles/slurm/defaults/main.yml#L31 +[slurmgcppacker]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/packer +[example.pkrvars.hcl]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/packer/example.pkrvars.hcl +[slurmversion]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/packer/variables.pkr.hcl#L97 +[`service_account_scopes`]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/packer/variables.pkr.hcl#L166 +[`munge_user`]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/ansible/roles/munge/defaults/main.yml#L17 +[`slurm_user`]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/ansible/roles/slurm/defaults/main.yml#L31 ## On Premise Setup diff --git a/docs/module-guidelines.md b/docs/module-guidelines.md new file mode 100644 index 0000000000..19a681e61b --- /dev/null +++ b/docs/module-guidelines.md @@ -0,0 +1,185 @@ +# Module authoring guidelines + +Modules should adhere to the following guidelines. + +## Terraform Requirements + +The module source field must point to a single terraform module. We recommend +the following structure: + +* `main.tf` file composing the terraform resources using provided variables. +* `variables.tf` file defining the variables used. +* (Optional) `outputs.tf` file defining any exported outputs used (if any). +* (Optional) `modules/` sub-directory pointing to submodules needed to create the + top level module. + +## General Best Practices + +* Variables for environment-specific values (like `project_id`) should not be + given defaults. This forces the calling module to provide meaningful values. +* Variables should only have zero-value defaults, such as null (preferred) or empty string, + where leaving the variable empty is a valid preference which will not be + rejected by the underlying API(s); see the sketch after this list. +* Set good defaults wherever possible. Be opinionated about HPC use cases. +* Follow common variable [naming conventions](#use-common-names-and-types-for-common-variables). +* Do not set defaults for common hpc-toolkit variables that are already defined (`region`, `zone`, `project_id`, `deployment_name`, etc.). +* All files should contain a license header. Headers can be added automatically using [addlicense](https://github.com/google/addlicense), + or `make add-google-license` if adding a Google License. +* No `provider` blocks should be defined in re-usable modules. It is OK to impose a range of acceptable provider versions. In the case of conflicts, the root module will configure all providers and pass alternatives as an alias. See: +https://developer.hashicorp.com/terraform/language/modules/develop/providers#implicit-provider-inheritance
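A minimal sketch of these defaulting practices, using illustrative variable names rather than the interface of any existing module:

```hcl
variable "project_id" {
  description = "Project in which resources will be created"
  type        = string
  # Environment-specific: no default, so the caller must supply a value.
}

variable "startup_script" {
  description = "Optional script to run at boot; leaving it unset is valid"
  type        = string
  default     = null # zero-value default that the underlying API accepts
}
```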
## Terraform Coding Standards + +Any Terraform based modules in the HPC Toolkit should implement the following +standards: + +* `terraform-docs` is used to generate `README` files for each module. +* The order for parameters in inputs should be: + * `description` + * `type` + * `default` +* The order for parameters in outputs should be: + * `description` + * `value` + +## Do not create resources that can be passed externally + +Do not create resources that can be passed externally unless: +* the resource has to be owned uniquely by the module; +* the resource has to conform to module specific constraints (e.g. a vm-instance with a particular image, or a firewall rule to serve the needs of this module); +* the resource cannot possibly be (re)used outside of this module. + +Example resources already provided by core toolkit modules: + +* [vm-instance](https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/modules/compute/vm-instance) +* [vpc & subnetworks](https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/modules/network) +* [filestore](https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/modules/file-system/filestore) + +File systems, networks, service accounts, and GCS buckets can all be passed into the module and should not be created within the module. + +## Prefer FQN over ambiguous formats + +For instance, in networks prefer `network_self_link` over `network_name` and `subnetwork_self_link` over `subnetwork_name`, as self-links immediately work with shared VPCs and already specify `region/zone/project_ids`. + +## All resources should be labeled + +The module, if it creates any resource, should take a variable called `labels` and apply it to every resource. + +```hcl +variable "labels" { + description = "Labels to add to the resources. Key-value pairs." + type = map(string) +} +``` + +If the module creates its own labels, then we recommend merging user-provided labels into the module’s labels: + +```hcl +locals { + labels = merge(var.labels, { ghpc_module = "my-module" }) +} +``` + +## Use common names and types for common variables + +Matching names allow variables to be implicitly injected into modules; a minimal sketch follows this list. + +* `project_id {type=string}` - the GCP project ID in which to create the GCP resources; +* `labels {type=map(string)}` - [labels](https://cloud.google.com/resource-manager/docs/creating-managing-labels) added to the module. In order to include any module in advanced + monitoring, labels must be exposed. We strongly recommend that all modules + expose this variable. It also makes it easy for customers to filter resources in the Cloud Console and in billing; +* `region {type=string}` - the GCP + [region](https://cloud.google.com/compute/docs/regions-zones) the module will be created in; +* `zone {type=string}` - the GCP [zone](https://cloud.google.com/compute/docs/regions-zones) + the module will be created in; +* `deployment_name {type=string}` - the name of the current deployment of a blueprint. This + can help to avoid naming conflicts of modules when multiple deployments are + created from the same blueprint. `deployment_name` is often used to determine default resource names, or as a prefix to resource names, e.g. + [`modules/filestore.deploy_name`](../modules/file-system/filestore/README.md#inputs);
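As a sketch of the conventions above (the descriptions are illustrative, not copied from any one module), a module adopting the common names might declare:

```hcl
variable "project_id" {
  description = "The GCP project ID in which to create the GCP resources"
  type        = string
}

variable "deployment_name" {
  description = "Name of the current deployment; often used to prefix default resource names"
  type        = string
}

variable "region" {
  description = "The GCP region the module will be created in"
  type        = string
}

variable "zone" {
  description = "The GCP zone the module will be created in"
  type        = string
}

variable "labels" {
  description = "Labels to add to the resources. Key-value pairs."
  type        = map(string)
}
```

Because the names and types match these conventions, a blueprint can supply the values without explicit settings.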
+ +### `instance_image {type=object({family=string,project=string})}` + +To take or return information about an instance image, use the variable `instance_image`. If it's critical for the module to include the `name`, use `type=map(string)` instead. + +### `enable_oslogin {type=string}` + +When relevant, enable or disable OS Login with `"ENABLE"` or `"DISABLE"`. Set to `"INHERIT"` to inherit the project OS Login setting. Note that this is under ongoing development and is not yet fully homogenized in the Cloud HPC Toolkit. + +### Network + +Properties of networks are represented by several separate variables: +* `network_name` - name of the network (avoid using this); +* `network_id` - ID of the network; +* `network_self_link` - URI of the VPC (preferred); +* `subnetwork_name` - the name of the primary subnetwork; +* `subnetwork_self_link` - self-link to the primary subnetwork (preferred); +* `subnetwork_address` - address range of the primary subnetwork. + +### Network Storage + +If your module provides mountable network storage, it should output `network_storage` of type: + +```hcl +object({ + server_ip = string + remote_mount = string + local_mount = string + fs_type = string + mount_options = string + client_install_runner = map(string) # Runner for installing client + mount_runner = map(string) # Runner to mount the file-system +}) +``` + +If a module returns multiple "storages", it should output `network_storage` of type `list(object(... same as above ...))`. + +If a module consumes network storage, it should have a variable `network_storage` of type `list(object(... any subset of fields from above ...))`. + +## Use startup-script module + +If there is a need to execute a shell script or an Ansible playbook, or simply to upload a file to the vm-instance, consider using the `startup-script` [module](https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/modules/scripts/startup-script) as a first option. The `startup-script` module takes care of uploading local files to GCS and downloading them to the vm-instance, installing Ansible if needed, configuring SSH, and executing the requested scripts. + +To represent scripts to execute, HPC Toolkit modules use "runners". A runner is a `map(string)` with the following expected fields: + +* `destination`: (Required) The name of the file at the destination VM; +* `type`: (Required) One of the following: shell, ansible-local, and data; +* `content`: (Optional) Content to be uploaded and executed; +* `source`: (Optional) A path to the file or data you want to upload; +* `args`: (Optional) Arguments to be passed to the executable. + +If your module consumes or produces scripts to run on vm-instances, please adhere to this format. + +Example usage of the `startup-script` module: + +```hcl +variable "extra_runner" { + description = "Custom script provided by the user to run on the vm-instance" + type = map(string) +} +... +locals { + common_runner = { # some setup required by your module + "type" = "shell" + "content" = "echo Hello" + "destination" = "say_hi.sh" + } + runners = [local.common_runner, var.extra_runner] +} +... +module "startup_script" { + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script" + deployment_name = var.deployment_name + project_id = var.project_id + region = var.region + labels = local.labels + runners = local.runners +} +... +module "vm" { + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/compute/vm-instance" + ...
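+ # Comment added for clarity: module.startup_script.startup_script is the + # rendered boot script that executes every runner in local.runners at boot.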
+ startup_script = module.startup_script.startup_script +} +``` + +For more information see [startup-script/README](https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/modules/scripts/startup-script#readme). diff --git a/docs/tutorials/htcondor.md b/docs/tutorials/htcondor.md index 27f8168522..1f7c7b9932 100644 --- a/docs/tutorials/htcondor.md +++ b/docs/tutorials/htcondor.md @@ -20,7 +20,7 @@ Talk with your tutorial leader to see if Google Cloud credits are available. ## Enable APIs & Permissions -In a new Google Cloud project there are several apis that must be enabled to +In a new Google Cloud project there are several APIs that must be enabled to deploy your HPC cluster. These will be caught when you perform `terraform apply` but you can save time by enabling them now by running: @@ -63,8 +63,8 @@ a basic auto-scaling HTCondor pool. * a new VPC network secured from the public internet * an HTCondor Access Point for users to submit jobs * an HTCondor Central Manager that will operate the pool -* a Managed Instance Group to scale a pool of HTCondor Execute Points to serve - new jobs as they are submitted +* 2 Managed Instance Groups for HTCondor Execute Points: 1 is configured with + Spot pricing and the other with On-Demand pricing The blueprint `community/examples/htc-htcondor.yaml` should be open in the Cloud Shell Editor (on the left). @@ -89,15 +89,13 @@ contains the terraform needed to deploy your cluster. Use the following commands to run terraform and deploy your cluster. ```bash -terraform -chdir=htcondor-001/htcondor init -terraform -chdir=htcondor-001/htcondor validate -terraform -chdir=htcondor-001/htcondor apply -auto-approve +./ghpc deploy htcondor-pool --auto-approve ``` -If you receive any errors during `apply`, you may re-run it to resolve them. -The deployment will take about 3 minutes. There should be regular status updates -in the terminal. If the `apply` is successful, a message similar to the -following will be displayed: +The Toolkit will automatically approve provisioning a network, building a VM +image with HTCondor and, finally, the HTCondor pool itself. There will be +regular status updates in the terminal. At the conclusion, a message similar to +the following will be displayed: @@ -111,10 +109,10 @@ Apply complete! Resources: xx added, 0 changed, 0 destroyed. Once terraform has finished, you may SSH to the HTCondor Access Point: ```bash -gcloud compute ssh htcondor001-ap-0 --tunnel-through-iap --project --zone us-central1-c +gcloud compute ssh htcondor-pool-ap-0 --tunnel-through-iap --project --zone us-central1-c ``` -Alternatively, you may browse to the `htcondor001-ap-0` VM and click on "SSH" in +Alternatively, you may browse to the `htcondor-pool-ap-0` VM and click on "SSH" in the Cloud Console at this address: ```text @@ -142,7 +140,7 @@ connect"). Installation may take 5 minutes or more. When it succeeds, you will observe output similar to ```text -htcondor001-ap-0.us-central1-c.c..internal +htcondor-pool-ap-0.us-central1-c.c..internal ``` ## Submit an example job @@ -224,7 +222,7 @@ You should be returned to the Cloud Shell console. 
You may then destroy your HTCondor pool: ```bash -terraform -chdir=htcondor-001/htcondor destroy -auto-approve +./ghpc destroy htcondor-pool --auto-approve ``` When complete you should see output similar to: diff --git a/docs/vm-images.md b/docs/vm-images.md index 048f6e8a38..4cfca0b1cb 100644 --- a/docs/vm-images.md +++ b/docs/vm-images.md @@ -2,6 +2,7 @@ * [HPC CentOS 7 VM Image](#hpc-centos-7-vm-image) * [Ubuntu](#ubuntu) +* [Windows](#windows) * [Other Images](#other-images) ## HPC CentOS 7 VM Image @@ -57,6 +58,20 @@ settings: [omnia-install]: ../community/modules/scripts/omnia-install/README.md [hpc-slurm-ubuntu2004.yaml]: ../community/examples/hpc-slurm-ubuntu2004.yaml +## Windows + +The HPC Toolkit provides limited support for building custom VM images based on +the [Windows images][windows-images] published by Google. The custom VM images +can be used in blueprints so long as the underlying scheduler and workload +supports Windows. Windows solutions do not receive the same level of testing as +Linux solutions so you should anticipate that there will not be functionality +parity. Please file [issues] when encountering specific problems and [feature +requests][features] when requesting new functionality. + +[windows-images]: https://cloud.google.com/compute/docs/images/os-details#windows_server +[issues]: https://github.com/GoogleCloudPlatform/hpc-toolkit/issues +[features]: https://github.com/GoogleCloudPlatform/hpc-toolkit/discussions/categories/ideas-and-feature-requests + ## Other Images The HPC Toolkit strives to provide flexibility wherever possible. It is possible diff --git a/examples/README.md b/examples/README.md index c454437329..36a5ebd46c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -30,6 +30,7 @@ md_toc github examples/README.md | sed -e "s/\s-\s/ * /" * [hpc-slurm-local-ssd.yaml](#hpc-slurm-local-ssdyaml--) ![community-badge] ![experimental-badge] * [hpc-gke.yaml](#hpc-gkeyaml--) ![community-badge] ![experimental-badge] * [ml-gke](#ml-gkeyaml--) ![community-badge] ![experimental-badge] + * [storage-gke](#storage-gkeyaml--) ![community-badge] ![experimental-badge] * [htc-slurm.yaml](#htc-slurmyaml--) ![community-badge] ![experimental-badge] * [htc-htcondor.yaml](#htc-htcondoryaml--) ![community-badge] ![experimental-badge] * [tutorial-starccm.yaml](#tutorial-starccmyaml--) ![community-badge] ![experimental-badge] @@ -119,7 +120,7 @@ the experimental badge (![experimental-badge]). > > ```shell > # Install Python3 and run -> pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.3/scripts/requirements.txt +> pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.4/scripts/requirements.txt > ``` Creates a basic auto-scaling Slurm cluster with mostly default settings. The @@ -524,7 +525,7 @@ For this example the following is needed in the selected region: > > ```shell > # Install Python3 and run -> pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.3/scripts/requirements.txt +> pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.4/scripts/requirements.txt > ``` Similar to the [hpc-slurm.yaml] example, but using Ubuntu 20.04 instead of CentOS 7. @@ -809,6 +810,25 @@ credentials for the created cluster_ and _submit a job calling `nvidia_smi`_. 
[ml-gke.yaml]: ../community/examples/ml-gke.yaml [`kubernetes-operations`]: ../community/modules/scripts/kubernetes-operations/README.md +### [storage-gke.yaml] ![community-badge] ![experimental-badge] + +This blueprint shows how to use different storage options with GKE in the toolkit. + +The blueprint contains the following: + +* A K8s Job that uses a Filestore as a shared file system between pods. +* More coming in the future... + +> **Note**: The Kubernetes API server will only allow requests from authorized +> networks. The `gke-persistent-volume` module needs access to the Kubernetes +> API server to create a Persistent Volume and a Persistent Volume Claim. **You +> must use the `authorized_cidr` variable to supply an authorized network which +> contains the IP address of the machine deploying the blueprint, for example +> `--vars authorized_cidr=/32`.** You can use a service like +> [whatismyip.com](https://whatismyip.com) to determine your IP address. + +[storage-gke.yaml]: ../community/examples/storage-gke.yaml + ### [htc-htcondor.yaml] ![community-badge] ![experimental-badge] This blueprint provisions an auto-scaling [HTCondor][htcondor] pool based upon diff --git a/examples/hpc-enterprise-slurm.yaml b/examples/hpc-enterprise-slurm.yaml index a24871540a..35ed922618 100644 --- a/examples/hpc-enterprise-slurm.yaml +++ b/examples/hpc-enterprise-slurm.yaml @@ -25,7 +25,7 @@ vars: # Visit https://github.com/SchedMD/slurm-gcp/blob/master/docs/images.md#published-image-family # for a list of valid family options with Slurm family: schedmd-v5-slurm-22-05-9-hpc-centos-7 - project: projects/schedmd-slurm-public/global/images/family + project: schedmd-slurm-public # Set to true for active cluster reconfiguration. # Note that setting this option requires additional dependencies to be installed locally. # https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/community/modules/scheduler/schedmd-slurm-gcp-v5-controller#description @@ -34,6 +34,14 @@ vars: # Note that setting this option requires additional dependencies to be installed locally. 
enable_cleanup_compute: true +# Recommended to use GCS backend for Terraform state +# See https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/examples#optional-setting-up-a-remote-terraform-state +# +# terraform_backend_defaults: +# type: gcs +# configuration: +# bucket: <> + # Documentation for each of the modules used below can be found at # https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md diff --git a/examples/ml-slurm.yaml b/examples/ml-slurm.yaml index 45e420494b..f2dfe6ca9a 100644 --- a/examples/ml-slurm.yaml +++ b/examples/ml-slurm.yaml @@ -27,6 +27,14 @@ vars: new_image_family: ml-slurm disk_size_gb: 200 +# Recommended to use GCS backend for Terraform state +# See https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/examples#optional-setting-up-a-remote-terraform-state +# +# terraform_backend_defaults: +# type: gcs +# configuration: +# bucket: <> + deployment_groups: - group: primary modules: diff --git a/examples/serverless-batch-mpi.yaml b/examples/serverless-batch-mpi.yaml index 660d6c7aa7..ba3abda1c0 100644 --- a/examples/serverless-batch-mpi.yaml +++ b/examples/serverless-batch-mpi.yaml @@ -134,7 +134,7 @@ deployment_groups: settings: name_prefix: spack-builder add_deployment_name_before_prefix: true - machine_type: c2-standard-30 + machine_type: c2-standard-16 ### Batch Modules ### - id: batch-job diff --git a/go.mod b/go.mod index e329dff200..3ff2d6f15a 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/hashicorp/hcl v1.0.0 // indirect github.com/hashicorp/hcl/v2 v2.17.0 github.com/hashicorp/terraform-config-inspect v0.0.0-20221020162138-81db043ad408 - github.com/otiai10/copy v1.11.0 + github.com/otiai10/copy v1.12.0 github.com/pkg/errors v0.9.1 github.com/spf13/afero v1.9.5 github.com/spf13/cobra v1.7.0 @@ -26,7 +26,7 @@ require ( github.com/google/go-cmp v0.5.9 github.com/hashicorp/terraform-exec v0.18.1 github.com/zclconf/go-cty-debug v0.0.0-20191215020915-b22d67c1ba0b - google.golang.org/api v0.125.0 + google.golang.org/api v0.128.0 ) require ( @@ -54,7 +54,7 @@ require ( github.com/golang/protobuf v1.5.3 // indirect github.com/google/s2a-go v0.1.4 // indirect github.com/google/uuid v1.3.0 // indirect - github.com/googleapis/enterprise-certificate-proxy v0.2.3 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.2.4 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect github.com/hashicorp/go-safetemp v1.0.0 // indirect github.com/hashicorp/go-version v1.6.0 // indirect @@ -80,7 +80,7 @@ require ( golang.org/x/crypto v0.9.0 // indirect golang.org/x/net v0.10.0 // indirect golang.org/x/oauth2 v0.8.0 // indirect - golang.org/x/sys v0.8.0 + golang.org/x/sys v0.9.0 golang.org/x/text v0.9.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/appengine v1.6.7 // indirect diff --git a/go.sum b/go.sum index 37d522970d..3a2108a99c 100644 --- a/go.sum +++ b/go.sum @@ -346,8 +346,8 @@ github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/googleapis/enterprise-certificate-proxy v0.0.0-20220520183353-fd19c99a87aa/go.mod h1:17drOmN3MwGY7t0e+Ei9b45FFGA3fBs3x36SsCg1hq8= github.com/googleapis/enterprise-certificate-proxy v0.1.0/go.mod h1:17drOmN3MwGY7t0e+Ei9b45FFGA3fBs3x36SsCg1hq8= github.com/googleapis/enterprise-certificate-proxy v0.2.0/go.mod h1:8C0jb7/mgJe/9KK8Lm7X9ctZC2t60YyIpYEI16jx0Qg= -github.com/googleapis/enterprise-certificate-proxy v0.2.3 h1:yk9/cqRKtT9wXZSsRH9aurXEpJX+U6FLtpYTdC3R06k= 
-github.com/googleapis/enterprise-certificate-proxy v0.2.3/go.mod h1:AwSRAtLfXpU5Nm3pW+v7rGDHp09LsPtGY9MduiEsR9k= +github.com/googleapis/enterprise-certificate-proxy v0.2.4 h1:uGy6JWR/uMIILU8wbf+OkstIrNiMjGpEIyhx8f6W7s4= +github.com/googleapis/enterprise-certificate-proxy v0.2.4/go.mod h1:AwSRAtLfXpU5Nm3pW+v7rGDHp09LsPtGY9MduiEsR9k= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gax-go/v2 v2.1.0/go.mod h1:Q3nei7sK6ybPYH7twZdmQpAd1MKb7pfu6SK+H1/DsU0= @@ -427,8 +427,8 @@ github.com/mitchellh/go-wordwrap v1.0.0 h1:6GlHJ/LTGMrIJbwgdqdl2eEH8o+Exx/0m8ir9 github.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/otiai10/copy v1.11.0 h1:OKBD80J/mLBrwnzXqGtFCzprFSGioo30JcmR4APsNwc= -github.com/otiai10/copy v1.11.0/go.mod h1:rSaLseMUsZFFbsFGc7wCJnnkTAvdc5L6VWxPE4308Ww= +github.com/otiai10/copy v1.12.0 h1:cLMgSQnXBs1eehF0Wy/FAGsgDTDmAqFR7rQylBb1nDY= +github.com/otiai10/copy v1.12.0/go.mod h1:rSaLseMUsZFFbsFGc7wCJnnkTAvdc5L6VWxPE4308Ww= github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks= github.com/pjbgf/sha1cd v0.3.0 h1:4D5XXmUUBUl/xQ6IjCkEAbqXskkq/4O7LmGn0AqMDs4= github.com/pjbgf/sha1cd v0.3.0/go.mod h1:nZ1rrWOcGJ5uZgEEVL1VUM9iRQiZvWdbZjkKyFzPPsI= @@ -718,8 +718,8 @@ golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= -golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= +golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -855,8 +855,8 @@ google.golang.org/api v0.96.0/go.mod h1:w7wJQLTM+wvQpNf5JyEcBoxK0RH7EDrh/L4qfsuJ google.golang.org/api v0.97.0/go.mod h1:w7wJQLTM+wvQpNf5JyEcBoxK0RH7EDrh/L4qfsuJ13s= google.golang.org/api v0.98.0/go.mod h1:w7wJQLTM+wvQpNf5JyEcBoxK0RH7EDrh/L4qfsuJ13s= google.golang.org/api v0.100.0/go.mod h1:ZE3Z2+ZOr87Rx7dqFsdRQkRBk36kDtp/h+QpHbB7a70= -google.golang.org/api v0.125.0 h1:7xGvEY4fyWbhWMHf3R2/4w7L4fXyfpRGE9g6lp8+DCk= -google.golang.org/api v0.125.0/go.mod h1:mBwVAtz+87bEN6CbA1GtZPDOqY2R5ONPqJeIlvyo4Aw= +google.golang.org/api v0.128.0 h1:RjPESny5CnQRn9V6siglged+DZCgfu9l6mO9dkX9VOg= +google.golang.org/api v0.128.0/go.mod h1:Y611qgqaE92On/7g65MQgxYul3c0rEB894kniWLY750= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= diff --git a/modules/README.md 
b/modules/README.md index 9809b8e473..99c404f242 100644 --- a/modules/README.md +++ b/modules/README.md @@ -80,6 +80,7 @@ Modules that are still in development and less stable are labeled with the * **[Intel-DAOS]** ![community-badge] : Creates a [DAOS](https://docs.daos.io/) file system. * **[cloud-storage-bucket]** ![community-badge] ![experimental-badge] : Creates a Google Cloud Storage (GCS) bucket. +* **[gke-persistent-volume]** ![community-badge] ![experimental-badge] : Creates persistent volumes and persistent volume claims for shared storage. * **[nfs-server]** ![community-badge] ![experimental-badge] : Creates a VM and configures an NFS server that can be mounted by other VM. @@ -89,6 +90,7 @@ Modules that are still in development and less stable are labeled with the [intel-daos]: ../community/modules/file-system/Intel-DAOS/README.md [nfs-server]: ../community/modules/file-system/nfs-server/README.md [cloud-storage-bucket]: ../community/modules/file-system/cloud-storage-bucket/README.md +[gke-persistent-volume]: ../community/modules/file-system/gke-persistent-volume/README.md ### Monitoring @@ -180,6 +182,9 @@ Modules that are still in development and less stable are labeled with the * **[startup-script]** ![core-badge] : Creates a customizable startup script that can be fed into compute VMs. +* **[windows-startup-script]** ![community-badge] ![experimental-badge]: Creates + Windows PowerShell (PS1) scripts that can be used to customize Windows VMs + and VM images. * **[htcondor-install]** ![community-badge] ![experimental-badge] : Creates a startup script to install HTCondor and exports a list of required APIs * **[kubernetes-operations]** ![community-badge] ![experimental-badge] : @@ -208,6 +213,7 @@ Modules that are still in development and less stable are labeled with the successful completion of a startup script on a compute VM. [startup-script]: scripts/startup-script/README.md +[windows-startup-script]: ../community/modules/scripts/windows-startup-script/README.md [htcondor-install]: ../community/modules/scripts/htcondor-install/README.md [kubernetes-operations]: ../community/modules/scripts/kubernetes-operations/README.md [omnia-install]: ../community/modules/scripts/omnia-install/README.md @@ -235,24 +241,33 @@ at the top level main.tf file. ### Source (Required) -The source is a path or URL that points to the source files for a module. The -actual content of those files is determined by the [kind](#kind-may-be-required) of the -module. +The source is a path or URL that points to the source files for Packer or +Terraform modules. A source can either be a filesystem path or a URL to a git +repository: -A source can be a path which may refer to a module embedded in the `ghpc` -binary or a local file. It can also be a URL pointing to a GitHub path -containing a conforming module. +* Filesystem paths + * modules embedded in the `ghpc` executable + * modules in the local filesystem +* Remote modules hosted on github.com or any `git::` repository + * when modules are in a subdirectory of the git repository, a special + double-slash "//" notation can be required as described below + +An important distinction is that git URLs are natively supported by Terraform so +they are not copied to your deployment directory. Packer does not have native +support for git-hosted modules so the Toolkit will copy these modules into the +deployment folder on your behalf. 
#### Embedded Modules
 
-Embedded modules are embedded in the ghpc binary during compilation and cannot
+Embedded modules are added to the ghpc binary during compilation and cannot
 be edited. To refer to embedded modules, set the source path to
-`modules/<>`.
+`modules/<>` or `community/modules/<>`.
+
+The paths match the modules in the repository structure for [core modules](./)
+and [community modules](../community/modules/). Because the modules are embedded
+during compilation, your local copies may differ unless you recompile ghpc.
 
-The paths match the modules in the repository at compilation time. You can
-review the directory structure of [the core modules](./) and
-[community modules](../community/modules/) to determine which path to use. For
-example, the following code is using the embedded pre-existing-vpc module:
+For example, this snippet uses the embedded pre-existing-vpc module:
 
 ```yaml
   - id: network1
@@ -271,52 +286,97 @@ following module definition refers the local pre-existing-vpc modules.
     source: ./modules/network/pre-existing-vpc
 ```
 
-> **_NOTE:_** This example would have to be run from the HPC Toolkit repository
-> directory, otherwise the path would need to be updated to point at the correct
-> directory.
-
-#### GitHub Modules
-
-To use a Terraform module available on GitHub, set the source to a path starting
-with `github.com` (over HTTPS) or `git@github.com` (over SSH). For instance, the
-following module definitions are sourcing the vpc module by pointing at the HPC
-Toolkit GitHub repository:
+> **_NOTE:_** Relative paths (beginning with `.` or `..`) must be relative to the
+> working directory from which `ghpc` is executed. This example would have to be
+> run from a local copy of the HPC Toolkit repository. An alternative is to use
+> absolute paths to modules.
 
-Get module from GitHub over SSH:
+#### GitHub-hosted Modules and Packages
 
-```yaml
-  - id: network1
-    source: git@github.com:GoogleCloudPlatform/hpc-toolkit.git//modules/network/vpc
-```
+The [Intel DAOS blueprint][pfs-daos.yaml] makes extensive use of GitHub-hosted
+Terraform and Packer modules. You may wish to use it as an example reference for
+this documentation.
 
-Get module from GitHub over HTTPS:
+To use a Terraform module available on GitHub, set the source to a path starting
+with `github.com` (HTTPS) or `git@github.com` (SSH). For instance, the following
+module definition sources the Toolkit vpc module:
 
 ```yaml
   - id: network1
    source: github.com/GoogleCloudPlatform/hpc-toolkit//modules/network/vpc
 ```
 
-Both examples above use the [double-slash notation][tfsubdir] (`//`) to indicate
-the root directory of the git repository and the remainder of the path indicates
-the location of the Terraform module.
+This example uses the [double-slash notation][tfsubdir] (`//`) to indicate that
+the Toolkit is a "package" of multiple modules whose root directory is the root
+of the git repository. The remainder of the path indicates the sub-directory of
+the vpc module.
 
-Additionally, [specific revisions of a remote module][tfrev] can be selected by
-any valid [git reference][gitref]. Typically, these are a git branch, commit
-hash or tag. The [Intel DAOS blueprint][pfs-daos.yaml] makes extensive use
-of this feature. For example, to temporarily point to a development copy of the
-Toolkit vpc module, use:
+The example above uses the default `main` branch of the Toolkit. Specific
+[revisions][tfrev] can be selected with any valid [git reference][gitref]
+(git branch, commit hash, or tag). If the git reference is a tag or branch, we
+recommend adding `&depth=1` to reduce the data transferred over the network.
+This option cannot be set when the reference is a commit hash. The following
+examples select the vpc module on the active `develop` branch and also an older
+release of the filestore module:
 
 ```yaml
   - id: network1
     source: github.com/GoogleCloudPlatform/hpc-toolkit//modules/network/vpc?ref=develop
+  ...
+  - id: homefs
+    source: github.com/GoogleCloudPlatform/hpc-toolkit//modules/file-system/filestore?ref=v1.10.0&depth=1
 ```
 
+Because Terraform modules natively support this syntax, ghpc will not copy
+GitHub-hosted modules into your deployment folder. Terraform will download them
+into a hidden folder when you run `terraform init`.
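+
+For illustration only, blueprint `source` strings are standard Terraform
+module source addresses, so the `homefs` entry above corresponds to a
+Terraform call like the following sketch (the module name and the omitted
+settings are hypothetical):
+
+```hcl
+module "homefs" {
+  # Terraform fetches this source during `terraform init`; `?ref` pins the
+  # v1.10.0 tag and `&depth=1` requests a shallow clone
+  source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/file-system/filestore?ref=v1.10.0&depth=1"
+
+  # ... module settings omitted ...
+}
+```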
 
 [tfrev]: https://www.terraform.io/language/modules/sources#selecting-a-revision
 [gitref]: https://git-scm.com/book/en/v2/Git-Tools-Revision-Selection#_single_revisions
 [tfsubdir]: https://www.terraform.io/language/modules/sources#modules-in-package-sub-directories
 [pfs-daos.yaml]: ../community/examples/intel/pfs-daos.yaml
 
-#### Generic Git Modules
+##### GitHub-hosted Packer modules
+
+Packer does not natively support GitHub-hosted modules so `ghpc create` will
+copy modules into your deployment folder.
+
+If the module uses `//` package notation, `ghpc create` will copy the entire
+repository to the module path: `deployment_name/group_name/module_id`. However,
+when `ghpc deploy` is invoked, it will run Packer from the subdirectory
+`deployment_name/group_name/module_id/subdirectory/after/double_slash`.
+
+Referring back to the [Intel DAOS blueprint][pfs-daos.yaml], we see that it will
+create 2 deployment groups at `pfs-daos/daos-client-image` and
+`pfs-daos/daos-server-image`. However, Packer will actually be invoked from
+subdirectories ending in `daos-client-image/images` and
+`daos-server-image/images`.
+
+If the module does not use `//` package notation, `ghpc create` will copy
+only the final directory in the path to `deployment_name/group_name/module_id`.
+
+In all cases, `ghpc create` will remove the `.git` directory from the Packer
+module to ensure that you can manage the entire deployment directory with its
+own git versioning.
+
+##### GitHub over SSH
+
+Get module from GitHub over SSH:
+
+```yaml
+  - id: network1
+    source: git@github.com:GoogleCloudPlatform/hpc-toolkit.git//modules/network/vpc
+```
+
+Specific versions can be selected as for HTTPS:
+
+```yaml
+  - id: network1
+    source: git@github.com:GoogleCloudPlatform/hpc-toolkit.git//modules/network/vpc?ref=v1.10.0&depth=1
+```
+
+##### Generic Git Modules
+
 To use a Terraform module available in a non-GitHub git repository such as
 gitlab, set the source to a path starting `git::`. Two Standard git protocols
 are supported, `git::https://` for HTTPS or `git::git@github.com` for SSH.
@@ -433,34 +493,11 @@ in the project used by the HPC environment. For example, the
 [creation of VMs](compute/vm-instance/) requires the Compute Engine API
 (compute.googleapis.com). The [startup-script](scripts/startup-script/)
 module requires the Cloud Storage API (storage.googleapis.com) for storage of the
-scripts themselves. Each module includes in the Toolkit source code describes
-its required APIs internally. The Toolkit will merge the requiements from all
+scripts themselves. Each module included in the Toolkit source code describes
+its required APIs internally. The Toolkit will merge the requirements from all
 modules and [automatically validate](../README.md#blueprint-validation) that
 all APIs are enabled in the project specified by `$(vars.project_id)`.
 
-For advanced multi-project use cases and for modules not included with the
-Toolkit, you may manually add required APIs to each module with the following
-format:
-
-```yaml
-deployment_groups:
-- group: primary
-  modules:
-  ...
-  - id: examplevm
-    source: modules/example/module
-    required_apis:
-      $(vars.project_id):
-      - compute.googleapis.com
-      - storage.googleapis.com
-      $(vars.other_project_id):
-      - storage.googleapis.com
-      explicit-project-id:
-      - file.googleapis.com
-    settings:
-    ...
-```
-
 ## Common Settings
 
 The following common naming conventions should be used to decrease the verbosity
@@ -472,7 +509,7 @@ For example, if all modules are to be created in a single region, that region
 can be defined as a deployment variable named `region`, which is shared between
 all modules without an explicit setting. Similarly, if many modules need to be
 connected to the same VPC network, they all can add the vpc module ID to their
-`use` list so that `network_name` would be inferred from that vpc module rather
+`use` list so that `network_self_link` would be inferred from that vpc module rather
 than having to set it manually.
 
 * **project_id**: The GCP project ID in which to create the GCP resources.
@@ -484,7 +521,6 @@ than having to set it manually.
   will be created in.
 * **zone**: The GCP [zone](https://cloud.google.com/compute/docs/regions-zones)
   the module will be created in.
-* **network_name**: The name of the network a module will use or connect to.
 * **labels**:
   [Labels](https://cloud.google.com/resource-manager/docs/creating-managing-labels)
   added to the module. In order to include any module in advanced
@@ -493,42 +529,5 @@ than having to set it manually.
 
 ## Writing Custom HPC Modules
 
-Modules are flexible by design, however we do define some best practices when
+Modules are flexible by design; however, we do define some [best practices](../docs/module-guidelines.md) when
 creating a new module meant to be used with the HPC Toolkit.
-
-### Terraform Requirements
-
-The module source field must point to a single terraform module. We recommend
-the following structure:
-
-* main.tf file composing the terraform resources using provided variables.
-* variables.tf file defining the variables used.
-* (Optional) outputs.tf file defining any exported outputs used (if any).
-* (Optional) modules/ sub-directory pointing to submodules needed to create the
-  top level module.
-
-### General Best Practices
-
-* Variables for environment-specific values (like project_id) should not be
-  given defaults. This forces the calling module to provide meaningful values.
-* Variables should only have zero-value defaults (like null or empty strings)
-  where leaving the variable empty is a valid preference which will not be
-  rejected by the underlying API(s).
-* Set good defaults wherever possible. Be opinionated about HPC use cases.
-* Follow common variable [naming conventions](#common-settings).
-
-### Terraform Coding Standards
-
-Any Terraform based modules in the HPC Toolkit should implement the following
-standards:
-
-* terraform-docs is used to generate README files for each module.
-* The first parameter listed under a module should be source (when referring to
-  an external implementation).
-* The order for parameters in inputs should be:
-  * description
-  * type
-  * default
-* The order for parameters in outputs should be:
-  * description
-  * value
diff --git a/modules/compute/vm-instance/README.md b/modules/compute/vm-instance/README.md
index 62c8869662..ef7b11add7 100644
--- a/modules/compute/vm-instance/README.md
+++ b/modules/compute/vm-instance/README.md
@@ -122,6 +122,24 @@ Use the following settings for spread placement:
 More information on GPU support in `vm-instance` and other HPC Toolkit modules
 can be found at [docs/gpu-support.md](../../../docs/gpu-support.md)
 
+## Lifecycle
+
+The `vm-instance` module will be replaced when the `instance_image` variable is
+changed and `terraform apply` is run on the deployment group folder or
+`ghpc deploy` is run. However, it will not be automatically replaced if a new
+image is created in a family.
+
+To selectively replace the vm-instance(s), consider running
+`terraform apply -replace`, for example:
+
+> See https://developer.hashicorp.com/terraform/cli/commands/plan#replace-address
+> for the precise syntax of `terraform apply -replace=ADDRESS`.
+
+```shell
+terraform state list
+# search for the module ID and resource
+terraform apply -replace="address"
+```
+
 ## License
 
@@ -143,9 +161,10 @@ limitations under the License.
 
 | Name | Version |
 |------|---------|
-| [terraform](#requirement\_terraform) | >= 0.14.0 |
+| [terraform](#requirement\_terraform) | >= 1.2.0 |
 | [google](#requirement\_google) | >= 4.42 |
 | [google-beta](#requirement\_google-beta) | >= 4.12 |
+| [null](#requirement\_null) | >= 1.0 |
 
 ## Providers
 
@@ -153,12 +172,13 @@
 |------|---------|
 | [google](#provider\_google) | >= 4.42 |
 | [google-beta](#provider\_google-beta) | >= 4.12 |
+| [null](#provider\_null) | >= 1.0 |
 
 ## Modules
 
 | Name | Source | Version |
 |------|--------|---------|
-| [netstorage\_startup\_script](#module\_netstorage\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 34bb7250 |
+| [netstorage\_startup\_script](#module\_netstorage\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 185837b5 |
 
 ## Resources
 
@@ -167,6 +187,7 @@
 | [google-beta_google_compute_instance.compute_vm](https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/google_compute_instance) | resource |
 | [google_compute_disk.boot_disk](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_disk) | resource |
 | [google_compute_resource_policy.placement_policy](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_resource_policy) | resource |
+| [null_resource.image](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
 | [google_compute_image.compute_image](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_image) | data source |
 
 ## Inputs
 
@@ -175,6 +196,7 @@
 | Name | Description | Type | Default | Required |
 |------|-------------|------|---------|:--------:|
 | [add\_deployment\_name\_before\_prefix](#input\_add\_deployment\_name\_before\_prefix) | If true, the names of VMs and disks will always be prefixed with `deployment_name` to enable uniqueness across deployments.
See `name_prefix` for further details on resource naming behavior. | `bool` | `false` | no | | [auto\_delete\_boot\_disk](#input\_auto\_delete\_boot\_disk) | Controls if boot disk should be auto-deleted when instance is deleted. | `bool` | `true` | no | +| [automatic\_restart](#input\_automatic\_restart) | Specifies if the instance should be restarted if it was terminated by Compute Engine (not a user). | `bool` | `null` | no | | [bandwidth\_tier](#input\_bandwidth\_tier) | Tier 1 bandwidth increases the maximum egress bandwidth for VMs.
Using the `tier_1_enabled` setting will enable both gVNIC and TIER\_1 higher bandwidth networking.
Using the `gvnic_enabled` setting will only enable gVNIC and will not enable TIER\_1.
Note that TIER\_1 only works with specific machine families & shapes and must be using an image that supports gVNIC. See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"not_enabled"` | no | | [deployment\_name](#input\_deployment\_name) | Name of the deployment, will optionally be used name resources according to `name_prefix` | `string` | n/a | yes | | [disable\_public\_ips](#input\_disable\_public\_ips) | If set to true, instances will not have public IPs | `bool` | `false` | no | diff --git a/modules/compute/vm-instance/main.tf b/modules/compute/vm-instance/main.tf index 73a80351cb..140c2aa91c 100644 --- a/modules/compute/vm-instance/main.tf +++ b/modules/compute/vm-instance/main.tf @@ -43,9 +43,15 @@ locals { # both of these must be false if either compact placement or preemptible/spot instances are used # automatic restart is tolerant of GPUs while on host maintenance is not - automatic_restart = local.compact_placement || var.spot ? false : null + automatic_restart_default = local.compact_placement || var.spot ? false : null on_host_maintenance_default = local.compact_placement || var.spot || local.gpu_attached ? "TERMINATE" : "MIGRATE" + automatic_restart = ( + var.automatic_restart != null + ? var.automatic_restart + : local.automatic_restart_default + ) + on_host_maintenance = ( var.on_host_maintenance != null ? var.on_host_maintenance @@ -85,6 +91,13 @@ data "google_compute_image" "compute_image" { project = var.instance_image.project } +resource "null_resource" "image" { + triggers = { + image = var.instance_image.family, + project = var.instance_image.project + } +} + resource "google_compute_disk" "boot_disk" { project = var.project_id @@ -96,6 +109,14 @@ resource "google_compute_disk" "boot_disk" { size = var.disk_size_gb labels = local.labels zone = var.zone + + lifecycle { + replace_triggered_by = [null_resource.image] + + ignore_changes = [ + image + ] + } } resource "google_compute_resource_policy" "placement_policy" { diff --git a/modules/compute/vm-instance/startup_from_network_storage.tf b/modules/compute/vm-instance/startup_from_network_storage.tf index f9e53f44a6..46f913334c 100644 --- a/modules/compute/vm-instance/startup_from_network_storage.tf +++ b/modules/compute/vm-instance/startup_from_network_storage.tf @@ -55,7 +55,7 @@ locals { } module "netstorage_startup_script" { - source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=34bb7250" + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=185837b5" labels = local.labels project_id = var.project_id diff --git a/modules/compute/vm-instance/variables.tf b/modules/compute/vm-instance/variables.tf index 8632a1c260..e513dfcf70 100644 --- a/modules/compute/vm-instance/variables.tf +++ b/modules/compute/vm-instance/variables.tf @@ -245,6 +245,12 @@ variable "guest_accelerator" { default = null } +variable "automatic_restart" { + description = "Specifies if the instance should be restarted if it was terminated by Compute Engine (not a user)." + type = bool + default = null +} + variable "on_host_maintenance" { description = "Describes maintenance behavior for the instance. 
If left blank this will default to `MIGRATE` except for when `placement_policy`, spot provisioning, or GPUs require it to be `TERMINATE`" type = string diff --git a/modules/compute/vm-instance/versions.tf b/modules/compute/vm-instance/versions.tf index 8c5e31c035..9adc408492 100644 --- a/modules/compute/vm-instance/versions.tf +++ b/modules/compute/vm-instance/versions.tf @@ -25,13 +25,16 @@ terraform { source = "hashicorp/google-beta" version = ">= 4.12" } + null = { + version = ">= 1.0" + } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.20.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.20.0" } - required_version = ">= 0.14.0" + required_version = ">= 1.2.0" } diff --git a/modules/file-system/filestore/README.md b/modules/file-system/filestore/README.md index 333122b8b5..74383a5252 100644 --- a/modules/file-system/filestore/README.md +++ b/modules/file-system/filestore/README.md @@ -172,6 +172,8 @@ No modules. | Name | Description | |------|-------------| +| [capacity\_gb](#output\_capacity\_gb) | File share capacity in GiB. | +| [filestore\_id](#output\_filestore\_id) | An identifier for the resource with format `projects/{{project}}/locations/{{location}}/instances/{{name}}` | | [install\_nfs\_client](#output\_install\_nfs\_client) | Script for installing NFS client | | [install\_nfs\_client\_runner](#output\_install\_nfs\_client\_runner) | Runner to install NFS client using the startup-script module | | [mount\_runner](#output\_mount\_runner) | Runner to mount the file-system using an ansible playbook. The startup-script
module will automatically handle installation of ansible.
- id: example-startup-script
source: modules/scripts/startup-script
settings:
runners:
- $(your-fs-id.mount\_runner)
... | diff --git a/modules/file-system/filestore/outputs.tf b/modules/file-system/filestore/outputs.tf index e9a546c316..bd9126798c 100644 --- a/modules/file-system/filestore/outputs.tf +++ b/modules/file-system/filestore/outputs.tf @@ -50,3 +50,13 @@ output "mount_runner" { EOT value = local.mount_runner } + +output "filestore_id" { + description = "An identifier for the resource with format `projects/{{project}}/locations/{{location}}/instances/{{name}}`" + value = google_filestore_instance.filestore_instance.id +} + +output "capacity_gb" { + description = "File share capacity in GiB." + value = google_filestore_instance.filestore_instance.file_shares[0].capacity_gb +} diff --git a/modules/file-system/filestore/versions.tf b/modules/file-system/filestore/versions.tf index 2ad4eee7d1..779bfb96cf 100644 --- a/modules/file-system/filestore/versions.tf +++ b/modules/file-system/filestore/versions.tf @@ -26,10 +26,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.20.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.20.0" } required_version = ">= 0.14.0" diff --git a/modules/file-system/pre-existing-network-storage/README.md b/modules/file-system/pre-existing-network-storage/README.md index f2d9b8031c..2580169785 100644 --- a/modules/file-system/pre-existing-network-storage/README.md +++ b/modules/file-system/pre-existing-network-storage/README.md @@ -81,7 +81,7 @@ Both of these steps are automatically handled with the use of the `use` command in a selection of HPC Toolkit modules. See the [compatibility matrix][matrix] in the network storage doc for a complete list of supported modules. -[matrix]: ../../../../docs/network_storage.md#compatibility-matrix +[matrix]: ../../../docs/network_storage.md#compatibility-matrix ## License diff --git a/modules/monitoring/dashboard/versions.tf b/modules/monitoring/dashboard/versions.tf index 17a7c1a0b2..2341c79401 100644 --- a/modules/monitoring/dashboard/versions.tf +++ b/modules/monitoring/dashboard/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.20.0" } required_version = ">= 0.14.0" diff --git a/modules/network/pre-existing-vpc/versions.tf b/modules/network/pre-existing-vpc/versions.tf index d952b61492..b763751779 100644 --- a/modules/network/pre-existing-vpc/versions.tf +++ b/modules/network/pre-existing-vpc/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.20.0" } required_version = ">= 0.14.0" diff --git a/modules/network/vpc/README.md b/modules/network/vpc/README.md index 1d74a796d9..2dbc21e02e 100644 --- a/modules/network/vpc/README.md +++ b/modules/network/vpc/README.md @@ -184,12 +184,15 @@ No resources. 
| Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [additional\_subnetworks](#input\_additional\_subnetworks) | DEPRECATED: please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions | `list(map(string))` | `null` | no | +| [allowed\_ssh\_ip\_ranges](#input\_allowed\_ssh\_ip\_ranges) | A list of CIDR IP ranges from which to allow ssh access | `list(string)` | `[]` | no | | [default\_primary\_subnetwork\_size](#input\_default\_primary\_subnetwork\_size) | The size, in CIDR bits, of the default primary subnetwork unless explicitly defined in var.subnetworks | `number` | `15` | no | | [delete\_default\_internet\_gateway\_routes](#input\_delete\_default\_internet\_gateway\_routes) | If set, ensure that all routes within the network specified whose names begin with 'default-route' and with a next hop of 'default-internet-gateway' are deleted | `bool` | `false` | no | | [deployment\_name](#input\_deployment\_name) | The name of the current deployment | `string` | n/a | yes | | [enable\_iap\_rdp\_ingress](#input\_enable\_iap\_rdp\_ingress) | Enable a firewall rule to allow Windows Remote Desktop Protocol access using IAP tunnels | `bool` | `false` | no | | [enable\_iap\_ssh\_ingress](#input\_enable\_iap\_ssh\_ingress) | Enable a firewall rule to allow SSH access using IAP tunnels | `bool` | `true` | no | +| [enable\_iap\_winrm\_ingress](#input\_enable\_iap\_winrm\_ingress) | Enable a firewall rule to allow Windows Remote Management (WinRM) access using IAP tunnels | `bool` | `false` | no | | [enable\_internal\_traffic](#input\_enable\_internal\_traffic) | Enable a firewall rule to allow all internal TCP, UDP, and ICMP traffic within the network | `bool` | `true` | no | +| [extra\_iap\_ports](#input\_extra\_iap\_ports) | A list of TCP ports for which to create firewall rules that enable IAP for TCP forwarding (use dedicated enable\_iap variables for standard ports) | `list(string)` | `[]` | no | | [firewall\_rules](#input\_firewall\_rules) | List of firewall rules | `any` | `[]` | no | | [ips\_per\_nat](#input\_ips\_per\_nat) | The number of IP addresses to allocate for each regional Cloud NAT (set to 0 to disable NAT) | `number` | `2` | no | | [mtu](#input\_mtu) | The network MTU (default: 8896). Recommended values: 0 (use Compute Engine default), 1460 (default outside HPC environments), 1500 (Internet default), or 8896 (for Jumbo packets). Allowed are all values in the range 1300 to 8896, inclusively. | `number` | `8896` | no | diff --git a/modules/network/vpc/main.tf b/modules/network/vpc/main.tf index d672a5baac..c330c35f99 100644 --- a/modules/network/vpc/main.tf +++ b/modules/network/vpc/main.tf @@ -19,9 +19,8 @@ locals { subnetwork_name = var.subnetwork_name == null ? 
"${var.deployment_name}-primary-subnet" : var.subnetwork_name # define a default subnetwork for cases in which no explicit subnetworks are - # defined in var.primary_subnetwork or var.subnetworks - default_primary_subnetwork_new_bits = coalesce(try(var.primary_subnetwork.new_bits, var.subnetwork_size), var.default_primary_subnetwork_size) - default_primary_subnetwork_cidr_block = cidrsubnet(var.network_address_range, local.default_primary_subnetwork_new_bits, 0) + # defined in var.subnetworks + default_primary_subnetwork_cidr_block = cidrsubnet(var.network_address_range, var.default_primary_subnetwork_size, 0) default_primary_subnetwork = { subnet_name = local.subnetwork_name subnet_ip = local.default_primary_subnetwork_cidr_block @@ -34,22 +33,14 @@ locals { } # Identify user-supplied primary subnetwork - # (1) explicit var.primary_subnetwork - # (2) explicit var.subnetworks[0] - # (3) implicit local default subnetwork - input_primary_subnetwork = try(coalesce( - var.primary_subnetwork, - try(var.subnetworks[0], null) - ), local.default_primary_subnetwork) + # (1) explicit var.subnetworks[0] + # (2) implicit local default subnetwork + input_primary_subnetwork = coalesce(try(var.subnetworks[0], null), local.default_primary_subnetwork) # Identify user-supplied additional subnetworks - # (1) explicit var.additional_subnetworks - # (2) explicit var.subnetworks[1:end] - # (3) empty list - input_additional_subnetworks = try(coalescelist( - var.additional_subnetworks, - try(slice(var.subnetworks, 1, length(var.subnetworks)), []), - ), []) + # (1) explicit var.subnetworks[1:end] + # (2) empty list + input_additional_subnetworks = try(slice(var.subnetworks, 1, length(var.subnetworks)), []) # at this point we have constructed a list of subnetworks but need to extract # user-provided CIDR blocks or calculate them from user-provided new_bits @@ -75,9 +66,15 @@ locals { output_primary_subnetwork_self_link = local.output_primary_subnetwork.self_link output_primary_subnetwork_ip_cidr_range = local.output_primary_subnetwork.ip_cidr_range - allow_iap_ssh_ingress = { - name = "${local.network_name}-fw-allow-iap-ssh-ingress" - description = "allow SSH access via Identity-Aware Proxy" + iap_ports = distinct(concat(compact([ + var.enable_iap_rdp_ingress ? "3389" : "", + var.enable_iap_ssh_ingress ? "22" : "", + var.enable_iap_winrm_ingress ? 
"5986" : "", + ]), var.extra_iap_ports)) + + allow_iap_ingress = { + name = "${local.network_name}-fw-allow-iap-ingress" + description = "allow TCP access via Identity-Aware Proxy" direction = "INGRESS" priority = null ranges = ["35.235.240.0/20"] @@ -87,7 +84,7 @@ locals { target_service_accounts = null allow = [{ protocol = "tcp" - ports = ["22"] + ports = local.iap_ports }] deny = [] log_config = { @@ -95,19 +92,19 @@ locals { } } - allow_iap_rdp_ingress = { - name = "${local.network_name}-fw-allow-iap-rdp-ingress" - description = "allow Windows remote desktop access via Identity-Aware Proxy" + allow_ssh_ingress = { + name = "${local.network_name}-fw-allow-ssh-ingress" + description = "allow SSH access" direction = "INGRESS" priority = null - ranges = ["35.235.240.0/20"] + ranges = var.allowed_ssh_ip_ranges source_tags = null source_service_accounts = null target_tags = null target_service_accounts = null allow = [{ protocol = "tcp" - ports = ["3389"] + ports = ["22"] }] deny = [] log_config = { @@ -115,7 +112,6 @@ locals { } } - allow_internal_traffic = { name = "${local.network_name}-fw-allow-internal-traffic" priority = null @@ -143,10 +139,11 @@ locals { } } - firewall_rules = concat(var.firewall_rules, + firewall_rules = concat( + var.firewall_rules, + length(var.allowed_ssh_ip_ranges) > 0 ? [local.allow_ssh_ingress] : [], var.enable_internal_traffic ? [local.allow_internal_traffic] : [], - var.enable_iap_rdp_ingress ? [local.allow_iap_rdp_ingress] : [], - var.enable_iap_ssh_ingress ? [local.allow_iap_ssh_ingress] : [], + length(local.iap_ports) > 0 ? [local.allow_iap_ingress] : [] ) } diff --git a/modules/network/vpc/variables.tf b/modules/network/vpc/variables.tf index cad16b009f..f88e5aca01 100644 --- a/modules/network/vpc/variables.tf +++ b/modules/network/vpc/variables.tf @@ -190,12 +190,35 @@ variable "enable_iap_rdp_ingress" { default = false } +variable "enable_iap_winrm_ingress" { + type = bool + description = "Enable a firewall rule to allow Windows Remote Management (WinRM) access using IAP tunnels" + default = false +} + variable "enable_internal_traffic" { type = bool description = "Enable a firewall rule to allow all internal TCP, UDP, and ICMP traffic within the network" default = true } +variable "extra_iap_ports" { + type = list(string) + description = "A list of TCP ports for which to create firewall rules that enable IAP for TCP forwarding (use dedicated enable_iap variables for standard ports)" + default = [] +} + +variable "allowed_ssh_ip_ranges" { + type = list(string) + description = "A list of CIDR IP ranges from which to allow ssh access" + default = [] + + validation { + condition = alltrue([for r in var.allowed_ssh_ip_ranges : can(cidrhost(r, 32))]) + error_message = "Each element of var.allowed_ssh_ip_ranges must be a valid CIDR-formatted IPv4 range." + } +} + variable "firewall_rules" { type = any description = "List of firewall rules" diff --git a/modules/packer/custom-image/README.md b/modules/packer/custom-image/README.md index 3e7f692518..731b463e10 100644 --- a/modules/packer/custom-image/README.md +++ b/modules/packer/custom-image/README.md @@ -241,6 +241,7 @@ No resources. 
| [communicator](#input\_communicator) | Communicator to use for provisioners that require access to VM ("ssh" or "winrm") | `string` | `null` | no | | [deployment\_name](#input\_deployment\_name) | HPC Toolkit deployment name | `string` | n/a | yes | | [disk\_size](#input\_disk\_size) | Size of disk image in GB | `number` | `null` | no | +| [disk\_type](#input\_disk\_type) | Type of persistent disk to provision | `string` | `"pd-balanced"` | no | | [image\_family](#input\_image\_family) | The family name of the image to be built. Defaults to `deployment_name` | `string` | `null` | no | | [image\_name](#input\_image\_name) | The name of the image to be built. If not supplied, it will be set to image\_family-$ISO\_TIMESTAMP | `string` | `null` | no | | [image\_storage\_locations](#input\_image\_storage\_locations) | Storage location, either regional or multi-regional, where snapshot content is to be stored and only accepts 1 value.
See https://developer.hashicorp.com/packer/plugins/builders/googlecompute#image_storage_locations | `list(string)` | `null` | no | @@ -266,6 +267,7 @@ No resources. | [tags](#input\_tags) | Assign network tags to apply firewall rules to VM instance | `list(string)` | `null` | no | | [use\_iap](#input\_use\_iap) | Use IAP proxy when connecting by SSH | `bool` | `true` | no | | [use\_os\_login](#input\_use\_os\_login) | Use OS Login when connecting by SSH | `bool` | `false` | no | +| [windows\_startup\_ps1](#input\_windows\_startup\_ps1) | A list of strings containing PowerShell scripts which will customize a Windows VM image (requires WinRM communicator) | `list(string)` | `[]` | no | | [wrap\_startup\_script](#input\_wrap\_startup\_script) | Wrap startup script with Packer-generated wrapper | `bool` | `true` | no | | [zone](#input\_zone) | Cloud zone in which to provision image building VM | `string` | n/a | yes | diff --git a/modules/packer/custom-image/image.pkr.hcl b/modules/packer/custom-image/image.pkr.hcl index 6ee3b63554..fa9a372cc5 100644 --- a/modules/packer/custom-image/image.pkr.hcl +++ b/modules/packer/custom-image/image.pkr.hcl @@ -18,9 +18,25 @@ locals { image_name_default = "${local.image_family}-${formatdate("YYYYMMDD't'hhmmss'z'", timestamp())}" image_name = var.image_name != null ? var.image_name : local.image_name_default + # default to explicit var.communicator, otherwise in-order: ssh/winrm/none + shell_script_communicator = length(var.shell_scripts) > 0 ? "ssh" : "" + ansible_playbook_communicator = length(var.ansible_playbooks) > 0 ? "ssh" : "" + powershell_script_communicator = length(var.windows_startup_ps1) > 0 ? "winrm" : "" + communicator = coalesce( + var.communicator, + local.shell_script_communicator, + local.ansible_playbook_communicator, + local.powershell_script_communicator, + "none" + ) + + # must not enable IAP when no communicator is in use + use_iap = local.communicator == "none" ? false : var.use_iap + # construct metadata from startup_script and metadata variables startup_script_metadata = var.startup_script == null ? {} : { startup-script = var.startup_script } - user_management_metadata = { + + linux_user_metadata = { block-project-ssh-keys = "TRUE" shutdown-script = <<-EOT #!/bin/bash @@ -28,23 +44,21 @@ locals { sed -i '/${var.ssh_username}/d' /var/lib/google/google_users EOT } + windows_packer_user = "packer_user" + windows_user_metadata = { + sysprep-specialize-script-cmd = "winrm quickconfig -quiet & net user /add ${local.windows_packer_user} & net localgroup administrators ${local.windows_packer_user} /add & winrm set winrm/config/service/auth @{Basic=\\\"true\\\"}" + windows-shutdown-script-cmd = "net user /delete ${local.windows_packer_user}" + } + user_metadata = local.communicator == "winrm" ? local.windows_user_metadata : local.linux_user_metadata # merge metadata such that var.metadata always overrides user management # metadata but always allow var.startup_script to override var.metadata metadata = merge( - local.user_management_metadata, + local.user_metadata, var.metadata, local.startup_script_metadata, ) - # determine communicator to use and whether to enable Identity-Aware Proxy - no_shell_scripts = length(var.shell_scripts) == 0 - no_ansible_playbooks = length(var.ansible_playbooks) == 0 - no_provisioners = local.no_shell_scripts && local.no_ansible_playbooks - communicator_default = local.no_provisioners ? "none" : "ssh" - communicator = var.communicator == null ? 
local.communicator_default : var.communicator - use_iap = local.communicator == "none" ? false : var.use_iap - # determine best value for on_host_maintenance if not supplied by user machine_vals = split("-", var.machine_type) machine_family = local.machine_vals[0] @@ -55,6 +69,12 @@ locals { ? var.on_host_maintenance : local.on_host_maintenance_default ) + + accelerator_type = var.accelerator_type == null ? null : "projects/${var.project_id}/zones/${var.zone}/acceleratorTypes/${var.accelerator_type}" + + winrm_username = local.communicator == "winrm" ? "packer_user" : null + winrm_insecure = local.communicator == "winrm" ? true : null + winrm_use_ssl = local.communicator == "winrm" ? true : null } source "googlecompute" "toolkit_image" { @@ -64,10 +84,11 @@ source "googlecompute" "toolkit_image" { image_family = local.image_family image_labels = var.labels machine_type = var.machine_type - accelerator_type = var.accelerator_type + accelerator_type = local.accelerator_type accelerator_count = var.accelerator_count on_host_maintenance = local.on_host_maintenance disk_size = var.disk_size + disk_type = var.disk_type omit_external_ip = var.omit_external_ip use_internal_ip = var.omit_external_ip subnetwork = var.subnetwork_name @@ -80,6 +101,9 @@ source "googlecompute" "toolkit_image" { tags = var.tags use_iap = local.use_iap use_os_login = var.use_os_login + winrm_username = local.winrm_username + winrm_insecure = local.winrm_insecure + winrm_use_ssl = local.winrm_use_ssl zone = var.zone labels = var.labels metadata = local.metadata @@ -107,6 +131,15 @@ build { } } + # provisioner "powershell" blocks + dynamic "provisioner" { + labels = ["powershell"] + for_each = var.windows_startup_ps1 + content { + inline = split("\n", provisioner.value) + } + } + # provisioner "ansible-local" blocks # this installs custom roles/collections from ansible-galaxy in /home/packer # which will be removed at the end; consider modifying /etc/ansible/ansible.cfg diff --git a/modules/packer/custom-image/variables.pkr.hcl b/modules/packer/custom-image/variables.pkr.hcl index 11a887bc85..9589669153 100644 --- a/modules/packer/custom-image/variables.pkr.hcl +++ b/modules/packer/custom-image/variables.pkr.hcl @@ -34,6 +34,12 @@ variable "disk_size" { default = null } +variable "disk_type" { + description = "Type of persistent disk to provision" + type = string + default = "pd-balanced" +} + variable "zone" { description = "Cloud zone in which to provision image building VM" type = string @@ -150,6 +156,12 @@ variable "shell_scripts" { default = [] } +variable "windows_startup_ps1" { + description = "A list of strings containing PowerShell scripts which will customize a Windows VM image (requires WinRM communicator)" + type = list(string) + default = [] +} + variable "startup_script" { description = "Startup script (as raw string) used to build the custom Linux VM image (overridden by var.startup_script_file if both are set)" type = string diff --git a/modules/scheduler/batch-job-template/README.md b/modules/scheduler/batch-job-template/README.md index f469e95322..b4ab8a7939 100644 --- a/modules/scheduler/batch-job-template/README.md +++ b/modules/scheduler/batch-job-template/README.md @@ -135,7 +135,7 @@ limitations under the License. 
| Name | Source | Version | |------|--------|---------| | [instance\_template](#module\_instance\_template) | terraform-google-modules/vm/google//modules/instance_template | ~> 8.0 | -| [netstorage\_startup\_script](#module\_netstorage\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 34bb7250 | +| [netstorage\_startup\_script](#module\_netstorage\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 185837b5 | ## Resources @@ -150,7 +150,7 @@ limitations under the License. | [deployment\_name](#input\_deployment\_name) | Name of the deployment, used for the job\_id | `string` | n/a | yes | | [enable\_public\_ips](#input\_enable\_public\_ips) | If set to true, instances will have public IPs | `bool` | `true` | no | | [gcloud\_version](#input\_gcloud\_version) | The version of the gcloud cli being used. Used for output instructions. Valid inputs are `"alpha"`, `"beta"` and "" (empty string for default version) | `string` | `"alpha"` | no | -| [image](#input\_image) | Google Cloud Batch compute node image. Ignored if `instance_template` is provided. | object({ family = string project = string }) | { "family": "hpc-centos-7", "project": "cloud-hpc-image-public" } | no | +| [image](#input\_image) | Google Cloud Batch compute node image. Ignored if `instance_template` is provided. | object({ family = string project = string }) | { "family": "batch-hpc-centos-7-official", "project": "batch-custom-image" }
| no | | [instance\_template](#input\_instance\_template) | Compute VM instance template self-link to be used for Google Cloud Batch compute node. If provided, a number of other variables will be ignored as noted by `Ignored if instance_template is provided` in descriptions. | `string` | `null` | no | | [job\_filename](#input\_job\_filename) | The filename of the generated job template file. Will default to `cloud-batch-.json` if not specified | `string` | `null` | no | | [job\_id](#input\_job\_id) | An id for the Google Cloud Batch job. Used for output instructions and file naming. Defaults to deployment name. | `string` | `null` | no | diff --git a/modules/scheduler/batch-job-template/startup_from_network_storage.tf b/modules/scheduler/batch-job-template/startup_from_network_storage.tf index f9e53f44a6..46f913334c 100644 --- a/modules/scheduler/batch-job-template/startup_from_network_storage.tf +++ b/modules/scheduler/batch-job-template/startup_from_network_storage.tf @@ -55,7 +55,7 @@ locals { } module "netstorage_startup_script" { - source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=34bb7250" + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=185837b5" labels = local.labels project_id = var.project_id diff --git a/modules/scheduler/batch-job-template/variables.tf b/modules/scheduler/batch-job-template/variables.tf index aa0b053112..19f8426030 100644 --- a/modules/scheduler/batch-job-template/variables.tf +++ b/modules/scheduler/batch-job-template/variables.tf @@ -173,8 +173,8 @@ variable "image" { project = string }) default = { - family = "hpc-centos-7" - project = "cloud-hpc-image-public" + family = "batch-hpc-centos-7-official" + project = "batch-custom-image" } } diff --git a/modules/scheduler/batch-login-node/README.md b/modules/scheduler/batch-login-node/README.md index 122d574350..8be40bd1cb 100644 --- a/modules/scheduler/batch-login-node/README.md +++ b/modules/scheduler/batch-login-node/README.md @@ -89,7 +89,7 @@ limitations under the License. | Name | Source | Version | |------|--------|---------| -| [login\_startup\_script](#module\_login\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 34bb7250 | +| [login\_startup\_script](#module\_login\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 185837b5 | ## Resources diff --git a/modules/scheduler/batch-login-node/main.tf b/modules/scheduler/batch-login-node/main.tf index 3ecbca3b97..c3fe8d5326 100644 --- a/modules/scheduler/batch-login-node/main.tf +++ b/modules/scheduler/batch-login-node/main.tf @@ -24,17 +24,7 @@ data "google_compute_instance_template" "batch_instance_template" { } locals { - - # Handle directly created job data (deprecated). All of job_id, job_template_contents and job_filename must be set. - default_job_data = var.job_template_contents == null || var.job_id == null || var.job_filename == null ? 
[] : [{ - id = var.job_id - filename = var.job_filename - template_contents = var.job_template_contents - }] - - job_data = concat(local.default_job_data, var.job_data) - - job_template_runners = [for job in local.job_data : { + job_template_runners = [for job in var.job_data : { content = job.template_contents destination = "${var.batch_job_directory}/${job.filename}" type = "data" @@ -51,7 +41,7 @@ locals { login_metadata = merge(local.instance_template_metadata, local.startup_metadata, local.oslogin_metadata) - batch_command_instructions = join("\n", [for job in local.job_data : <<-EOT + batch_command_instructions = join("\n", [for job in var.job_data : <<-EOT ## For job: ${job.id} ## Submit your job from login node: @@ -104,7 +94,7 @@ locals { } module "login_startup_script" { - source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=34bb7250" + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=185837b5" labels = local.labels project_id = var.project_id deployment_name = var.deployment_name diff --git a/modules/scheduler/batch-login-node/versions.tf b/modules/scheduler/batch-login-node/versions.tf index 565be47f1f..b1927fdf65 100644 --- a/modules/scheduler/batch-login-node/versions.tf +++ b/modules/scheduler/batch-login-node/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:batch-login-node/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:batch-login-node/v1.20.0" } required_version = ">= 0.14.0" diff --git a/modules/scripts/startup-script/versions.tf b/modules/scripts/startup-script/versions.tf index e48ca9e18c..38824dcba8 100644 --- a/modules/scripts/startup-script/versions.tf +++ b/modules/scripts/startup-script/versions.tf @@ -30,7 +30,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.20.0" } required_version = ">= 0.14.0" diff --git a/pkg/config/config.go b/pkg/config/config.go index f4c477ffae..eec9ecf95a 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -20,8 +20,8 @@ import ( "fmt" "io/ioutil" "log" - "os" "regexp" + "sort" "strings" "github.com/pkg/errors" @@ -47,18 +47,14 @@ var errorMessages = map[string]string{ "yamlMarshalError": "failed to export the configuration to a blueprint yaml file", "fileSaveError": "failed to write the expanded yaml", // expand - "missingSetting": "a required setting is missing from a module", - "globalLabelType": "deployment variable 'labels' are not a map", - "settingsLabelType": "labels in module settings are not a map", - "invalidVar": "invalid variable definition in", - "invalidMod": "invalid module reference", - "invalidDeploymentRef": "invalid deployment-wide reference (only \"vars\") is supported)", - "varNotFound": "Could not find source of variable", - "intergroupOrder": "References to outputs from other groups must be to earlier groups", - "referenceWrongGroup": "Reference specified the wrong group for the module", - "noOutput": "Output not found for a variable", - "groupNotFound": "The group ID was not found", - "cannotUsePacker": "Packer modules cannot be used by other modules", + "missingSetting": "a required setting is missing from a module", + "settingsLabelType": "labels in module settings are not a map", + "invalidVar": "invalid variable definition in", + "invalidMod": "invalid module reference", + "varNotFound": "Could not find 
source of variable", + "intergroupOrder": "References to outputs from other groups must be to earlier groups", + "noOutput": "Output not found for a variable", + "cannotUsePacker": "Packer modules cannot be used by other modules", // validator "emptyID": "a module id cannot be empty", "emptySource": "a module source cannot be empty", @@ -69,9 +65,7 @@ var errorMessages = map[string]string{ "duplicateGroup": "group names must be unique", "duplicateID": "module IDs must be unique", "emptyGroupName": "group name must be set for each deployment group", - "illegalChars": "invalid character(s) found in group name", "invalidOutput": "requested output was not found in the module", - "varNotDefined": "variable not defined", "valueNotString": "value was not of type string", "valueEmptyString": "value is an empty string", "labelNameReqs": "name must begin with a lowercase letter, can only contain lowercase letters, numeric characters, underscores and dashes, and must be between 1 and 63 characters long", @@ -92,8 +86,10 @@ func (n GroupName) Validate() error { if n == "" { return errors.New(errorMessages["emptyGroupName"]) } - if hasIllegalChars(string(n)) { - return fmt.Errorf("%s %s", errorMessages["illegalChars"], n) + + if !regexp.MustCompile(`^\w(-*\w)*$`).MatchString(string(n)) { + return fmt.Errorf("invalid character(s) found in group name %q.\n"+ + "Allowed : alphanumeric, '_', and '-'; can not start/end with '-'", n) } return nil } @@ -101,7 +97,7 @@ func (n GroupName) Validate() error { // DeploymentGroup defines a group of Modules that are all executed together type DeploymentGroup struct { Name GroupName `yaml:"group"` - TerraformBackend TerraformBackend `yaml:"terraform_backend"` + TerraformBackend TerraformBackend `yaml:"terraform_backend,omitempty"` Modules []Module `yaml:"modules"` Kind ModuleKind } @@ -182,24 +178,6 @@ var TerraformKind = ModuleKind{kind: "terraform"} // PackerKind is the kind for Packer modules (should be treated as const) var PackerKind = ModuleKind{kind: "packer"} -// UnmarshalYAML implements a custom unmarshaler from YAML string to ModuleKind -func (mk *ModuleKind) UnmarshalYAML(n *yaml.Node) error { - var kind string - const yamlErrorMsg string = "block beginning at line %d: %s" - - err := n.Decode(&kind) - if err == nil && IsValidModuleKind(kind) { - mk.kind = kind - return nil - } - return fmt.Errorf(yamlErrorMsg, n.Line, "kind must be \"packer\" or \"terraform\" or removed from YAML") -} - -// MarshalYAML implements a custom marshaler from ModuleKind to YAML string -func (mk ModuleKind) MarshalYAML() (interface{}, error) { - return mk.String(), nil -} - // IsValidModuleKind ensures that the user has specified a supported kind func IsValidModuleKind(kind string) bool { return kind == TerraformKind.String() || kind == PackerKind.String() || @@ -259,8 +237,8 @@ func (v validatorName) String() string { type validatorConfig struct { Validator string - Inputs Dict - Skip bool + Inputs Dict `yaml:"inputs,omitempty"` + Skip bool `yaml:"skip,omitempty"` } func (v *validatorConfig) check(name validatorName, requiredInputs []string) error { @@ -292,26 +270,20 @@ func (v *validatorConfig) check(name validatorName, requiredInputs []string) err // ModuleID is a unique identifier for a module in a blueprint type ModuleID string +// ModuleIDs is a list of ModuleID +type ModuleIDs []ModuleID + // Module stores YAML definition of an HPC cluster component defined in a blueprint type Module struct { - Source string - // DeploymentSource - is source to be used for this module in 
written deployment. - DeploymentSource string `yaml:"-"` // "-" prevents user from specifying it - Kind ModuleKind - ID ModuleID - Use []ModuleID - WrapSettingsWith map[string][]string - Outputs []modulereader.OutputInfo `yaml:"outputs,omitempty"` - Settings Dict - RequiredApis map[string][]string `yaml:"required_apis"` -} - -// createWrapSettingsWith ensures WrapSettingsWith field is not nil, if it is -// a new map is created. -func (m *Module) createWrapSettingsWith() { - if m.WrapSettingsWith == nil { - m.WrapSettingsWith = make(map[string][]string) - } + Source string + Kind ModuleKind + ID ModuleID + Use ModuleIDs `yaml:"use,omitempty"` + Outputs []modulereader.OutputInfo `yaml:"outputs,omitempty"` + Settings Dict `yaml:"settings,omitempty"` + // DEPRECATED fields, keep in the struct for backwards compatibility + RequiredApis interface{} `yaml:"required_apis,omitempty"` + WrapSettingsWith interface{} `yaml:"wrapsettingswith,omitempty"` } // InfoOrDie returns the ModuleInfo for the module or panics @@ -328,13 +300,13 @@ func (m Module) InfoOrDie() modulereader.ModuleInfo { // unless it has been set to a non-default value; the implementation as an // integer is primarily for internal purposes even if it can be set in blueprint type Blueprint struct { - BlueprintName string `yaml:"blueprint_name"` - GhpcVersion string `yaml:"ghpc_version,omitempty"` - Validators []validatorConfig - ValidationLevel int `yaml:"validation_level,omitempty"` + BlueprintName string `yaml:"blueprint_name"` + GhpcVersion string `yaml:"ghpc_version,omitempty"` + Validators []validatorConfig `yaml:"validators,omitempty"` + ValidationLevel int `yaml:"validation_level,omitempty"` Vars Dict DeploymentGroups []DeploymentGroup `yaml:"deployment_groups"` - TerraformBackendDefaults TerraformBackend `yaml:"terraform_backend_defaults"` + TerraformBackendDefaults TerraformBackend `yaml:"terraform_backend_defaults,omitempty"` } // DeploymentConfig is a container for the imported YAML data and supporting data for @@ -350,9 +322,15 @@ func (dc *DeploymentConfig) ExpandConfig() error { } dc.Config.setGlobalLabels() dc.Config.addKindToModules() - dc.validateConfig() - dc.expand() - dc.validate() + if err := dc.validateConfig(); err != nil { + return err + } + if err := dc.expand(); err != nil { + return err + } + if err := dc.validate(); err != nil { + return err + } return nil } @@ -364,17 +342,17 @@ func (bp *Blueprint) setGlobalLabels() { // listUnusedModules provides a list of modules that are in the // "use" field, but not actually used. -func (m Module) listUnusedModules() []ModuleID { +func (m Module) listUnusedModules() ModuleIDs { used := map[ModuleID]bool{} // Recurse through objects/maps/lists checking each element for having `ProductOfModuleUse` mark.
cty.Walk(m.Settings.AsObject(), func(p cty.Path, v cty.Value) (bool, error) { - if mark, has := HasMark[ProductOfModuleUse](v); has { - used[mark.Module] = true + for _, mod := range IsProductOfModuleUse(v) { + used[mod] = true } return true, nil }) - unused := []ModuleID{} + unused := ModuleIDs{} for _, w := range m.Use { if !used[w] { unused = append(unused, w) @@ -440,39 +418,17 @@ func (bp Blueprint) checkMovedModules() error { } // NewDeploymentConfig is a constructor for DeploymentConfig -func NewDeploymentConfig(configFilename string) (DeploymentConfig, error) { - blueprint, err := importBlueprint(configFilename) - if err != nil { - return DeploymentConfig{}, err - } - return DeploymentConfig{Config: blueprint}, nil -} - -// ImportBlueprint imports the blueprint configuration provided. -func importBlueprint(blueprintFilename string) (Blueprint, error) { - var blueprint Blueprint - - reader, err := os.Open(blueprintFilename) +func NewDeploymentConfig(configFilename string) (DeploymentConfig, YamlCtx, error) { + bp, ctx, err := importBlueprint(configFilename) if err != nil { - return blueprint, fmt.Errorf("%s, filename=%s: %v", - errorMessages["fileLoadError"], blueprintFilename, err) + return DeploymentConfig{}, YamlCtx{}, err } - - decoder := yaml.NewDecoder(reader) - decoder.KnownFields(true) - - if err = decoder.Decode(&blueprint); err != nil { - return blueprint, fmt.Errorf(errorMessages["yamlUnmarshalError"], - blueprintFilename, err) - } - // if the validation level has been explicitly set to an invalid value // in YAML blueprint then silently default to validationError - if !isValidValidationLevel(blueprint.ValidationLevel) { - blueprint.ValidationLevel = ValidationError + if !isValidValidationLevel(bp.ValidationLevel) { + bp.ValidationLevel = ValidationError } - - return blueprint, nil + return DeploymentConfig{Config: bp}, ctx, nil } // ExportBlueprint exports the internal representation of a blueprint config @@ -534,14 +490,16 @@ func checkModulesAndGroups(groups []DeploymentGroup) error { if err := grp.Name.Validate(); err != nil { return err } + pg := Root.Groups.At(ig) if seenGroups[grp.Name] { - return fmt.Errorf("%s: %s used more than once", errorMessages["duplicateGroup"], grp.Name) + return BpError{pg.Name, fmt.Errorf("%s: %s used more than once", errorMessages["duplicateGroup"], grp.Name)} } seenGroups[grp.Name] = true - for _, mod := range grp.Modules { + for im, mod := range grp.Modules { + pm := pg.Modules.At(im) if seenMod[mod.ID] { - return fmt.Errorf("%s: %s used more than once", errorMessages["duplicateID"], mod.ID) + return BpError{pm.ID, fmt.Errorf("%s: %s used more than once", errorMessages["duplicateID"], mod.ID)} } seenMod[mod.ID] = true @@ -550,9 +508,11 @@ func checkModulesAndGroups(groups []DeploymentGroup) error { grp.Kind = mod.Kind } if grp.Kind != mod.Kind { - return fmt.Errorf( - "mixing modules of differing kinds in a deployment group is not supported: deployment group %s, got %s and %s", - grp.Name, grp.Kind, mod.Kind) + return BpError{ + pm.Kind, + fmt.Errorf( + "mixing modules of differing kinds in a deployment group is not supported: deployment group %s, got %s and %s", + grp.Name, grp.Kind, mod.Kind)} } } } @@ -602,45 +562,46 @@ func checkBackends(bp Blueprint) error { } // validateConfig runs a set of simple early checks on the imported input YAML -func (dc *DeploymentConfig) validateConfig() { - _, err := dc.Config.DeploymentName() - if err != nil { - log.Fatal(err) +func (dc *DeploymentConfig) validateConfig() error { + + if _, err := 
dc.Config.DeploymentName(); err != nil { + return err } - err = dc.Config.checkBlueprintName() - if err != nil { - log.Fatal(err) + + if err := dc.Config.checkBlueprintName(); err != nil { + return err } - if err = dc.validateVars(); err != nil { - log.Fatal(err) + if err := dc.validateVars(); err != nil { + return err } - if err = dc.Config.checkModulesInfo(); err != nil { - log.Fatal(err) + if err := dc.Config.checkModulesInfo(); err != nil { + return err } - if err = checkModulesAndGroups(dc.Config.DeploymentGroups); err != nil { - log.Fatal(err) + if err := checkModulesAndGroups(dc.Config.DeploymentGroups); err != nil { + return err } // checkPackerGroups must come after checkModulesAndGroups, in which group // Kind is set and aligned with module Kinds - if err = checkPackerGroups(dc.Config.DeploymentGroups); err != nil { - log.Fatal(err) + if err := checkPackerGroups(dc.Config.DeploymentGroups); err != nil { + return err } - if err = checkUsedModuleNames(dc.Config); err != nil { - log.Fatal(err) + if err := checkUsedModuleNames(dc.Config); err != nil { + return err } - if err = checkBackends(dc.Config); err != nil { - log.Fatal(err) + if err := checkBackends(dc.Config); err != nil { + return err } - if err = checkModuleSettings(dc.Config); err != nil { - log.Fatal(err) + if err := checkModuleSettings(dc.Config); err != nil { + return err } + return nil } // SkipValidator marks validator(s) as skipped, @@ -668,7 +629,7 @@ type InputValueError struct { cause string } -func (err *InputValueError) Error() string { +func (err InputValueError) Error() string { return fmt.Sprintf("%v input error, cause: %v", err.inputKey, err.cause) } @@ -692,34 +653,35 @@ func isValidLabelValue(value string) bool { // DeploymentName returns the deployment_name from the config and does appropriate checks. func (bp *Blueprint) DeploymentName() (string, error) { if !bp.Vars.Has("deployment_name") { - return "", &InputValueError{ + return "", InputValueError{ inputKey: "deployment_name", cause: errorMessages["varNotFound"], } } + path := Root.Vars.Dot("deployment_name") v := bp.Vars.Get("deployment_name") if v.Type() != cty.String { - return "", &InputValueError{ + return "", BpError{path, InputValueError{ inputKey: "deployment_name", cause: errorMessages["valueNotString"], - } + }} } s := v.AsString() if len(s) == 0 { - return "", &InputValueError{ + return "", BpError{path, InputValueError{ inputKey: "deployment_name", cause: errorMessages["valueEmptyString"], - } + }} } // Check that deployment_name is a valid label if !isValidLabelValue(s) { - return "", &InputValueError{ + return "", BpError{path, InputValueError{ inputKey: "deployment_name", cause: errorMessages["labelValueReqs"], - } + }} } return s, nil @@ -728,28 +690,52 @@ func (bp *Blueprint) DeploymentName() (string, error) { // checkBlueprintName returns an error if blueprint_name does not comply with // requirements for correct GCP label values. func (bp *Blueprint) checkBlueprintName() error { - if len(bp.BlueprintName) == 0 { - return &InputValueError{ + return BpError{Root.BlueprintName, InputValueError{ inputKey: "blueprint_name", cause: errorMessages["valueEmptyString"], - } + }} } if !isValidLabelValue(bp.BlueprintName) { - return &InputValueError{ + return BpError{Root.BlueprintName, InputValueError{ inputKey: "blueprint_name", cause: errorMessages["labelValueReqs"], - } + }} } return nil } -// ProductOfModuleUse is a "mark" applied to values in Module.Settings if -// this value was modified as a result of applying `use`.
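The ProductOfModuleUse struct deleted here is replaced just below by AsProductOfModuleUse/IsProductOfModuleUse, which fold the contributing module IDs into a single comparable mark (cty requires marks to be usable as map keys, so a sorted, comma-joined string is used instead of a slice). A standalone Go sketch of that encode/decode round trip (simplified types, not the package code):

package main

import (
	"fmt"
	"sort"
	"strings"
)

// mark approximates productOfModuleUseMark: the module IDs are
// sorted and joined into one comparable string.
type mark struct{ mods string }

func asProductOfUse(mods ...string) mark {
	sort.Strings(mods)
	return mark{strings.Join(mods, ",")}
}

func contributingModules(m mark) []string {
	if m.mods == "" {
		return nil
	}
	return strings.Split(m.mods, ",")
}

func main() {
	m := asProductOfUse("network", "homefs")
	fmt.Println(contributingModules(m)) // [homefs network]
}

Sorting before joining keeps the mark stable regardless of the order in which modules are listed in `use`.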
-type ProductOfModuleUse struct { - Module ModuleID +// productOfModuleUseMark is a "mark" applied to values that are a result of `use`. +// Should not be used directly, use AsProductOfModuleUse and IsProductOfModuleUse instead. +type productOfModuleUseMark struct { + mods string +} + +// AsProductOfModuleUse marks a value as a result of `use` of given modules. +func AsProductOfModuleUse(v cty.Value, mods ...ModuleID) cty.Value { + s := make([]string, len(mods)) + for i, m := range mods { + s[i] = string(m) + } + sort.Strings(s) + return v.Mark(productOfModuleUseMark{strings.Join(s, ",")}) +} + +// IsProductOfModuleUse returns the list of modules that contributed (by `use`) to this value. +func IsProductOfModuleUse(v cty.Value) []ModuleID { + mark, marked := HasMark[productOfModuleUseMark](v) + if !marked { + return []ModuleID{} + } + + s := strings.Split(mark.mods, ",") + mods := make([]ModuleID, len(s)) + for i, m := range s { + mods[i] = ModuleID(m) + } + return mods } // WalkModules walks all modules in the blueprint and calls the walker function diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 23279f184f..2b987e9061 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -61,10 +61,9 @@ deployment_groups: `) testModules = []Module{ { - Source: "./modules/network/vpc", - Kind: TerraformKind, - ID: "vpc", - WrapSettingsWith: make(map[string][]string), + Source: "./modules/network/vpc", + Kind: TerraformKind, + ID: "vpc", Settings: NewDict(map[string]cty.Value{ "network_name": cty.StringVal("$\"${var.deployment_name}_net\""), "project_id": cty.StringVal("project_name"), @@ -165,18 +164,14 @@ func setTestModuleInfo(mod Module, info modulereader.ModuleInfo) { func getDeploymentConfigForTest() DeploymentConfig { testModule := Module{ - Source: "testSource", - Kind: TerraformKind, - ID: "testModule", - Use: []ModuleID{}, - WrapSettingsWith: make(map[string][]string), + Source: "testSource", + Kind: TerraformKind, + ID: "testModule", } testModuleWithLabels := Module{ - Source: "./role/source", - ID: "testModuleWithLabels", - Kind: TerraformKind, - Use: []ModuleID{}, - WrapSettingsWith: make(map[string][]string), + Source: "./role/source", + ID: "testModuleWithLabels", + Kind: TerraformKind, Settings: NewDict(map[string]cty.Value{ "moduleLabel": cty.StringVal("moduleLabelValue"), }), @@ -187,7 +182,6 @@ func getDeploymentConfigForTest() DeploymentConfig { } testBlueprint := Blueprint{ BlueprintName: "simple", - Validators: nil, Vars: NewDict(map[string]cty.Value{ "deployment_name": cty.StringVal("deployment_name"), "project_id": cty.StringVal("test-project"), @@ -204,8 +198,7 @@ func getDeploymentConfigForTest() DeploymentConfig { setTestModuleInfo(testModule, testModuleInfo) setTestModuleInfo(testModuleWithLabels, testModuleInfo) - // the next two steps simulate relevant steps in ghpc expand - dc.addMetadataToModules() + // the next step simulates the relevant step in ghpc expand dc.addDefaultValidators() return dc @@ -224,6 +217,7 @@ func getBasicDeploymentConfigWithTestModule() DeploymentConfig { }, }, } + return DeploymentConfig{ Config: Blueprint{ BlueprintName: "simple", @@ -322,7 +316,7 @@ func getMultiGroupDeploymentConfig() DeploymentConfig { matchingIntragroupName1: cty.StringVal("explicit-intra-value"), matchingIntragroupName2: ModuleRef(mod0.ID, matchingIntragroupName2).AsExpression().AsValue(), }), - Use: []ModuleID{mod0.ID}, + Use: ModuleIDs{mod0.ID}, } setTestModuleInfo(mod1, testModuleInfo1) @@ -335,7 +329,7 @@ func
getMultiGroupDeploymentConfig() DeploymentConfig { ID: "TestModule2", Kind: TerraformKind, Source: testModuleSource2, - Use: []ModuleID{mod0.ID}, + Use: ModuleIDs{mod0.ID}, } setTestModuleInfo(mod2, testModuleInfo2) @@ -356,7 +350,6 @@ func getMultiGroupDeploymentConfig() DeploymentConfig { }, } - dc.addMetadataToModules() dc.addDefaultValidators() return dc } @@ -393,20 +386,25 @@ func getDeploymentConfigWithTestModuleEmptyKind() DeploymentConfig { // config.go func (s *MySuite) TestExpandConfig(c *C) { dc := getBasicDeploymentConfigWithTestModule() - dc.ExpandConfig() + for v := range dc.getValidators() { // skip all validators + dc.Config.Validators = append( + dc.Config.Validators, + validatorConfig{Validator: v, Skip: true}) + } + c.Check(dc.ExpandConfig(), IsNil) } func (s *MySuite) TestCheckModulesAndGroups(c *C) { { // Duplicate module name same group g := DeploymentGroup{Name: "ice", Modules: []Module{{ID: "pony"}, {ID: "pony"}}} err := checkModulesAndGroups([]DeploymentGroup{g}) - c.Check(err, ErrorMatches, "module IDs must be unique: pony used more than once") + c.Check(err, ErrorMatches, ".*pony used more than once") } { // Duplicate module name different groups ice := DeploymentGroup{Name: "ice", Modules: []Module{{ID: "pony"}}} fire := DeploymentGroup{Name: "fire", Modules: []Module{{ID: "pony"}}} err := checkModulesAndGroups([]DeploymentGroup{ice, fire}) - c.Check(err, ErrorMatches, "module IDs must be unique: pony used more than once") + c.Check(err, ErrorMatches, ".*pony used more than once") } { // Mixing module kinds g := DeploymentGroup{Name: "ice", Modules: []Module{ @@ -414,32 +412,32 @@ func (s *MySuite) TestCheckModulesAndGroups(c *C) { {ID: "zebra", Kind: TerraformKind}, }} err := checkModulesAndGroups([]DeploymentGroup{g}) - c.Check(err, ErrorMatches, "mixing modules of differing kinds in a deployment group is not supported: deployment group ice, got packer and terraform") + c.Check(err, ErrorMatches, ".*got packer and terraform") } } func (s *MySuite) TestListUnusedModules(c *C) { { // No modules in "use" m := Module{ID: "m"} - c.Check(m.listUnusedModules(), DeepEquals, []ModuleID{}) + c.Check(m.listUnusedModules(), DeepEquals, ModuleIDs{}) } { // Useful m := Module{ ID: "m", - Use: []ModuleID{"w"}, + Use: ModuleIDs{"w"}, Settings: NewDict(map[string]cty.Value{ - "x": cty.True.Mark(ProductOfModuleUse{"w"})})} - c.Check(m.listUnusedModules(), DeepEquals, []ModuleID{}) + "x": AsProductOfModuleUse(cty.True, "w")})} + c.Check(m.listUnusedModules(), DeepEquals, ModuleIDs{}) } { // Unused m := Module{ ID: "m", - Use: []ModuleID{"w", "u"}, + Use: ModuleIDs{"w", "u"}, Settings: NewDict(map[string]cty.Value{ - "x": cty.True.Mark(ProductOfModuleUse{"w"})})} - c.Check(m.listUnusedModules(), DeepEquals, []ModuleID{"u"}) + "x": AsProductOfModuleUse(cty.True, "w")})} + c.Check(m.listUnusedModules(), DeepEquals, ModuleIDs{"u"}) } } @@ -518,7 +516,7 @@ func (s *MySuite) TestGetModule(c *C) { func (s *MySuite) TestDeploymentName(c *C) { bp := Blueprint{} - var e *InputValueError + var e InputValueError // Is deployment_name a valid string? bp.Vars.Set("deployment_name", cty.StringVal("yellow")) @@ -565,7 +563,7 @@ func (s *MySuite) TestDeploymentName(c *C) { func (s *MySuite) TestCheckBlueprintName(c *C) { dc := getDeploymentConfigForTest() - var e *InputValueError + var e InputValueError // Is blueprint_name a valid string? 
err := dc.Config.checkBlueprintName() @@ -601,21 +599,21 @@ func (s *MySuite) TestNewBlueprint(c *C) { dc := getDeploymentConfigForTest() outFile := filepath.Join(tmpTestDir, "out_TestNewBlueprint.yaml") c.Assert(dc.ExportBlueprint(outFile), IsNil) - newDC, err := NewDeploymentConfig(outFile) + newDC, _, err := NewDeploymentConfig(outFile) c.Assert(err, IsNil) c.Assert(dc.Config, DeepEquals, newDC.Config) } func (s *MySuite) TestImportBlueprint(c *C) { - obtainedBlueprint, err := importBlueprint(simpleYamlFilename) + bp, _, err := importBlueprint(simpleYamlFilename) c.Assert(err, IsNil) - c.Assert(obtainedBlueprint.BlueprintName, + c.Assert(bp.BlueprintName, Equals, expectedSimpleBlueprint.BlueprintName) c.Assert( - obtainedBlueprint.Vars.Get("labels"), + bp.Vars.Get("labels"), DeepEquals, expectedSimpleBlueprint.Vars.Get("labels")) - c.Assert(obtainedBlueprint.DeploymentGroups[0].Modules[0].ID, + c.Assert(bp.DeploymentGroups[0].Modules[0].ID, Equals, expectedSimpleBlueprint.DeploymentGroups[0].Modules[0].ID) } @@ -725,7 +723,7 @@ dragon: "Lews Therin Telamon"`) file.Close() // should fail on strict unmarshal as field does not match schema - _, err := importBlueprint(filename) + _, _, err := importBlueprint(filename) c.Check(err, NotNil) } @@ -1037,3 +1035,23 @@ func (s *MySuite) TestCheckModuleSettings(c *C) { bp.Vars.Set("zebra", cty.StringVal("stripes")) c.Check(checkModuleSettings(bp), IsNil) } + +func (s *MySuite) TestGroupNameValidate(c *C) { + // Invalid + c.Check(GroupName("").Validate(), NotNil) + c.Check(GroupName("-").Validate(), NotNil) + c.Check(GroupName("-g").Validate(), NotNil) + c.Check(GroupName("g-").Validate(), NotNil) + c.Check(GroupName("g+").Validate(), NotNil) + c.Check(GroupName("a b").Validate(), NotNil) + + // Valid + c.Check(GroupName("g").Validate(), IsNil) + c.Check(GroupName("gg").Validate(), IsNil) + c.Check(GroupName("_g").Validate(), IsNil) + c.Check(GroupName("g_dd").Validate(), IsNil) + c.Check(GroupName("g_dd-ff").Validate(), IsNil) + c.Check(GroupName("g-dd_ff").Validate(), IsNil) + c.Check(GroupName("1").Validate(), IsNil) + c.Check(GroupName("12g").Validate(), IsNil) +} diff --git a/pkg/config/dict.go b/pkg/config/dict.go index f9308f17d0..8e789488a8 100644 --- a/pkg/config/dict.go +++ b/pkg/config/dict.go @@ -15,13 +15,9 @@ package config import ( - "encoding/json" "fmt" "github.com/zclconf/go-cty/cty" - "github.com/zclconf/go-cty/cty/gocty" - ctyJson "github.com/zclconf/go-cty/cty/json" - "gopkg.in/yaml.v3" ) // Dict maps string key to cty.Value. @@ -85,119 +81,10 @@ func (d *Dict) AsObject() cty.Value { return cty.ObjectVal(d.Items()) } -// YamlValue is wrapper around cty.Value to handle YAML unmarshal. -type YamlValue struct { - v cty.Value -} - -// Unwrap returns wrapped cty.Value. -func (y YamlValue) Unwrap() cty.Value { - return y.v -} - -// UnmarshalYAML implements custom YAML unmarshaling. 
-func (y *YamlValue) UnmarshalYAML(n *yaml.Node) error { - var err error - switch n.Kind { - case yaml.ScalarNode: - err = y.unmarshalScalar(n) - case yaml.MappingNode: - err = y.unmarshalObject(n) - case yaml.SequenceNode: - err = y.unmarshalTuple(n) - default: - err = fmt.Errorf("line %d: cannot decode node with unknown kind %d", n.Line, n.Kind) - } - return err -} - -func (y *YamlValue) unmarshalScalar(n *yaml.Node) error { - var s interface{} - if err := n.Decode(&s); err != nil { - return err - } - ty, err := gocty.ImpliedType(s) - if err != nil { - return err - } - if y.v, err = gocty.ToCtyValue(s, ty); err != nil { - return err - } - - if l, is := IsYamlExpressionLiteral(y.v); is { // HCL literal - var e Expression - if e, err = ParseExpression(l); err != nil { - return err - } - y.v = e.AsValue() - } else if y.v.Type() == cty.String && hasVariable(y.v.AsString()) { // "simple" variable - e, err := SimpleVarToExpression(y.v.AsString()) - if err != nil { - return err - } - y.v = e.AsValue() - } - return nil -} - -func (y *YamlValue) unmarshalObject(n *yaml.Node) error { - var my map[string]YamlValue - if err := n.Decode(&my); err != nil { - return err - } - mv := map[string]cty.Value{} - for k, y := range my { - mv[k] = y.v - } - y.v = cty.ObjectVal(mv) - return nil -} - -func (y *YamlValue) unmarshalTuple(n *yaml.Node) error { - var ly []YamlValue - if err := n.Decode(&ly); err != nil { - return err - } - lv := []cty.Value{} - for _, y := range ly { - lv = append(lv, y.v) - } - y.v = cty.TupleVal(lv) - return nil -} - -// UnmarshalYAML implements custom YAML unmarshaling. -func (d *Dict) UnmarshalYAML(n *yaml.Node) error { - var m map[string]YamlValue - if err := n.Decode(&m); err != nil { - return err - } - for k, y := range m { - d.Set(k, y.v) - } - return nil -} - -// MarshalYAML implements custom YAML marshaling. -func (d Dict) MarshalYAML() (interface{}, error) { - o, _ := cty.Transform(d.AsObject(), func(p cty.Path, v cty.Value) (cty.Value, error) { - if e, is := IsExpressionValue(v); is { - return e.makeYamlExpressionValue(), nil - } - return v, nil - }) - - j := ctyJson.SimpleJSONValue{Value: o} - b, err := j.MarshalJSON() - if err != nil { - return nil, fmt.Errorf("failed to marshal JSON: %v", err) - } - var g interface{} - err = json.Unmarshal(b, &g) - if err != nil { - return nil, fmt.Errorf("failed to unmarshal JSON: %v", err) - } - return g, nil +// IsZero determines whether it should be omitted when YAML marshaling +// with the `omitempty` flag. +func (d Dict) IsZero() bool { + return len(d.m) == 0 } // Eval returns a copy of this Dict, where all Expressions diff --git a/pkg/config/dict_test.go b/pkg/config/dict_test.go index 5405c9f382..9626eea62e 100644 --- a/pkg/config/dict_test.go +++ b/pkg/config/dict_test.go @@ -20,7 +20,6 @@ import ( "github.com/google/go-cmp/cmp" "github.com/zclconf/go-cty-debug/ctydebug" "github.com/zclconf/go-cty/cty" - "gopkg.in/yaml.v3" ) func TestZeroValueValid(t *testing.T) { @@ -82,109 +81,6 @@ func TestItemsAreCopy(t *testing.T) { } } -func TestYAMLDecode(t *testing.T) { - yml := ` -s1: "red" -s2: pink -m1: {} -m2: - m2f1: green - m2f2: [1, 0.2, -3, false] - gv: $(vars.gold) - mv: $(lime.bloom) - hl: ((3 + 9)) -` - want := Dict{} - want. - Set("s1", cty.StringVal("red")). - Set("s2", cty.StringVal("pink")). - Set("m1", cty.EmptyObjectVal).
- Set("m2", cty.ObjectVal(map[string]cty.Value{ - "m2f1": cty.StringVal("green"), - "m2f2": cty.TupleVal([]cty.Value{ - cty.NumberIntVal(1), - cty.NumberFloatVal(0.2), - cty.NumberIntVal(-3), - cty.False, - }), - "gv": MustParseExpression("var.gold").AsValue(), - "mv": MustParseExpression("module.lime.bloom").AsValue(), - "hl": MustParseExpression("3 + 9").AsValue(), - })) - var got Dict - if err := yaml.Unmarshal([]byte(yml), &got); err != nil { - t.Fatalf("failed to decode: %v", err) - } - if diff := cmp.Diff(want.Items(), got.Items(), ctydebug.CmpOptions); diff != "" { - t.Errorf("diff (-want +got):\n%s", diff) - } -} - -func TestMarshalYAML(t *testing.T) { - d := Dict{} - d. - Set("s1", cty.StringVal("red")). - Set("m1", cty.EmptyObjectVal). - Set("m2", cty.ObjectVal(map[string]cty.Value{ - "m2f1": cty.StringVal("green"), - "m2f2": cty.TupleVal([]cty.Value{ - cty.NumberIntVal(1), - cty.NumberFloatVal(0.2), - cty.NumberIntVal(-3), - cty.False, - MustParseExpression("7 + 4").AsValue(), - }), - })) - want := map[string]interface{}{ - "s1": "red", - "m1": map[string]interface{}{}, - "m2": map[string]interface{}{ - "m2f1": "green", - "m2f2": []interface{}{1.0, 0.2, -3.0, false, "((7 + 4))"}, - }, - } - got, err := d.MarshalYAML() - if err != nil { - t.Fatalf("failed to marshal: %v", err) - } - if diff := cmp.Diff(want, got); diff != "" { - t.Errorf("diff (-want +got):\n%s", diff) - } -} - -func TestYAMLMarshalIntAsInt(t *testing.T) { - d := Dict{} - d.Set("zebra", cty.NumberIntVal(5)) - want := "zebra: 5\n" - got, err := yaml.Marshal(d) - if err != nil { - t.Fatalf("failed to marshal: %v", err) - } - if diff := cmp.Diff(want, string(got)); diff != "" { - t.Errorf("diff (-want +got):\n%s", diff) - } -} - -func TestYAMLDecodeWithAlias(t *testing.T) { - yml := ` -pony: &passtime -- eat -- sleep -zebra: *passtime -` - want := Dict{} - want. - Set("pony", cty.TupleVal([]cty.Value{cty.StringVal("eat"), cty.StringVal("sleep")})). - Set("zebra", cty.TupleVal([]cty.Value{cty.StringVal("eat"), cty.StringVal("sleep")})) - var got Dict - if err := yaml.Unmarshal([]byte(yml), &got); err != nil { - t.Fatalf("failed to decode: %v", err) - } - if diff := cmp.Diff(want.Items(), got.Items(), ctydebug.CmpOptions); diff != "" { - t.Errorf("diff (-want +got):\n%s", diff) - } -} - func TestEval(t *testing.T) { bp := Blueprint{ Vars: NewDict(map[string]cty.Value{ diff --git a/pkg/config/errors.go b/pkg/config/errors.go new file mode 100644 index 0000000000..dd9fba7cff --- /dev/null +++ b/pkg/config/errors.go @@ -0,0 +1,33 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package config + +import ( + "fmt" +) + +// BpError is an error wrapper to augment Path +type BpError struct { + Path Path + Err error +} + +func (e BpError) Error() string { + return fmt.Sprintf("%s: %s", e.Path, e.Err) +} + +func (e BpError) Unwrap() error { + return e.Err +} diff --git a/pkg/config/expand.go b/pkg/config/expand.go index 5f0ee19eb6..b8236d843e 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -16,7 +16,6 @@ package config import ( "fmt" - "log" "regexp" "strings" @@ -45,59 +44,24 @@ var ( // expand expands variables and strings in the yaml config. Used directly by // ExpandConfig for the create and expand commands. -func (dc *DeploymentConfig) expand() { - if err := dc.addMetadataToModules(); err != nil { - log.Printf("could not determine required APIs: %v", err) - } - - if err := dc.expandBackends(); err != nil { - log.Fatalf("failed to apply default backend to deployment groups: %v", err) - } - - if err := dc.addDefaultValidators(); err != nil { - log.Fatalf( - "failed to update validators when expanding the config: %v", err) - } - - if err := dc.combineLabels(); err != nil { - log.Fatalf( - "failed to update module labels when expanding the config: %v", err) - } +func (dc *DeploymentConfig) expand() error { + dc.expandBackends() + dc.addDefaultValidators() + dc.combineLabels() if err := dc.applyUseModules(); err != nil { - log.Fatalf( - "failed to apply \"use\" modules when expanding the config: %v", err) + return err } if err := dc.applyGlobalVariables(); err != nil { - log.Fatalf( - "failed to apply deployment variables in modules when expanding the config: %v", - err) + return err } dc.Config.populateOutputs() + return nil } -func (dc *DeploymentConfig) addMetadataToModules() error { - return dc.Config.WalkModules(func(mod *Module) error { - if mod.RequiredApis != nil { - return nil - } - if dc.Config.Vars.Get("project_id").Type() != cty.String { - return fmt.Errorf("global variable project_id must be defined") - } - requiredAPIs := mod.InfoOrDie().RequiredApis - if requiredAPIs == nil { - requiredAPIs = []string{} - } - mod.RequiredApis = map[string][]string{ - "$(vars.project_id)": requiredAPIs, - } - return nil - }) -} - -func (dc *DeploymentConfig) expandBackends() error { +func (dc *DeploymentConfig) expandBackends() { // 1. DEFAULT: use TerraformBackend configuration (if supplied) in each // resource group // 2. 
If top-level TerraformBackendDefaults is defined, insert that @@ -127,7 +91,6 @@ func (dc *DeploymentConfig) expandBackends() error { } } } - return nil } func getModuleInputMap(inputs []modulereader.VarInfo) map[string]string { @@ -140,22 +103,24 @@ func getModuleInputMap(inputs []modulereader.VarInfo) map[string]string { // initialize a Toolkit setting that corresponds to a module input of type list // create new list if unset, append if already set, error if value not a list -func (mod *Module) addListValue(settingName string, value cty.Value) error { - var cur []cty.Value - if !mod.Settings.Has(settingName) { - mod.createWrapSettingsWith() - mod.WrapSettingsWith[settingName] = []string{"flatten([", "])"} - cur = []cty.Value{} - } else { - v := mod.Settings.Get(settingName) - ty := v.Type() - if !ty.IsTupleType() && !ty.IsSetType() && !ty.IsSetType() { - return fmt.Errorf("%s: module %s, setting %s", errorMessages["appendToNonList"], mod.ID, settingName) +func (mod *Module) addListValue(settingName string, value cty.Value) { + args := []cty.Value{value} + mods := map[ModuleID]bool{} + for _, mod := range IsProductOfModuleUse(value) { + mods[mod] = true + } + + if mod.Settings.Has(settingName) { + cur := mod.Settings.Get(settingName) + for _, mod := range IsProductOfModuleUse(cur) { + mods[mod] = true } - cur = mod.Settings.Get(settingName).AsValueSlice() + args = append(args, cur) } - mod.Settings.Set(settingName, cty.TupleVal(append(cur, value))) - return nil + + exp := FunctionCallExpression("flatten", cty.TupleVal(args)) + val := AsProductOfModuleUse(exp.AsValue(), maps.Keys(mods)...) + mod.Settings.Set(settingName, val) } // useModule matches input variables in a "using" module to output values @@ -166,66 +131,52 @@ func (mod *Module) addListValue(settingName string, value cty.Value) error { // a list, in which case output values are appended and flattened using HCL. // // mod: "using" module as defined above -// useMod: "used" module as defined above -// settingsToIgnore: a list of module settings not to modify for any reason; -// typical usage will be to leave explicit blueprint settings unmodified -func useModule( - mod *Module, - useMod Module, - settingsToIgnore []string, -) error { +// use: "used" module as defined above +func useModule(mod *Module, use Module) { modInputsMap := getModuleInputMap(mod.InfoOrDie().Inputs) - for _, useOutput := range useMod.InfoOrDie().Outputs { - settingName := useOutput.Name - - // explicitly ignore these settings (typically those in blueprint) - if slices.Contains(settingsToIgnore, settingName) { - continue - } + for _, useOutput := range use.InfoOrDie().Outputs { + setting := useOutput.Name // Skip settings that do not have matching module inputs - inputType, ok := modInputsMap[settingName] + inputType, ok := modInputsMap[setting] if !ok { continue } + alreadySet := mod.Settings.Has(setting) + if alreadySet && len(IsProductOfModuleUse(mod.Settings.Get(setting))) == 0 { + continue // set explicitly, skip + } + // skip settings that are not of list type, but already have a value // these were probably added by a previous call to this function - alreadySet := mod.Settings.Has(settingName) isList := strings.HasPrefix(inputType, "list") if alreadySet && !isList { continue } - v := ModuleRef(useMod.ID, settingName). - AsExpression(). - AsValue(). 
- Mark(ProductOfModuleUse{Module: useMod.ID}) + v := AsProductOfModuleUse( + ModuleRef(use.ID, setting).AsExpression().AsValue(), + use.ID) if !isList { - mod.Settings.Set(settingName, v) + mod.Settings.Set(setting, v) } else { - if err := mod.addListValue(settingName, v); err != nil { - return err - } + mod.addListValue(setting, v) } } - return nil } // applyUseModules applies variables from modules listed in the "use" field // when/if applicable func (dc *DeploymentConfig) applyUseModules() error { return dc.Config.WalkModules(func(m *Module) error { - settingsInBlueprint := maps.Keys(m.Settings.Items()) for _, u := range m.Use { used, err := dc.Config.Module(u) if err != nil { return err } - if err := useModule(m, *used, settingsInBlueprint); err != nil { - return err - } + useModule(m, *used) } return nil }) @@ -256,7 +207,7 @@ func getRole(source string) string { // combineLabels sets defaults for labels based on other variables and merges // the global labels defined in Vars with module setting labels. It also // determines the role and sets it for each module independently. -func (dc *DeploymentConfig) combineLabels() error { +func (dc *DeploymentConfig) combineLabels() { vars := &dc.Config.Vars defaults := map[string]cty.Value{ blueprintLabel: cty.StringVal(dc.Config.BlueprintName), @@ -266,69 +217,42 @@ func (dc *DeploymentConfig) combineLabels() error { if !vars.Has(labels) { // Shouldn't happen if blueprint was properly constructed vars.Set(labels, cty.EmptyObjectVal) } - gl := mergeLabels(vars.Get(labels).AsValueMap(), defaults) + gl := mergeMaps(defaults, vars.Get(labels).AsValueMap()) vars.Set(labels, cty.ObjectVal(gl)) - return dc.Config.WalkModules(func(mod *Module) error { - return combineModuleLabels(mod, *dc) + dc.Config.WalkModules(func(mod *Module) error { + combineModuleLabels(mod, *dc) + return nil }) } -func combineModuleLabels(mod *Module, dc DeploymentConfig) error { - mod.createWrapSettingsWith() +func combineModuleLabels(mod *Module, dc DeploymentConfig) { labels := "labels" - - // previously expanded blueprint, user written BPs do not use `WrapSettingsWith` - if _, ok := mod.WrapSettingsWith[labels]; ok { - return nil // Do nothing - } - - // Check if labels are set for this module if !moduleHasInput(*mod, labels) { - return nil + return // no op } - modLabels := map[string]cty.Value{} - if mod.Settings.Has(labels) { - // Cast into map so we can index into them - v := mod.Settings.Get(labels) - ty := v.Type() - if !ty.IsObjectType() && !ty.IsMapType() { - return fmt.Errorf("%s, Module %s, labels type: %s", - errorMessages["settingsLabelType"], mod.ID, ty.FriendlyName()) - } - if v.AsValueMap() != nil { - modLabels = v.AsValueMap() - } + extra := map[string]cty.Value{ + roleLabel: cty.StringVal(getRole(mod.Source))} + args := []cty.Value{ + GlobalRef(labels).AsExpression().AsValue(), + cty.ObjectVal(extra), } - // Add the role (e.g. 
compute, network, etc) - if _, exists := modLabels[roleLabel]; !exists { - modLabels[roleLabel] = cty.StringVal(getRole(mod.Source)) + if !mod.Settings.Get(labels).IsNull() { + args = append(args, mod.Settings.Get(labels)) } - if mod.Kind == TerraformKind { - // Terraform module labels to be expressed as - // `merge(var.labels, { ghpc_role=..., **settings.labels })` - mod.WrapSettingsWith[labels] = []string{"merge(", ")"} - ref := GlobalRef(labels).AsExpression() - args := []cty.Value{ref.AsValue(), cty.ObjectVal(modLabels)} - mod.Settings.Set(labels, cty.TupleVal(args)) - } else if mod.Kind == PackerKind { - g := dc.Config.Vars.Get(labels).AsValueMap() - mod.Settings.Set(labels, cty.ObjectVal(mergeLabels(modLabels, g))) - } - return nil + mod.Settings.Set(labels, FunctionCallExpression("merge", args...).AsValue()) } -// mergeLabels returns a new map with the keys from both maps. If a key exists in both maps, -// the value from the first map is used. -func mergeLabels(a map[string]cty.Value, b map[string]cty.Value) map[string]cty.Value { +// mergeMaps takes an arbitrary number of maps, and returns a single map that contains +// a merged set of elements from all arguments. +// If more than one given map defines the same key, then the one that is later in the argument sequence takes precedence. +// See https://developer.hashicorp.com/terraform/language/functions/merge +func mergeMaps(ms ...map[string]cty.Value) map[string]cty.Value { r := map[string]cty.Value{} - for k, v := range a { - r[k] = v - } - for k, v := range b { - if _, exists := a[k]; !exists { + for _, m := range ms { + for k, v := range m { r[k] = v } } @@ -435,7 +359,7 @@ func hasVariable(str string) bool { // this function adds default validators to the blueprint. // default validators are only added for global variables that exist -func (dc *DeploymentConfig) addDefaultValidators() error { +func (dc *DeploymentConfig) addDefaultValidators() { if dc.Config.Validators == nil { dc.Config.Validators = []validatorConfig{} } @@ -510,8 +434,6 @@ func (dc *DeploymentConfig) addDefaultValidators() error { } dc.Config.Validators = append(dc.Config.Validators, v) } - - return nil } // FindAllIntergroupReferences finds all intergroup references within the group diff --git a/pkg/config/expand_test.go b/pkg/config/expand_test.go index 1d1c8309e7..12e96ed563 100644 --- a/pkg/config/expand_test.go +++ b/pkg/config/expand_test.go @@ -24,21 +24,16 @@ import ( func (s *MySuite) TestExpand(c *C) { dc := getDeploymentConfigForTest() - fmt.Println("TEST_DEBUG: If tests die without report, check TestExpand") - dc.expand() + c.Check(dc.expand(), IsNil) } func (s *MySuite) TestExpandBackends(c *C) { dc := getDeploymentConfigForTest() deplName := dc.Config.Vars.Get("deployment_name").AsString() - // Simple test: Does Nothing - err := dc.expandBackends() - c.Assert(err, IsNil) - dc.Config.TerraformBackendDefaults = TerraformBackend{Type: "gcs"} - err = dc.expandBackends() - c.Assert(err, IsNil) + dc.expandBackends() + grp := dc.Config.DeploymentGroups[0] c.Assert(grp.TerraformBackend.Type, Not(Equals), "") gotPrefix := grp.TerraformBackend.Configuration.Get("prefix") @@ -50,8 +45,8 @@ func (s *MySuite) TestExpandBackends(c *C) { Name: "group2", } dc.Config.DeploymentGroups = append(dc.Config.DeploymentGroups, newGroup) - err = dc.expandBackends() - c.Assert(err, IsNil) + dc.expandBackends() + newGrp := dc.Config.DeploymentGroups[1] c.Assert(newGrp.TerraformBackend.Type, Not(Equals), "") gotPrefix = newGrp.TerraformBackend.Configuration.Get("prefix") 
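The mergeMaps helper above deliberately inverts the precedence of the old mergeLabels: later arguments now win, matching Terraform's merge() semantics, which is why defaults are passed first and user-supplied labels last. A quick standalone illustration of that ordering (plain string maps instead of cty.Value, for brevity):

package main

import "fmt"

// mergeMaps over plain strings: later maps take precedence,
// as in Terraform's merge() and the cty version above.
func mergeMaps(ms ...map[string]string) map[string]string {
	r := map[string]string{}
	for _, m := range ms {
		for k, v := range m {
			r[k] = v
		}
	}
	return r
}

func main() {
	defaults := map[string]string{"ghpc_deployment": "golden", "ghpc_role": "compute"}
	user := map[string]string{"ghpc_deployment": "navy"}
	fmt.Println(mergeMaps(defaults, user))
	// map[ghpc_deployment:navy ghpc_role:compute]
}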
@@ -63,23 +58,21 @@ func (s *MySuite) TestAddListValue(c *C) { mod := Module{ID: "TestModule"} setting := "newSetting" - nonListSetting := "not-a-list" - first := cty.StringVal("value1") - second := cty.StringVal("value2") - - c.Assert(mod.addListValue(setting, first), IsNil) - c.Check(mod.Settings.Get(setting), DeepEquals, cty.TupleVal([]cty.Value{first})) + first := AsProductOfModuleUse(cty.StringVal("value1"), "mod1") + second := AsProductOfModuleUse(cty.StringVal("value2"), "mod2") - c.Assert(mod.addListValue(setting, second), IsNil) - c.Check(mod.Settings.Get(setting), DeepEquals, cty.TupleVal([]cty.Value{first, second})) + mod.addListValue(setting, first) + c.Check(mod.Settings.Get(setting), DeepEquals, + AsProductOfModuleUse(MustParseExpression(`flatten(["value1"])`).AsValue(), "mod1")) - mod.Settings.Set(nonListSetting, cty.StringVal("string-value")) - c.Assert(mod.addListValue(nonListSetting, second), NotNil) + mod.addListValue(setting, second) + c.Check(mod.Settings.Get(setting), DeepEquals, + AsProductOfModuleUse(MustParseExpression(`flatten(["value2", flatten(["value1"])])`).AsValue(), "mod1", "mod2")) } func (s *MySuite) TestUseModule(c *C) { // Setup - usedMod := Module{ + used := Module{ ID: "UsedModule", Source: "usedSource", } @@ -88,16 +81,14 @@ func (s *MySuite) TestUseModule(c *C) { Type: "number", } ref := ModuleRef("UsedModule", "val1").AsExpression().AsValue() - useMark := ProductOfModuleUse{"UsedModule"} { // Pass: No Inputs, No Outputs mod := Module{ID: "lime", Source: "modSource"} setTestModuleInfo(mod, modulereader.ModuleInfo{}) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{}) + setTestModuleInfo(used, modulereader.ModuleInfo{}) - err := useModule(&mod, usedMod, []string{}) - c.Check(err, IsNil) + useModule(&mod, used) c.Check(mod.Settings, DeepEquals, Dict{}) } @@ -105,11 +96,10 @@ func (s *MySuite) TestUseModule(c *C) { mod := Module{ID: "lime", Source: "limeTree"} setTestModuleInfo(mod, modulereader.ModuleInfo{}) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{ + setTestModuleInfo(used, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - err := useModule(&mod, usedMod, []string{}) - c.Check(err, IsNil) + useModule(&mod, used) c.Check(mod.Settings, DeepEquals, Dict{}) } @@ -118,14 +108,13 @@ func (s *MySuite) TestUseModule(c *C) { setTestModuleInfo(mod, modulereader.ModuleInfo{ Inputs: []modulereader.VarInfo{varInfoNumber}, }) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{ + setTestModuleInfo(used, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - err := useModule(&mod, usedMod, []string{}) - c.Check(err, IsNil) + useModule(&mod, used) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ - "val1": ref.Mark(useMark), + "val1": AsProductOfModuleUse(ref, "UsedModule"), }) } @@ -135,29 +124,28 @@ func (s *MySuite) TestUseModule(c *C) { setTestModuleInfo(mod, modulereader.ModuleInfo{ Inputs: []modulereader.VarInfo{varInfoNumber}, }) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{ + setTestModuleInfo(used, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - err := useModule(&mod, usedMod, []string{"val1"}) - c.Check(err, IsNil) + useModule(&mod, used) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{"val1": ref}) } { // Pass: re-apply used modules, should be a no-op // Assume no settings were in blueprint mod := Module{ID: "lime", Source: "limeTree"} - mod.Settings.Set("val1", ref.Mark(useMark)) + mod.Settings.Set("val1", 
AsProductOfModuleUse(ref, "UsedModule")) setTestModuleInfo(mod, modulereader.ModuleInfo{ Inputs: []modulereader.VarInfo{varInfoNumber}, }) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{ + setTestModuleInfo(used, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - err := useModule(&mod, usedMod, []string{}) - c.Check(err, IsNil) - c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{"val1": ref.Mark(useMark)}) + useModule(&mod, used) + c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ + "val1": AsProductOfModuleUse(ref, "UsedModule")}) } { // Pass: Single Input/Output match, input is list, not already set @@ -165,35 +153,32 @@ func (s *MySuite) TestUseModule(c *C) { setTestModuleInfo(mod, modulereader.ModuleInfo{ Inputs: []modulereader.VarInfo{{Name: "val1", Type: "list"}}, }) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{ + setTestModuleInfo(used, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - err := useModule(&mod, usedMod, []string{}) - c.Check(err, IsNil) + useModule(&mod, used) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ - "val1": cty.TupleVal([]cty.Value{ - ref.Mark(useMark), - })}) + "val1": AsProductOfModuleUse( + MustParseExpression(`flatten([module.UsedModule.val1])`).AsValue(), + "UsedModule")}) } { // Pass: Setting exists, Input is List, Output is not a list // Assume setting was not set in blueprint mod := Module{ID: "lime", Source: "limeTree"} - mod.Settings.Set("val1", cty.TupleVal([]cty.Value{ref})) + mod.Settings.Set("val1", AsProductOfModuleUse(cty.TupleVal([]cty.Value{ref}), "other")) setTestModuleInfo(mod, modulereader.ModuleInfo{ Inputs: []modulereader.VarInfo{{Name: "val1", Type: "list"}}, }) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{ + setTestModuleInfo(used, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - err := useModule(&mod, usedMod, []string{}) - c.Check(err, IsNil) + useModule(&mod, used) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ - "val1": cty.TupleVal([]cty.Value{ - ref, - ref.Mark(useMark), - })}) + "val1": AsProductOfModuleUse( + MustParseExpression(`flatten([module.UsedModule.val1,[module.UsedModule.val1]])`).AsValue(), + "other", "UsedModule")}) } { // Pass: Setting exists, Input is List, Output is not a list @@ -203,12 +188,11 @@ func (s *MySuite) TestUseModule(c *C) { setTestModuleInfo(mod, modulereader.ModuleInfo{ Inputs: []modulereader.VarInfo{{Name: "val1", Type: "list"}}, }) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{ + setTestModuleInfo(used, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - err := useModule(&mod, usedMod, []string{"val1"}) - c.Check(err, IsNil) + useModule(&mod, used) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ "val1": cty.TupleVal([]cty.Value{ref})}) } @@ -227,7 +211,7 @@ func (s *MySuite) TestApplyUseModules(c *C) { using := Module{ ID: "usingModule", Source: "path/using", - Use: []ModuleID{"usedModule"}, + Use: ModuleIDs{"usedModule"}, } used := Module{ID: "usedModule", Source: "path/used"} @@ -256,8 +240,7 @@ func (s *MySuite) TestApplyUseModules(c *C) { c.Assert(dc.applyUseModules(), IsNil) ref := ModuleRef("TestModule0", "test_inter_0").AsExpression().AsValue() c.Assert(m.Settings.Items(), DeepEquals, map[string]cty.Value{ - "test_inter_0": ref.Mark(ProductOfModuleUse{"TestModule0"}), - }) + "test_inter_0": AsProductOfModuleUse(ref, "TestModule0")}) } { // Deliberately break the 
match and see that no settings are added @@ -278,7 +261,6 @@ func (s *MySuite) TestCombineLabels(c *C) { coral := Module{ Source: "blue/salmon", - Kind: TerraformKind, ID: "coral", Settings: NewDict(map[string]cty.Value{ "labels": cty.ObjectVal(map[string]cty.Value{ @@ -290,21 +272,13 @@ func (s *MySuite) TestCombineLabels(c *C) { setTestModuleInfo(coral, infoWithLabels) // has no labels set - khaki := Module{Source: "brown/oak", Kind: TerraformKind, ID: "khaki"} + khaki := Module{Source: "brown/oak", ID: "khaki"} setTestModuleInfo(khaki, infoWithLabels) // has no labels set, also module has no labels input - silver := Module{Source: "ivory/black", Kind: TerraformKind, ID: "silver"} + silver := Module{Source: "ivory/black", ID: "silver"} setTestModuleInfo(silver, modulereader.ModuleInfo{Inputs: []modulereader.VarInfo{}}) - orange := Module{Source: "red/velvet", Kind: PackerKind, ID: "orange", Settings: NewDict(map[string]cty.Value{ - "labels": cty.ObjectVal(map[string]cty.Value{ - "olive": cty.StringVal("teal"), - "ghpc_deployment": cty.StringVal("navy"), - }), - })} - setTestModuleInfo(orange, infoWithLabels) - dc := DeploymentConfig{ Config: Blueprint{ BlueprintName: "simple", @@ -313,11 +287,10 @@ func (s *MySuite) TestCombineLabels(c *C) { }), DeploymentGroups: []DeploymentGroup{ {Name: "lime", Modules: []Module{coral, khaki, silver}}, - {Name: "pink", Modules: []Module{orange}}, }, }, } - c.Check(dc.combineLabels(), IsNil) + dc.combineLabels() // Were global labels created? c.Check(dc.Config.Vars.Get("labels"), DeepEquals, cty.ObjectVal(map[string]cty.Value{ @@ -330,37 +303,26 @@ func (s *MySuite) TestCombineLabels(c *C) { lime := dc.Config.DeploymentGroups[0] // Labels are set and override role coral = lime.Modules[0] - c.Check(coral.WrapSettingsWith["labels"], DeepEquals, []string{"merge(", ")"}) - c.Check(coral.Settings.Get("labels"), DeepEquals, cty.TupleVal([]cty.Value{ + c.Check(coral.Settings.Get("labels"), DeepEquals, FunctionCallExpression( + "merge", labelsRef, cty.ObjectVal(map[string]cty.Value{ - "magenta": cty.StringVal("orchid"), + "ghpc_role": cty.StringVal("blue")}), + cty.ObjectVal(map[string]cty.Value{ "ghpc_role": cty.StringVal("maroon"), - }), - })) + "magenta": cty.StringVal("orchid")}), + ).AsValue()) + // Labels are not set, infer role from module.source khaki = lime.Modules[1] - c.Check(khaki.WrapSettingsWith["labels"], DeepEquals, []string{"merge(", ")"}) - c.Check(khaki.Settings.Get("labels"), DeepEquals, cty.TupleVal([]cty.Value{ + c.Check(khaki.Settings.Get("labels"), DeepEquals, FunctionCallExpression( + "merge", labelsRef, - cty.ObjectVal(map[string]cty.Value{ - "ghpc_role": cty.StringVal("brown")}), - })) + cty.ObjectVal(map[string]cty.Value{"ghpc_role": cty.StringVal("brown")}), + ).AsValue()) // No labels input silver = lime.Modules[2] - c.Check(silver.WrapSettingsWith["labels"], IsNil) c.Check(silver.Settings.Get("labels"), DeepEquals, cty.NilVal) - - // Packer, include global include explicitly - // Keep overridden ghpc_deployment=navy - orange = dc.Config.DeploymentGroups[1].Modules[0] - c.Check(orange.WrapSettingsWith["labels"], IsNil) - c.Check(orange.Settings.Get("labels"), DeepEquals, cty.ObjectVal(map[string]cty.Value{ - "ghpc_blueprint": cty.StringVal("simple"), - "ghpc_deployment": cty.StringVal("navy"), - "ghpc_role": cty.StringVal("red"), - "olive": cty.StringVal("teal"), - })) } func (s *MySuite) TestApplyGlobalVariables(c *C) { diff --git a/pkg/config/expression.go b/pkg/config/expression.go index 23cc5b6ff8..a46ce9379f 100644 --- 
a/pkg/config/expression.go
+++ b/pkg/config/expression.go
@@ -16,12 +16,15 @@ package config
 import (
 	"fmt"
+	"regexp"
 	"strings"
 	"github.com/hashicorp/hcl/v2"
 	"github.com/hashicorp/hcl/v2/hclsyntax"
 	"github.com/hashicorp/hcl/v2/hclwrite"
 	"github.com/zclconf/go-cty/cty"
+	"github.com/zclconf/go-cty/cty/function"
+	"github.com/zclconf/go-cty/cty/function/stdlib"
 )
 // Reference is data struct that represents a reference to a variable.
@@ -253,6 +256,7 @@ type BaseExpression struct {
 func (e BaseExpression) Eval(bp Blueprint) (cty.Value, error) {
 	ctx := hcl.EvalContext{
 		Variables: map[string]cty.Value{"var": bp.Vars.AsObject()},
+		Functions: functions(),
 	}
 	v, diag := e.e.Value(&ctx)
 	if diag.HasErrors() {
@@ -348,3 +352,85 @@ func HasMark[T any](val cty.Value) (T, bool) {
 	}
 	return tgt, found
 }
+
+func escapeBlueprintVariables(s string) string {
+	// Convert \$(not.variable) to $(not.variable)
+	re := regexp.MustCompile(`\\\$\(`)
+	return re.ReplaceAllString(s, `$(`)
+}
+
+func escapeLiteralVariables(s string) string {
+	// Convert \((not.variable)) to ((not.variable))
+	re := regexp.MustCompile(`\\\(\(`)
+	return re.ReplaceAllString(s, `((`)
+}
+
+// TokensForValue is a modification of hclwrite.TokensForValue.
+// The only difference in behavior is handling "HCL literal" strings.
+func TokensForValue(val cty.Value) hclwrite.Tokens {
+	if val.IsNull() { // terminate early, as a Null value can have any type (e.g. String)
+		return hclwrite.TokensForValue(val)
+	}
+
+	// We need to handle both cases, until all "expression" users are moved to Expression
+	if e, is := IsExpressionValue(val); is {
+		return e.Tokenize()
+	}
+	val, _ = val.Unmark() // remove marks, as we don't need them anymore
+	if s, is := IsYamlExpressionLiteral(val); is { // return it "as is"
+		return hclwrite.TokensForIdentifier(s)
+	}
+
+	ty := val.Type()
+	if ty == cty.String {
+		s := val.AsString()
+		// The order of application matters for edge cases like: `\$\((` -> `$((`
+		s = escapeLiteralVariables(s)
+		s = escapeBlueprintVariables(s)
+		return hclwrite.TokensForValue(cty.StringVal(s))
+	}
+
+	if ty.IsListType() || ty.IsSetType() || ty.IsTupleType() {
+		tl := []hclwrite.Tokens{}
+		for it := val.ElementIterator(); it.Next(); {
+			_, v := it.Element()
+			tl = append(tl, TokensForValue(v))
+		}
+		return hclwrite.TokensForTuple(tl)
+	}
+	if ty.IsMapType() || ty.IsObjectType() {
+		tl := []hclwrite.ObjectAttrTokens{}
+		for it := val.ElementIterator(); it.Next(); {
+			k, v := it.Element()
+			kt := hclwrite.TokensForIdentifier(k.AsString())
+			if !hclsyntax.ValidIdentifier(k.AsString()) {
+				kt = TokensForValue(k)
+			}
+			vt := TokensForValue(v)
+			tl = append(tl, hclwrite.ObjectAttrTokens{Name: kt, Value: vt})
+		}
+		return hclwrite.TokensForObject(tl)
+
+	}
+	return hclwrite.TokensForValue(val) // rely on hclwrite implementation
+}
+
+// FunctionCallExpression is a helper to build a function call expression.
+func FunctionCallExpression(n string, args ...cty.Value) Expression {
+	if _, ok := functions()[n]; !ok {
+		panic("unknown function " + n)
+	}
+	ta := make([]hclwrite.Tokens, len(args))
+	for i, a := range args {
+		ta[i] = TokensForValue(a)
+	}
+	toks := hclwrite.TokensForFunctionCall(n, ta...)
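+	// At this point `toks` holds the rendered call, e.g. merge(<arg0>, <arg1>);
+	// printing and re-parsing the bytes below yields a regular Expression that
+	// can later be evaluated or tokenized like any other.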
+ return MustParseExpression(string(toks.Bytes())) +} + +func functions() map[string]function.Function { + return map[string]function.Function{ + "flatten": stdlib.FlattenFunc, + "merge": stdlib.MergeFunc, + } +} diff --git a/pkg/config/expression_test.go b/pkg/config/expression_test.go index dcfcf98785..be6206a1ee 100644 --- a/pkg/config/expression_test.go +++ b/pkg/config/expression_test.go @@ -20,6 +20,8 @@ import ( "github.com/google/go-cmp/cmp" "github.com/hashicorp/hcl/v2" "github.com/hashicorp/hcl/v2/hclsyntax" + "github.com/hashicorp/hcl/v2/hclwrite" + "github.com/zclconf/go-cty-debug/ctydebug" "github.com/zclconf/go-cty/cty" ) @@ -141,3 +143,96 @@ func TestSimpleVarToExpression(t *testing.T) { }) } } + +func TestTokensForValueNoLiteral(t *testing.T) { + val := cty.ObjectVal(map[string]cty.Value{ + "tan": cty.TupleVal([]cty.Value{ + cty.StringVal("biege"), + cty.NullVal(cty.String), + cty.MapVal(map[string]cty.Value{ + "cu": cty.NumberIntVal(29), + "ba": cty.NumberIntVal(56), + })}), + "pony.zebra": cty.NilVal, + }) + want := hclwrite.NewEmptyFile() + want.Body().AppendUnstructuredTokens(hclwrite.TokensForValue(val)) + + got := hclwrite.NewEmptyFile() + got.Body().AppendUnstructuredTokens(TokensForValue(val)) + + if diff := cmp.Diff(string(want.Bytes()), string(got.Bytes())); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} + +func TestTokensForValueWithLiteral(t *testing.T) { + val := cty.ObjectVal(map[string]cty.Value{ + "tan": cty.TupleVal([]cty.Value{ + cty.StringVal("((var.kilo + 8))"), // HCL literal + MustParseExpression("var.tina + 4").AsValue(), // HclExpression value + })}) + want := ` +{ + tan = [var.kilo + 8, var.tina + 4] +}`[1:] + + gotF := hclwrite.NewEmptyFile() + gotF.Body().AppendUnstructuredTokens(TokensForValue(val)) + got := hclwrite.Format(gotF.Bytes()) // format to normalize whitespace + + if diff := cmp.Diff(want, string(got)); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} + +func TestFlattenFunctionCallExpression(t *testing.T) { + bp := Blueprint{Vars: NewDict(map[string]cty.Value{ + "three": cty.NumberIntVal(3), + })} + expr := FunctionCallExpression("flatten", cty.TupleVal([]cty.Value{ + cty.TupleVal([]cty.Value{cty.NumberIntVal(1), cty.NumberIntVal(2)}), + GlobalRef("three").AsExpression().AsValue(), + })) + + want := cty.TupleVal([]cty.Value{ + cty.NumberIntVal(1), + cty.NumberIntVal(2), + cty.NumberIntVal(3)}) + + got, err := expr.Eval(bp) + if err != nil { + t.Errorf("got unexpected error: %s", err) + } + if diff := cmp.Diff(want, got, ctydebug.CmpOptions); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} + +func TestMergeFunctionCallExpression(t *testing.T) { + bp := Blueprint{Vars: NewDict(map[string]cty.Value{ + "fix": cty.ObjectVal(map[string]cty.Value{ + "two": cty.NumberIntVal(2), + }), + })} + expr := FunctionCallExpression("merge", + cty.ObjectVal(map[string]cty.Value{ + "one": cty.NumberIntVal(1), + "two": cty.NumberIntVal(3), + }), + GlobalRef("fix").AsExpression().AsValue(), + ) + + want := cty.ObjectVal(map[string]cty.Value{ + "one": cty.NumberIntVal(1), + "two": cty.NumberIntVal(2), + }) + + got, err := expr.Eval(bp) + if err != nil { + t.Errorf("got unexpected error: %s", err) + } + if diff := cmp.Diff(want, got, ctydebug.CmpOptions); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} diff --git a/pkg/config/path.go b/pkg/config/path.go new file mode 100644 index 0000000000..98d48fd383 --- /dev/null +++ b/pkg/config/path.go @@ -0,0 +1,139 @@ +// Copyright 2023 Google LLC +// +// 
Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package config
+
+import (
+	"fmt"
+	"reflect"
+)
+
+// Path is a unique identifier of a piece of configuration.
+type Path interface {
+	String() string
+	Parent() Path
+}
+
+type basePath struct {
+	InternalPrev  Path
+	InternalPiece string
+}
+
+func (p basePath) Parent() Path { return p.InternalPrev }
+
+func (p basePath) String() string {
+	pref := ""
+	if p.Parent() != nil {
+		pref = p.Parent().String()
+	}
+	return fmt.Sprintf("%s%s", pref, p.InternalPiece)
+}
+
+type arrayPath[E any] struct{ basePath }
+
+func (p arrayPath[E]) At(i int) E {
+	var e E
+	initPath(&e, &p, fmt.Sprintf("[%d]", i))
+	return e
+}
+
+type mapPath[E any] struct{ basePath }
+
+func (p mapPath[E]) Dot(k string) E {
+	var e E
+	initPath(&e, &p, fmt.Sprintf(".%s", k))
+	return e
+}
+
+func initPath(p any, prev any, piece string) {
+	r := reflect.Indirect(reflect.ValueOf(p))
+	ty := reflect.TypeOf(p).Elem()
+	if !r.FieldByName("InternalPiece").IsValid() || !r.FieldByName("InternalPrev").IsValid() {
+		panic(fmt.Sprintf("%s does not embed basePath", ty.Name()))
+	}
+	if _, ok := prev.(Path); prev != nil && !ok {
+		panic(fmt.Sprintf("prev is not a Path: %#v", prev))
+	}
+
+	r.FieldByName("InternalPiece").SetString(piece)
+	if prev != nil {
+		r.FieldByName("InternalPrev").Set(reflect.ValueOf(prev))
+	}
+
+	for i := 0; i < ty.NumField(); i++ {
+		tag, ok := ty.Field(i).Tag.Lookup("path")
+		if !ok {
+			continue
+		}
+		initPath(r.Field(i).Addr().Interface(), p, tag)
+	}
+}
+
+type rootPath struct {
+	basePath
+	BlueprintName   basePath                    `path:"blueprint_name"`
+	GhpcVersion     basePath                    `path:"ghpc_version"`
+	Validators      arrayPath[validatorCfgPath] `path:"validators"`
+	ValidationLevel basePath                    `path:"validation_level"`
+	Vars            dictPath                    `path:"vars"`
+	Groups          arrayPath[groupPath]        `path:"deployment_groups"`
+	Backend         backendPath                 `path:"terraform_backend_defaults"`
+}
+
+type validatorCfgPath struct {
+	basePath
+	Validator basePath `path:".validator"`
+	Inputs    dictPath `path:".inputs"`
+	Skip      basePath `path:".skip"`
+}
+
+type dictPath struct{ mapPath[basePath] }
+
+type backendPath struct {
+	basePath
+	Type          basePath `path:".type"`
+	Configuration dictPath `path:".configuration"`
+}
+
+type groupPath struct {
+	basePath
+	Name    basePath              `path:".group"`
+	Backend backendPath           `path:".terraform_backend"`
+	Modules arrayPath[modulePath] `path:".modules"`
+	Kind    basePath              `path:".kind"`
+}
+
+type modulePath struct {
+	basePath
+	Source   basePath               `path:".source"`
+	Kind     basePath               `path:".kind"`
+	ID       basePath               `path:".id"`
+	Use      arrayPath[backendPath] `path:".use"`
+	Outputs  arrayPath[outputPath]  `path:".outputs"`
+	Settings dictPath               `path:".settings"`
+}
+
+type outputPath struct {
+	basePath
+	Name        basePath `path:".name"`
+	Description basePath `path:".description"`
+	Sensitive   basePath `path:".sensitive"`
+}
+
+// Root is a starting point for creating a Blueprint Path.
+var Root rootPath
+
+func init() {
+	initPath(&Root, nil, "")
+}
diff --git a/pkg/config/path_test.go
b/pkg/config/path_test.go new file mode 100644 index 0000000000..922483aa90 --- /dev/null +++ b/pkg/config/path_test.go @@ -0,0 +1,78 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "testing" +) + +func TestPath(t *testing.T) { + type test struct { + p Path + want string + } + r := Root + m := r.Groups.At(3).Modules.At(1) + tests := []test{ + {r, ""}, + {r.BlueprintName, "blueprint_name"}, + {r.GhpcVersion, "ghpc_version"}, + {r.Validators, "validators"}, + {r.ValidationLevel, "validation_level"}, + {r.Vars, "vars"}, + {r.Groups, "deployment_groups"}, + {r.Backend, "terraform_backend_defaults"}, + + {r.Validators.At(2), "validators[2]"}, + {r.Validators.At(2).Validator, "validators[2].validator"}, + {r.Validators.At(2).Skip, "validators[2].skip"}, + {r.Validators.At(2).Inputs, "validators[2].inputs"}, + {r.Validators.At(2).Inputs.Dot("zebra"), "validators[2].inputs.zebra"}, + + {r.Vars.Dot("red"), "vars.red"}, + + {r.Groups.At(3), "deployment_groups[3]"}, + {r.Groups.At(3).Name, "deployment_groups[3].group"}, + {r.Groups.At(3).Kind, "deployment_groups[3].kind"}, + {r.Groups.At(3).Backend, "deployment_groups[3].terraform_backend"}, + {r.Groups.At(3).Modules, "deployment_groups[3].modules"}, + {r.Groups.At(3).Modules.At(1), "deployment_groups[3].modules[1]"}, + // m := r.Groups.At(3).Modules.At(1) + {m.Source, "deployment_groups[3].modules[1].source"}, + {m.ID, "deployment_groups[3].modules[1].id"}, + {m.Kind, "deployment_groups[3].modules[1].kind"}, + {m.Use, "deployment_groups[3].modules[1].use"}, + {m.Use.At(6), "deployment_groups[3].modules[1].use[6]"}, + {m.Outputs, "deployment_groups[3].modules[1].outputs"}, + {m.Outputs.At(2), "deployment_groups[3].modules[1].outputs[2]"}, + {m.Outputs.At(2).Name, "deployment_groups[3].modules[1].outputs[2].name"}, + {m.Outputs.At(2).Description, "deployment_groups[3].modules[1].outputs[2].description"}, + {m.Outputs.At(2).Sensitive, "deployment_groups[3].modules[1].outputs[2].sensitive"}, + {m.Settings, "deployment_groups[3].modules[1].settings"}, + {m.Settings.Dot("lime"), "deployment_groups[3].modules[1].settings.lime"}, + + {r.Backend.Type, "terraform_backend_defaults.type"}, + {r.Backend.Configuration, "terraform_backend_defaults.configuration"}, + {r.Backend.Configuration.Dot("goo"), "terraform_backend_defaults.configuration.goo"}, + } + for _, tc := range tests { + t.Run(tc.want, func(t *testing.T) { + got := tc.p.String() + if got != tc.want { + t.Errorf("\ngot : %q\nwant: %q", got, tc.want) + } + }) + } +} diff --git a/pkg/config/validate.go b/pkg/config/validate.go index e9aea870a1..22b6040dc8 100644 --- a/pkg/config/validate.go +++ b/pkg/config/validate.go @@ -28,7 +28,6 @@ import ( "github.com/pkg/errors" "github.com/zclconf/go-cty/cty" "golang.org/x/exp/maps" - "golang.org/x/exp/slices" "gopkg.in/yaml.v3" ) @@ -50,24 +49,18 @@ func (err *InvalidSettingError) Error() string { } // validate is the top-level function for running the validation suite. 
-func (dc DeploymentConfig) validate() { - // Drop the flags for log to improve readability only for running the validation suite - log.SetFlags(0) - +func (dc DeploymentConfig) validate() error { // variables should be validated before running validators if err := dc.executeValidators(); err != nil { - log.Fatal(err) + return err } - if err := dc.validateModules(); err != nil { - log.Fatal(err) + return err } if err := dc.validateModuleSettings(); err != nil { - log.Fatal(err) + return err } - - // Set it back to the initial value - log.SetFlags(log.LstdFlags) + return nil } // performs validation of global variables @@ -212,10 +205,6 @@ func validateModule(c Module) error { return nil } -func hasIllegalChars(name string) bool { - return !regexp.MustCompile(`^[\w\+]+(\s*)[\w-\+\.]+$`).MatchString(name) -} - func validateOutputs(mod Module) error { modInfo := mod.InfoOrDie() // Only get the map if needed @@ -322,67 +311,26 @@ func (dc *DeploymentConfig) getValidators() map[string]func(validatorConfig) err return allValidators } -// The expected use case of this function is to merge blueprint requirements -// that are maps from project_id to string slices containing APIs or IAM roles -// required for provisioning. It will remove duplicate elements and ensure that -// the output is sorted for easy visual and automatic comparison. -// Solution: merge []string of new[key] into []string of base[key], removing -// duplicate elements and sorting the result -func mergeBlueprintRequirements(base map[string][]string, new map[string][]string) map[string][]string { - dest := make(map[string][]string) - maps.Copy(dest, base) - - // sort each value in dest in-place to ensure output is sorted when new map - // does not contain all keys in base - for _, v := range dest { - slices.Sort(v) - } - - for newProject, newRequirements := range new { - // this code is safe even if dest[newProject] has not yet been populated - dest[newProject] = append(dest[newProject], newRequirements...) 
- slices.Sort(dest[newProject]) - dest[newProject] = slices.Compact(dest[newProject]) - } - return dest -} - func (dc *DeploymentConfig) testApisEnabled(c validatorConfig) error { if err := c.check(testApisEnabledName, []string{}); err != nil { return err } - requiredApis := make(map[string][]string) - for _, grp := range dc.Config.DeploymentGroups { - for _, mod := range grp.Modules { - requiredApis = mergeBlueprintRequirements(requiredApis, mod.RequiredApis) - } + pv := dc.Config.Vars.Get("project_id") + if pv.Type() != cty.String { + return fmt.Errorf("the deployment variable `project_id` is either not set or is not a string") } - var errored bool - for project, apis := range requiredApis { - if hasVariable(project) { - expr, err := SimpleVarToExpression(project) - if err != nil { - return err - } - v, err := expr.Eval(dc.Config) - if err != nil { - return err - } - if v.Type() != cty.String { - return fmt.Errorf("the deployment variable %s is not a string", project) - } - project = v.AsString() - } - err := validators.TestApisEnabled(project, apis) - if err != nil { - log.Println(err) - errored = true + apis := map[string]bool{} + dc.Config.WalkModules(func(m *Module) error { + for _, api := range m.InfoOrDie().RequiredApis { + apis[api] = true } - } + return nil + }) - if errored { + if err := validators.TestApisEnabled(pv.AsString(), maps.Keys(apis)); err != nil { + log.Println(err) return fmt.Errorf(funcErrorMsgTemplate, testApisEnabledName.String()) } return nil diff --git a/pkg/config/validator_test.go b/pkg/config/validator_test.go index 10ed07ce5a..38809ae811 100644 --- a/pkg/config/validator_test.go +++ b/pkg/config/validator_test.go @@ -17,14 +17,11 @@ package config import ( "fmt" "path/filepath" - "sort" "hpc-toolkit/pkg/modulereader" "github.com/pkg/errors" "github.com/zclconf/go-cty/cty" - "golang.org/x/exp/maps" - "golang.org/x/exp/slices" . 
"gopkg.in/check.v1" ) @@ -192,53 +189,6 @@ func (s *MySuite) TestAddDefaultValidators(c *C) { c.Assert(dc.Config.Validators, HasLen, 7) } -func (s *MySuite) TestMergeBlueprintRequirements(c *C) { - map1 := make(map[string][]string) - map2 := make(map[string][]string) - - // each expected value should individually be sorted and have no duplicate - // elements, although different values may share elements - expectedValues1 := []string{"bar", "bat"} - expectedValues2 := []string{"value2", "value3"} - - reversedValues1 := slices.Clone(expectedValues1) - sort.Sort(sort.Reverse(sort.StringSlice(reversedValues1))) - - // TEST: merge with identical keys and duplicate elements in values - map1["key1"] = slices.Clone(reversedValues1) - map2["key1"] = []string{expectedValues1[0], expectedValues1[0]} - map3 := mergeBlueprintRequirements(map1, map2) - - // expected value (duplicates removed and sorted) - expectedMap := map[string][]string{ - "key1": expectedValues1, - } - c.Assert(maps.EqualFunc(map3, expectedMap, slices.Equal[string]), Equals, true) - - // unexpected value (duplicates removed and reverse sorted) - unexpectedMap := map[string][]string{ - "key1": reversedValues1, - } - c.Assert(maps.EqualFunc(map3, unexpectedMap, slices.Equal[string]), Equals, false) - - // TEST: merge with additional key in 1st map - map1["key2"] = []string{expectedValues2[1], expectedValues2[0]} - map3 = mergeBlueprintRequirements(map1, map2) - - // test the expected value (duplicates removed and sorted) - expectedMap = map[string][]string{ - "key1": slices.Clone(expectedValues1), - "key2": slices.Clone(expectedValues2), - } - c.Assert(maps.EqualFunc(map3, expectedMap, slices.Equal[string]), Equals, true) - - // TEST: merge with additional key in 2nd map (expected value unchanged!) - delete(map1, "key2") - map2["key2"] = slices.Clone(expectedValues2) - map3 = mergeBlueprintRequirements(map1, map2) - c.Assert(maps.EqualFunc(map3, expectedMap, slices.Equal[string]), Equals, true) -} - func (s *MySuite) TestExecuteValidators(c *C) { dc := getDeploymentConfigForTest() dc.Config.Validators = []validatorConfig{ diff --git a/pkg/config/yaml.go b/pkg/config/yaml.go new file mode 100644 index 0000000000..b2656aaddc --- /dev/null +++ b/pkg/config/yaml.go @@ -0,0 +1,303 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "os" + "regexp" + + "github.com/zclconf/go-cty/cty" + "github.com/zclconf/go-cty/cty/gocty" + ctyJson "github.com/zclconf/go-cty/cty/json" + "gopkg.in/yaml.v3" +) + +// yPath is a helper for YamlCtx to build "Path". It's agnostic to the Blueprint structure. +type yPath string + +// At is a builder method for a path of a child in a sequence. +func (p yPath) At(i int) yPath { + return yPath(fmt.Sprintf("%s[%d]", p, i)) +} + +// Dot is a builder method for a path of a child in a mapping. 
+func (p yPath) Dot(k string) yPath {
+	if p == "" {
+		return yPath(k)
+	}
+	return yPath(fmt.Sprintf("%s.%s", p, k))
+}
+
+// Pos is a position in the blueprint file.
+type Pos struct {
+	Line   int
+	Column int
+}
+
+func importBlueprint(f string) (Blueprint, YamlCtx, error) {
+	data, err := os.ReadFile(f)
+	if err != nil {
+		return Blueprint{}, YamlCtx{}, fmt.Errorf("%s, filename=%s: %v", errorMessages["fileLoadError"], f, err)
+	}
+	decoder := yaml.NewDecoder(bytes.NewReader(data))
+	decoder.KnownFields(true)
+
+	var bp Blueprint
+	if err = decoder.Decode(&bp); err != nil {
+		return Blueprint{}, YamlCtx{}, fmt.Errorf(errorMessages["yamlUnmarshalError"], f, err)
+	}
+	return bp, NewYamlCtx(data), nil
+}
+
+// YamlCtx is contextual information used to render errors.
+type YamlCtx struct {
+	pathToPos map[yPath]Pos
+	Lines     []string
+}
+
+// Pos returns the position of a given path if one is found.
+func (c YamlCtx) Pos(p Path) (Pos, bool) {
+	pos, ok := c.pathToPos[yPath(p.String())]
+	return pos, ok
+}
+
+func syntheticOutputsNode(name string, ln int, col int) *yaml.Node {
+	return &yaml.Node{
+		Kind: yaml.MappingNode,
+		Content: []*yaml.Node{
+			{
+				Kind:   yaml.ScalarNode,
+				Value:  "name",
+				Line:   ln,
+				Column: col,
+			},
+			{
+				Kind:   yaml.ScalarNode,
+				Value:  name,
+				Line:   ln,
+				Column: col,
+			},
+		},
+		Line:   ln,
+		Column: col,
+	}
+}
+
+// normalizeYamlNode handles variadic YAML syntax, ensuring that
+// there is only one (canonical) way to refer to a piece of blueprint.
+// Handled cases:
+// * Module.outputs:
+// ```
+// outputs:
+// - name: grog # canonical path to "grog" value is `...outputs[0].name`
+// - mork # canonical path to "mork" value is `...outputs[1].name`, NOT `...outputs[1]`
+// ```
+func normalizeYamlNode(p yPath, n *yaml.Node) *yaml.Node {
+	switch {
+	case n.Kind == yaml.ScalarNode && regexp.MustCompile(`^deployment_groups\[\d+\]\.modules\[\d+\]\.outputs\[\d+\]$`).MatchString(string(p)):
+		return syntheticOutputsNode(n.Value, n.Line, n.Column)
+	default:
+		return n
+	}
+}
+
+// NewYamlCtx creates a new YamlCtx from given YAML data.
+// NOTE: The data should be a valid blueprint YAML (previously used to parse Blueprint);
+// it panics if the data is not valid YAML, and it does not validate Blueprint structure.
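+//
+// Minimal illustrative usage (positions depend on the exact formatting of the data):
+//
+//	ctx := NewYamlCtx([]byte("vars:\n  red: ruby\n"))
+//	pos, ok := ctx.Pos(Root.Vars.Dot("red")) // Pos{Line: 2, Column: 8}, true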
+func NewYamlCtx(data []byte) YamlCtx {
+	var c nodeCapturer
+	if err := yaml.Unmarshal(data, &c); err != nil {
+		panic(err) // shouldn't happen
+	}
+	if c.n == nil {
+		return YamlCtx{} // empty
+	}
+
+	m := map[yPath]Pos{}
+	var walk func(n *yaml.Node, p yPath)
+	walk = func(n *yaml.Node, p yPath) {
+		n = normalizeYamlNode(p, n)
+		m[p] = Pos{n.Line, n.Column}
+		if n.Kind == yaml.MappingNode {
+			for i := 0; i < len(n.Content); i += 2 {
+				walk(n.Content[i+1], p.Dot(n.Content[i].Value))
+			}
+		} else if n.Kind == yaml.SequenceNode {
+			for i, c := range n.Content {
+				walk(c, p.At(i))
+			}
+		}
+	}
+	walk(c.n, "")
+
+	var lines []string
+	sc := bufio.NewScanner(bytes.NewReader(data))
+	for sc.Scan() {
+		lines = append(lines, sc.Text())
+	}
+	return YamlCtx{m, lines}
+}
+
+type nodeCapturer struct{ n *yaml.Node }
+
+func (c *nodeCapturer) UnmarshalYAML(n *yaml.Node) error {
+	c.n = n
+	return nil
+}
+
+// UnmarshalYAML implements a custom unmarshaler from YAML string to ModuleKind
+func (mk *ModuleKind) UnmarshalYAML(n *yaml.Node) error {
+	var kind string
+	err := n.Decode(&kind)
+	if err == nil && IsValidModuleKind(kind) {
+		mk.kind = kind
+		return nil
+	}
+	return fmt.Errorf("line %d: kind must be \"packer\" or \"terraform\" or removed from YAML", n.Line)
+}
+
+// MarshalYAML implements a custom marshaler from ModuleKind to YAML string
+func (mk ModuleKind) MarshalYAML() (interface{}, error) {
+	return mk.String(), nil
+}
+
+// UnmarshalYAML is a custom unmarshaler for Module.Use that prints a nice error message.
+func (ms *ModuleIDs) UnmarshalYAML(n *yaml.Node) error {
+	var ids []ModuleID
+	if err := n.Decode(&ids); err != nil {
+		return fmt.Errorf("line %d: `use` must be a list of module ids", n.Line)
+	}
+	*ms = ids
+	return nil
+}
+
+// YamlValue is a wrapper around cty.Value to handle YAML unmarshaling.
+type YamlValue struct {
+	v cty.Value
+}
+
+// Unwrap returns the wrapped cty.Value.
+func (y YamlValue) Unwrap() cty.Value {
+	return y.v
+}
+
+// UnmarshalYAML implements custom YAML unmarshaling.
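+// Scalars that look like HCL literals or blueprint variables are converted to
+// expressions, e.g. (illustrative): "((3 + 9))" becomes the expression `3 + 9`
+// and "$(vars.gold)" becomes `var.gold`; other scalars become plain cty values.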
+func (y *YamlValue) UnmarshalYAML(n *yaml.Node) error { + var err error + switch n.Kind { + case yaml.ScalarNode: + err = y.unmarshalScalar(n) + case yaml.MappingNode: + err = y.unmarshalObject(n) + case yaml.SequenceNode: + err = y.unmarshalTuple(n) + default: + err = fmt.Errorf("line %d: cannot decode node with unknown kind %d", n.Line, n.Kind) + } + return err +} + +func (y *YamlValue) unmarshalScalar(n *yaml.Node) error { + var s interface{} + if err := n.Decode(&s); err != nil { + return err + } + ty, err := gocty.ImpliedType(s) + if err != nil { + return err + } + if y.v, err = gocty.ToCtyValue(s, ty); err != nil { + return err + } + + if l, is := IsYamlExpressionLiteral(y.v); is { // HCL literal + var e Expression + if e, err = ParseExpression(l); err != nil { + return err + } + y.v = e.AsValue() + } else if y.v.Type() == cty.String && hasVariable(y.v.AsString()) { // "simple" variable + e, err := SimpleVarToExpression(y.v.AsString()) + if err != nil { + return err + } + y.v = e.AsValue() + } + return nil +} + +func (y *YamlValue) unmarshalObject(n *yaml.Node) error { + var my map[string]YamlValue + if err := n.Decode(&my); err != nil { + return err + } + mv := map[string]cty.Value{} + for k, y := range my { + mv[k] = y.v + } + y.v = cty.ObjectVal(mv) + return nil +} + +func (y *YamlValue) unmarshalTuple(n *yaml.Node) error { + var ly []YamlValue + if err := n.Decode(&ly); err != nil { + return err + } + lv := []cty.Value{} + for _, y := range ly { + lv = append(lv, y.v) + } + y.v = cty.TupleVal(lv) + return nil +} + +// UnmarshalYAML implements custom YAML unmarshaling. +func (d *Dict) UnmarshalYAML(n *yaml.Node) error { + var m map[string]YamlValue + if err := n.Decode(&m); err != nil { + return err + } + for k, y := range m { + d.Set(k, y.v) + } + return nil +} + +// MarshalYAML implements custom YAML marshaling. +func (d Dict) MarshalYAML() (interface{}, error) { + o, _ := cty.Transform(d.AsObject(), func(p cty.Path, v cty.Value) (cty.Value, error) { + if e, is := IsExpressionValue(v); is { + return e.makeYamlExpressionValue(), nil + } + return v, nil + }) + + j := ctyJson.SimpleJSONValue{Value: o} + b, err := j.MarshalJSON() + if err != nil { + return nil, fmt.Errorf("failed to marshal JSON: %v", err) + } + var g interface{} + err = json.Unmarshal(b, &g) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal JSON: %v", err) + } + return g, nil +} diff --git a/pkg/config/yaml_test.go b/pkg/config/yaml_test.go new file mode 100644 index 0000000000..22ca7bbb9e --- /dev/null +++ b/pkg/config/yaml_test.go @@ -0,0 +1,317 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package config + +import ( + "bytes" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/zclconf/go-cty-debug/ctydebug" + "github.com/zclconf/go-cty/cty" + "gopkg.in/yaml.v3" +) + +func TestYamlCtx(t *testing.T) { + data := ` # line 1 +# comment +blueprint_name: green + +ghpc_version: apricot + +validators: +- validator: clay + inputs: + spice: curry # line 10 +- validator: sand + skip: true + +validation_level: 9000 + +vars: + red: ruby + +deployment_groups: +- group: tiger # line 20 + terraform_backend: + type: yam + configuration: + carrot: rust + kind: terraform + modules: + - id: tan + source: oatmeal + kind: terraform + use: [mocha, coffee] # line 30 + outputs: + - latte + - name: hazelnut + description: almond + sensitive: false + settings: + dijon: pine + +- group: crocodile + modules: # line 40 + - id: green + - id: olive + +terraform_backend_defaults: + type: moss +` + + { // Tests sanity check - data describes valid blueprint. + decoder := yaml.NewDecoder(bytes.NewReader([]byte(data))) + decoder.KnownFields(true) + var bp Blueprint + if err := decoder.Decode(&bp); err != nil { + t.Fatal(err) + } + } + + type test struct { + path Path + want Pos + } + tests := []test{ + {Root, Pos{3, 1}}, + {Root.BlueprintName, Pos{3, 17}}, + {Root.GhpcVersion, Pos{5, 15}}, + {Root.Validators, Pos{8, 1}}, + {Root.Validators.At(0), Pos{8, 3}}, + {Root.Validators.At(0).Inputs, Pos{10, 5}}, + {Root.Validators.At(0).Inputs.Dot("spice"), Pos{10, 12}}, + {Root.Validators.At(0).Validator, Pos{8, 14}}, + {Root.Validators.At(1), Pos{11, 3}}, + {Root.Validators.At(1).Skip, Pos{12, 9}}, + {Root.Validators.At(1).Validator, Pos{11, 14}}, + {Root.ValidationLevel, Pos{14, 19}}, + {Root.Vars, Pos{17, 3}}, + {Root.Vars.Dot("red"), Pos{17, 8}}, + {Root.Groups, Pos{20, 1}}, + {Root.Groups.At(0), Pos{20, 3}}, + {Root.Groups.At(0).Name, Pos{20, 10}}, + + {Root.Groups.At(0).Backend, Pos{22, 5}}, + {Root.Groups.At(0).Backend.Type, Pos{22, 11}}, + {Root.Groups.At(0).Backend.Configuration, Pos{24, 7}}, + {Root.Groups.At(0).Backend.Configuration.Dot("carrot"), Pos{24, 15}}, + {Root.Groups.At(0).Kind, Pos{25, 9}}, + + {Root.Groups.At(0).Modules, Pos{27, 3}}, + {Root.Groups.At(0).Modules.At(0), Pos{27, 5}}, + {Root.Groups.At(0).Modules.At(0).ID, Pos{27, 9}}, + {Root.Groups.At(0).Modules.At(0).Source, Pos{28, 13}}, + {Root.Groups.At(0).Modules.At(0).Kind, Pos{29, 11}}, + {Root.Groups.At(0).Modules.At(0).Use, Pos{30, 10}}, + {Root.Groups.At(0).Modules.At(0).Use.At(0), Pos{30, 11}}, + {Root.Groups.At(0).Modules.At(0).Use.At(1), Pos{30, 18}}, + {Root.Groups.At(0).Modules.At(0).Outputs, Pos{32, 5}}, + {Root.Groups.At(0).Modules.At(0).Outputs.At(0), Pos{32, 7}}, + {Root.Groups.At(0).Modules.At(0).Outputs.At(0).Name, Pos{32, 7}}, // synthetic + {Root.Groups.At(0).Modules.At(0).Outputs.At(1), Pos{33, 7}}, + {Root.Groups.At(0).Modules.At(0).Outputs.At(1).Name, Pos{33, 13}}, + {Root.Groups.At(0).Modules.At(0).Outputs.At(1).Description, Pos{34, 20}}, + {Root.Groups.At(0).Modules.At(0).Outputs.At(1).Sensitive, Pos{35, 18}}, + {Root.Groups.At(0).Modules.At(0).Settings, Pos{37, 7}}, + {Root.Groups.At(0).Modules.At(0).Settings.Dot("dijon"), Pos{37, 14}}, + + {Root.Groups.At(1), Pos{39, 3}}, + {Root.Groups.At(1).Name, Pos{39, 10}}, + {Root.Groups.At(1).Modules, Pos{41, 3}}, + {Root.Groups.At(1).Modules.At(0), Pos{41, 5}}, + {Root.Groups.At(1).Modules.At(0).ID, Pos{41, 9}}, + {Root.Groups.At(1).Modules.At(1), Pos{42, 5}}, + {Root.Groups.At(1).Modules.At(1).ID, Pos{42, 9}}, + + {Root.Backend, Pos{45, 3}}, + {Root.Backend.Type, 
Pos{45, 9}}, + } + + ctx := NewYamlCtx([]byte(data)) + for _, tc := range tests { + t.Run(tc.path.String(), func(t *testing.T) { + got, ok := ctx.Pos(tc.path) + if !ok { + t.Errorf("%q not found", tc.path.String()) + } else if diff := cmp.Diff(tc.want, got); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } + }) + } +} + +func TestModuleKindUnmarshalYAML(t *testing.T) { + type test struct { + input string + want ModuleKind + err bool + } + tests := []test{ + {"", UnknownKind, false}, + {"terraform", TerraformKind, false}, + {"packer", PackerKind, false}, + + {"unknown", ModuleKind{}, true}, + {"[]", ModuleKind{}, true}, + {"{]", ModuleKind{}, true}, + } + for _, tc := range tests { + t.Run(tc.input, func(t *testing.T) { + var got ModuleKind + err := yaml.Unmarshal([]byte(tc.input), &got) + if tc.err != (err != nil) { + t.Fatalf("got unexpected error: %s", err) + } + + if tc.want != got { + t.Errorf("want:%#v:\ngot%#v", tc.want, got) + } + }) + } +} + +func TestModuleIDsUnmarshalYAML(t *testing.T) { + type test struct { + input string + want ModuleIDs + err bool + } + tests := []test{ + {"[green, red]", ModuleIDs{"green", "red"}, false}, + {"[]", ModuleIDs{}, false}, + + {"green", nil, true}, + {"44", nil, true}, + {"{}", nil, true}, + {"[[]]", nil, true}, + } + for _, tc := range tests { + t.Run(tc.input, func(t *testing.T) { + var got ModuleIDs + err := yaml.Unmarshal([]byte(tc.input), &got) + if tc.err != (err != nil) { + t.Fatalf("got unexpected error: %s", err) + } + + if diff := cmp.Diff(tc.want, got); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } + }) + } +} + +func TestDictUnmarshalYAML(t *testing.T) { + yml := ` +s1: "red" +s2: pink +m1: {} +m2: + m2f1: green + m2f2: [1, 0.2, -3, false] + gv: $(vars.gold) + mv: $(lime.bloom) + hl: ((3 + 9)) +` + want := Dict{} + want. + Set("s1", cty.StringVal("red")). + Set("s2", cty.StringVal("pink")). + Set("m1", cty.EmptyObjectVal). + Set("m2", cty.ObjectVal(map[string]cty.Value{ + "m2f1": cty.StringVal("green"), + "m2f2": cty.TupleVal([]cty.Value{ + cty.NumberIntVal(1), + cty.NumberFloatVal(0.2), + cty.NumberIntVal(-3), + cty.False, + }), + "gv": MustParseExpression("var.gold").AsValue(), + "mv": MustParseExpression("module.lime.bloom").AsValue(), + "hl": MustParseExpression("3 + 9").AsValue(), + })) + var got Dict + if err := yaml.Unmarshal([]byte(yml), &got); err != nil { + t.Fatalf("failed to decode: %v", err) + } + if diff := cmp.Diff(want.Items(), got.Items(), ctydebug.CmpOptions); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} + +func TestDictMarshalYAML(t *testing.T) { + d := Dict{} + d. + Set("s1", cty.StringVal("red")). + Set("m1", cty.EmptyObjectVal). 
+ Set("m2", cty.ObjectVal(map[string]cty.Value{ + "m2f1": cty.StringVal("green"), + "m2f2": cty.TupleVal([]cty.Value{ + cty.NumberIntVal(1), + cty.NumberFloatVal(0.2), + cty.NumberIntVal(-3), + cty.False, + MustParseExpression("7 + 4").AsValue(), + }), + })) + want := map[string]interface{}{ + "s1": "red", + "m1": map[string]interface{}{}, + "m2": map[string]interface{}{ + "m2f1": "green", + "m2f2": []interface{}{1.0, 0.2, -3.0, false, "((7 + 4))"}, + }, + } + got, err := d.MarshalYAML() + if err != nil { + t.Fatalf("failed to marshal: %v", err) + } + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} + +func TestYAMLValueMarshalIntAsInt(t *testing.T) { + d := Dict{} + d.Set("zebra", cty.NumberIntVal(5)) + want := "zebra: 5\n" + got, err := yaml.Marshal(d) + if err != nil { + t.Fatalf("failed to marshal: %v", err) + } + if diff := cmp.Diff(want, string(got)); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} + +func TestYAMLValueUnmarshalWithAlias(t *testing.T) { + yml := ` +pony: &passtime +- eat +- sleep +zebra: *passtime +` + want := Dict{} + want. + Set("pony", cty.TupleVal([]cty.Value{cty.StringVal("eat"), cty.StringVal("sleep")})). + Set("zebra", cty.TupleVal([]cty.Value{cty.StringVal("eat"), cty.StringVal("sleep")})) + var got Dict + if err := yaml.Unmarshal([]byte(yml), &got); err != nil { + t.Fatalf("failed to decode: %v", err) + } + if diff := cmp.Diff(want.Items(), got.Items(), ctydebug.CmpOptions); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} diff --git a/pkg/deploymentio/local.go b/pkg/deploymentio/local.go index 573b3c801e..6366f37f9a 100644 --- a/pkg/deploymentio/local.go +++ b/pkg/deploymentio/local.go @@ -16,10 +16,8 @@ package deploymentio import ( "fmt" - "log" "os" "path/filepath" - "strings" "github.com/otiai10/copy" ) @@ -35,18 +33,6 @@ func mkdirWrapper(directory string) error { return nil } -func getAbsSourcePath(sourcePath string) string { - if strings.HasPrefix(sourcePath, "/") { // Absolute Path Already - return sourcePath - } - // Otherwise base it off of the CWD - cwd, err := os.Getwd() - if err != nil { - log.Fatalf("deploymentio: %v", err) - } - return filepath.Join(cwd, sourcePath) -} - // CreateDirectory creates the directory func (b *Local) CreateDirectory(directory string) error { if _, err := os.Stat(directory); !os.IsNotExist(err) { @@ -58,9 +44,12 @@ func (b *Local) CreateDirectory(directory string) error { return mkdirWrapper(directory) } -// CopyFromPath copyes the source file to the destination file +// CopyFromPath copies the source file to the destination file func (b *Local) CopyFromPath(src string, dst string) error { - absPath := getAbsSourcePath(src) + absPath, err := filepath.Abs(src) + if err != nil { + return err + } return copy.Copy(absPath, dst) } diff --git a/pkg/deploymentio/local_test.go b/pkg/deploymentio/local_test.go index a58e384815..7b865b423c 100644 --- a/pkg/deploymentio/local_test.go +++ b/pkg/deploymentio/local_test.go @@ -40,19 +40,6 @@ func (s *MySuite) TestCreateDirectoryLocal(c *C) { c.Assert(err, IsNil) } -func (s *MySuite) TestGetAbsSourcePath(c *C) { - // Already abs path - gotPath := getAbsSourcePath(testDir) - c.Assert(gotPath, Equals, testDir) - - // Relative path - relPath := "relative/path" - cwd, err := os.Getwd() - c.Assert(err, IsNil) - gotPath = getAbsSourcePath(relPath) - c.Assert(gotPath, Equals, filepath.Join(cwd, relPath)) -} - func (s *MySuite) TestCopyFromPathLocal(c *C) { deploymentio := GetDeploymentioLocal() testSrcFilename := 
filepath.Join(testDir, "testSrc") diff --git a/pkg/inspect/modules_test.go b/pkg/inspect/modules_test.go index 6905f91be3..758d6abea6 100644 --- a/pkg/inspect/modules_test.go +++ b/pkg/inspect/modules_test.go @@ -15,6 +15,7 @@ package inspect_test import ( + "fmt" "hpc-toolkit/pkg/inspect" "hpc-toolkit/pkg/modulereader" "log" @@ -147,7 +148,7 @@ func TestLabelsType(t *testing.T) { } func TestNetworkStorage(t *testing.T) { - expected := `list(object({ + obj := modulereader.NormalizeType(`object({ server_ip = string remote_mount = string local_mount = string @@ -155,12 +156,15 @@ func TestNetworkStorage(t *testing.T) { mount_options = string client_install_runner = map(string) mount_runner = map(string) - }))` - for _, mod := range notEmpty(query(hasInput("network_storage")), t) { - checkInputType(t, mod, "network_storage", expected) - } + })`) + lst := modulereader.NormalizeType(fmt.Sprintf("list(%s)", obj)) - for _, mod := range query(all(ofRole("file-system"), not(hasOutput("network_storage")))) { - t.Errorf("%q does not output 'network_storage'", mod.Source) + for _, mod := range notEmpty(query(hasInput("network_storage")), t) { + i, _ := mod.Input("network_storage") + got := modulereader.NormalizeType(i.Type) + if got != obj && got != lst { + t.Errorf("%s `network_storage` has unexpected type expected:\n%#v\nor\n%#v\ngot:\n%#v", + mod.Source, obj, lst, got) + } } } diff --git a/pkg/modulereader/resreader.go b/pkg/modulereader/resreader.go index ec10a45252..45ffafdf05 100644 --- a/pkg/modulereader/resreader.go +++ b/pkg/modulereader/resreader.go @@ -25,6 +25,7 @@ import ( "path" "strings" + "github.com/hashicorp/go-getter" "gopkg.in/yaml.v3" ) @@ -135,17 +136,25 @@ func GetModuleInfo(source string, kind string) (ModuleInfo, error) { if err != nil { return ModuleInfo{}, err } - modPath = path.Join(tmpDir, "module") - sourceReader := sourcereader.Factory(source) - if err = sourceReader.GetModule(source, modPath); err != nil { - return ModuleInfo{}, fmt.Errorf("failed to clone git module at %s: %v", source, err) + pkgAddr, subDir := getter.SourceDirSubdir(source) + pkgPath := path.Join(tmpDir, "module") + modPath = path.Join(pkgPath, subDir) + sourceReader := sourcereader.Factory(pkgAddr) + if err = sourceReader.GetModule(pkgAddr, pkgPath); err != nil { + if subDir == "" { + return ModuleInfo{}, err + } + return ModuleInfo{}, + fmt.Errorf("module source %s included \"//\" package syntax; "+ + "the \"//\" should typically be placed at the root of the repository:\n%s", + source, err.Error()) } case sourcereader.IsEmbeddedPath(source) || sourcereader.IsLocalPath(source): modPath = source default: - return ModuleInfo{}, fmt.Errorf("Source is not valid: %s", source) + return ModuleInfo{}, fmt.Errorf("source is not valid: %s", source) } reader := Factory(kind) @@ -185,25 +194,13 @@ var kinds = map[string]ModReader{ "packer": NewPackerReader(), } -// IsValidReaderKind returns true if the kind input is valid -func IsValidReaderKind(input string) bool { - for k := range kinds { - if k == input { - return true - } - } - return false -} - // Factory returns a ModReader of type 'kind' func Factory(kind string) ModReader { - for k, v := range kinds { - if kind == k { - return v - } + r, ok := kinds[kind] + if !ok { + log.Fatalf("Invalid request to create a reader of kind %s", kind) } - log.Fatalf("Invalid request to create a reader of kind %s", kind) - return nil + return r } func defaultAPIList(source string) []string { diff --git a/pkg/modulereader/resreader_test.go 
b/pkg/modulereader/resreader_test.go index d5be2e3b6d..e9b17d2240 100644 --- a/pkg/modulereader/resreader_test.go +++ b/pkg/modulereader/resreader_test.go @@ -24,7 +24,6 @@ import ( "reflect" "testing" - "github.com/spf13/afero" . "gopkg.in/check.v1" "gopkg.in/yaml.v3" ) @@ -72,16 +71,6 @@ func Test(t *testing.T) { TestingT(t) } -// modulereader.go -func (s *MySuite) TestIsValidKind(c *C) { - c.Assert(IsValidReaderKind(pkrKindString), Equals, true) - c.Assert(IsValidReaderKind(tfKindString), Equals, true) - c.Assert(IsValidReaderKind("Packer"), Equals, false) - c.Assert(IsValidReaderKind("Terraform"), Equals, false) - c.Assert(IsValidReaderKind("META"), Equals, false) - c.Assert(IsValidReaderKind(""), Equals, false) -} - func (s *MySuite) TestGetOutputsAsMap(c *C) { // Simple: empty outputs modInfo := ModuleInfo{} @@ -122,7 +111,7 @@ func (s *MySuite) TestGetModuleInfo_Embedded(c *C) { // Invalid: Unsupported Module Source badSource := "gcs::https://www.googleapis.com/storage/v1/GoogleCloudPlatform/hpc-toolkit/modules" moduleInfo, err = GetModuleInfo(badSource, tfKindString) - expectedErr = "Source is not valid: .*" + expectedErr = "source is not valid: .*" c.Assert(err, ErrorMatches, expectedErr) } @@ -137,7 +126,7 @@ func (s *MySuite) TestGetModuleInfo_Git(c *C) { // Invalid: Unsupported Module Source badSource := "gcs::https://www.googleapis.com/storage/v1/GoogleCloudPlatform/hpc-toolkit/modules" _, err = GetModuleInfo(badSource, tfKindString) - expectedErr = "Source is not valid: .*" + expectedErr = "source is not valid: .*" c.Assert(err, ErrorMatches, expectedErr) } @@ -158,19 +147,10 @@ func (s *MySuite) TestGetModuleInfo_Local(c *C) { // Invalid: Unsupported Module Source badSource := "gcs::https://www.googleapis.com/storage/v1/GoogleCloudPlatform/hpc-toolkit/modules" moduleInfo, err = GetModuleInfo(badSource, tfKindString) - expectedErr = "Source is not valid: .*" + expectedErr = "source is not valid: .*" c.Assert(err, ErrorMatches, expectedErr) } -// hcl_utils.go -func getTestFS() afero.IOFS { - aferoFS := afero.NewMemMapFs() - aferoFS.MkdirAll("modules/network/vpc", 0755) - afero.WriteFile( - aferoFS, "modules/network/vpc/main.tf", []byte(testMainTf), 0644) - return afero.NewIOFS(aferoFS) -} - func (s *MySuite) TestGetHCLInfo(c *C) { // Invalid source path - path does not exists fakePath := "./not/a/real/path" diff --git a/pkg/modulewriter/hcl_utils.go b/pkg/modulewriter/hcl_utils.go index f62fb173d1..74e0f33cb2 100644 --- a/pkg/modulewriter/hcl_utils.go +++ b/pkg/modulewriter/hcl_utils.go @@ -17,27 +17,13 @@ package modulewriter import ( "fmt" "path/filepath" - "regexp" "hpc-toolkit/pkg/config" - "github.com/hashicorp/hcl/v2/hclsyntax" "github.com/hashicorp/hcl/v2/hclwrite" "github.com/zclconf/go-cty/cty" ) -func escapeBlueprintVariables(s string) string { - // Convert \$(not.variable) to $(not.variable) - re := regexp.MustCompile(`\\\$\(`) - return re.ReplaceAllString(s, `$(`) -} - -func escapeLiteralVariables(s string) string { - // Convert \((not.variable)) to ((not.variable)) - re := regexp.MustCompile(`\\\(\(`) - return re.ReplaceAllString(s, `((`) -} - // WriteHclAttributes writes tfvars/pkvars.hcl files func WriteHclAttributes(vars map[string]cty.Value, dst string) error { if err := createBaseFile(dst); err != nil { @@ -48,7 +34,7 @@ func WriteHclAttributes(vars map[string]cty.Value, dst string) error { hclBody := hclFile.Body() for _, k := range orderKeys(vars) { hclBody.AppendNewline() - toks := TokensForValue(vars[k]) + toks := config.TokensForValue(vars[k]) 
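+		// config.TokensForValue (unlike hclwrite.TokensForValue) renders Expression
+		// values and "((...))" HCL-literal strings as raw HCL rather than quoted strings.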
hclBody.SetAttributeRaw(k, toks) } @@ -59,51 +45,3 @@ func WriteHclAttributes(vars map[string]cty.Value, dst string) error { } return err } - -// TokensForValue is a modification of hclwrite.TokensForValue. -// The only difference in behavior is handling "HCL literal" strings. -func TokensForValue(val cty.Value) hclwrite.Tokens { - if val.IsNull() { // terminate early as Null value can has any type (e.g. String) - return hclwrite.TokensForValue(val) - } - - // We need to handle both cases, until all "expression" users are moved to Expression - if e, is := config.IsExpressionValue(val); is { - return e.Tokenize() - } else if s, is := config.IsYamlExpressionLiteral(val); is { // return it "as is" - return hclwrite.TokensForIdentifier(s) - } - - ty := val.Type() - if ty == cty.String { - s := val.AsString() - // The order of application matters, for an edge cases like: `\$\((` -> `$((` - s = escapeLiteralVariables(s) - s = escapeBlueprintVariables(s) - return hclwrite.TokensForValue(cty.StringVal(s)) - } - - if ty.IsListType() || ty.IsSetType() || ty.IsTupleType() { - tl := []hclwrite.Tokens{} - for it := val.ElementIterator(); it.Next(); { - _, v := it.Element() - tl = append(tl, TokensForValue(v)) - } - return hclwrite.TokensForTuple(tl) - } - if ty.IsMapType() || ty.IsObjectType() { - tl := []hclwrite.ObjectAttrTokens{} - for it := val.ElementIterator(); it.Next(); { - k, v := it.Element() - kt := hclwrite.TokensForIdentifier(k.AsString()) - if !hclsyntax.ValidIdentifier(k.AsString()) { - kt = TokensForValue(k) - } - vt := TokensForValue(v) - tl = append(tl, hclwrite.ObjectAttrTokens{Name: kt, Value: vt}) - } - return hclwrite.TokensForObject(tl) - - } - return hclwrite.TokensForValue(val) // rely on hclwrite implementation -} diff --git a/pkg/modulewriter/hcl_utils_test.go b/pkg/modulewriter/hcl_utils_test.go index 6833f8b75a..570ab28156 100644 --- a/pkg/modulewriter/hcl_utils_test.go +++ b/pkg/modulewriter/hcl_utils_test.go @@ -15,59 +15,15 @@ package modulewriter import ( - "hpc-toolkit/pkg/config" "hpc-toolkit/pkg/modulereader" "os" "testing" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - "github.com/hashicorp/hcl/v2/hclwrite" "github.com/zclconf/go-cty/cty" ) -func TestTokensForValueNoLiteral(t *testing.T) { - val := cty.ObjectVal(map[string]cty.Value{ - "tan": cty.TupleVal([]cty.Value{ - cty.StringVal("biege"), - cty.NullVal(cty.String), - cty.MapVal(map[string]cty.Value{ - "cu": cty.NumberIntVal(29), - "ba": cty.NumberIntVal(56), - })}), - "pony.zebra": cty.NilVal, - }) - want := hclwrite.NewEmptyFile() - want.Body().AppendUnstructuredTokens(hclwrite.TokensForValue(val)) - - got := hclwrite.NewEmptyFile() - got.Body().AppendUnstructuredTokens(TokensForValue(val)) - - if diff := cmp.Diff(string(want.Bytes()), string(got.Bytes())); diff != "" { - t.Errorf("diff (-want +got):\n%s", diff) - } -} - -func TestTokensForValueWithLiteral(t *testing.T) { - val := cty.ObjectVal(map[string]cty.Value{ - "tan": cty.TupleVal([]cty.Value{ - cty.StringVal("((var.kilo + 8))"), // HCL literal - config.MustParseExpression("var.tina + 4").AsValue(), // HclExpression value - })}) - want := ` -{ - tan = [var.kilo + 8, var.tina + 4] -}`[1:] - - gotF := hclwrite.NewEmptyFile() - gotF.Body().AppendUnstructuredTokens(TokensForValue(val)) - got := hclwrite.Format(gotF.Bytes()) // format to normalize whitespace - - if diff := cmp.Diff(want, string(got)); diff != "" { - t.Errorf("diff (-want +got):\n%s", diff) - } -} - func TestHclAtttributesRW(t *testing.T) { want := 
make(map[string]cty.Value) // test that a string that needs escaping when written is read correctly diff --git a/pkg/modulewriter/modulewriter.go b/pkg/modulewriter/modulewriter.go index 4c09751a1d..60dcef7e32 100644 --- a/pkg/modulewriter/modulewriter.go +++ b/pkg/modulewriter/modulewriter.go @@ -32,6 +32,8 @@ import ( "os" "path" "path/filepath" + + "github.com/hashicorp/go-getter" ) // strings that get re-used throughout this package and others @@ -78,13 +80,7 @@ func factory(kind string) ModuleWriter { // WriteDeployment writes a deployment directory using modules defined the // environment blueprint. -func WriteDeployment(dc config.DeploymentConfig, outputDir string, overwriteFlag bool) error { - deploymentName, err := dc.Config.DeploymentName() - if err != nil { - return err - } - deploymentDir := filepath.Join(outputDir, deploymentName) - +func WriteDeployment(dc config.DeploymentConfig, deploymentDir string, overwriteFlag bool) error { overwrite := isOverwriteAllowed(deploymentDir, &dc.Config, overwriteFlag) if err := prepDepDir(deploymentDir, overwrite); err != nil { return err @@ -98,8 +94,7 @@ func WriteDeployment(dc config.DeploymentConfig, outputDir string, overwriteFlag return err } - advancedDeployInstructions := filepath.Join(deploymentDir, "instructions.txt") - f, err := os.Create(advancedDeployInstructions) + f, err := os.Create(InstructionsPath(deploymentDir)) if err != nil { return err } @@ -133,19 +128,14 @@ func WriteDeployment(dc config.DeploymentConfig, outputDir string, overwriteFlag } } } - - fmt.Println("To deploy your infrastructure please run:") - fmt.Println() - fmt.Printf("./ghpc deploy %s\n", deploymentDir) - fmt.Println() - fmt.Println("Find instructions for cleanly destroying infrastructure and advanced manual") - fmt.Println("deployment instructions at:") - fmt.Println() - fmt.Printf("%s\n", f.Name()) - return nil } +// InstructionsPath returns the path to the instructions file for a deployment +func InstructionsPath(deploymentDir string) string { + return filepath.Join(deploymentDir, "instructions.txt") +} + func createGroupDirs(deploymentPath string, deploymentGroups *[]config.DeploymentGroup) error { for _, grp := range *deploymentGroups { groupPath := filepath.Join(deploymentPath, string(grp.Name)) @@ -160,7 +150,7 @@ func createGroupDirs(deploymentPath string, deploymentGroups *[]config.Deploymen return nil } -// Get module source within deployment group +// DeploymentSource returns module source within deployment group // Rules are following: // - git source // => keep the same source @@ -170,22 +160,27 @@ func createGroupDirs(deploymentPath string, deploymentGroups *[]config.Deploymen // => ./modules/embedded/ // - other // => ./modules/- -func deploymentSource(mod config.Module) (string, error) { - if sourcereader.IsGitPath(mod.Source) && mod.Kind == config.TerraformKind { - return mod.Source, nil +func DeploymentSource(mod config.Module) (string, error) { + if mod.Kind != config.PackerKind && mod.Kind != config.TerraformKind { + return "", fmt.Errorf("unexpected module kind %#v", mod.Kind) + } + if sourcereader.IsGitPath(mod.Source) { + switch mod.Kind { + case config.TerraformKind: + return mod.Source, nil + case config.PackerKind: + _, subDir := getter.SourceDirSubdir(mod.Source) + return filepath.Join(string(mod.ID), subDir), nil + } } if mod.Kind == config.PackerKind { return string(mod.ID), nil } - if mod.Kind != config.TerraformKind { - return "", fmt.Errorf("unexpected module kind %#v", mod.Kind) - } - if 
sourcereader.IsEmbeddedPath(mod.Source) { return "./modules/" + filepath.Join("embedded", mod.Source), nil } if !sourcereader.IsLocalPath(mod.Source) { - return "", fmt.Errorf("unuexpected module source %s", mod.Source) + return "", fmt.Errorf("unexpected module source %s", mod.Source) } abs, err := filepath.Abs(mod.Source) @@ -225,11 +220,6 @@ func copySource(deploymentPath string, deploymentGroups *[]config.DeploymentGrou var copyEmbedded = false for iMod := range grp.Modules { mod := &grp.Modules[iMod] - ds, err := deploymentSource(*mod) - if err != nil { - return err - } - mod.DeploymentSource = ds if sourcereader.IsGitPath(mod.Source) && mod.Kind == config.TerraformKind { continue // do not download @@ -239,14 +229,33 @@ func copySource(deploymentPath string, deploymentGroups *[]config.DeploymentGrou copyEmbedded = true continue // all embedded terraform modules fill be copied at once } + /* Copy source files */ - dst := filepath.Join(basePath, mod.DeploymentSource) + var modPath string + var dst string + if sourcereader.IsGitPath(mod.Source) && mod.Kind == config.PackerKind { + modPath, _ = getter.SourceDirSubdir(mod.Source) + dst = filepath.Join(basePath, string(mod.ID)) + + } else { + modPath = mod.Source + ds, err := DeploymentSource(*mod) + if err != nil { + return err + } + dst = filepath.Join(basePath, ds) + } if _, err := os.Stat(dst); err == nil { continue } - reader := sourcereader.Factory(mod.Source) - if err := reader.GetModule(mod.Source, dst); err != nil { - return fmt.Errorf("failed to get module from %s to %s: %v", mod.Source, dst, err) + reader := sourcereader.Factory(modPath) + if err := reader.GetModule(modPath, dst); err != nil { + return fmt.Errorf("failed to get module from %s to %s: %v", modPath, dst, err) + } + // remove .git directory if one exists; we do not want submodule + // git history in deployment directory + if err := os.RemoveAll(filepath.Join(dst, ".git")); err != nil { + return err } } if copyEmbedded { diff --git a/pkg/modulewriter/modulewriter_test.go b/pkg/modulewriter/modulewriter_test.go index 36e04b710d..88533a1337 100644 --- a/pkg/modulewriter/modulewriter_test.go +++ b/pkg/modulewriter/modulewriter_test.go @@ -165,25 +165,25 @@ func (s *MySuite) TestPrepDepDir_OverwriteRealDep(c *C) { // Test with a real deployment previously written testDC := getDeploymentConfigForTest() testDC.Config.Vars.Set("deployment_name", cty.StringVal("test_prep_dir")) - realDepDir := filepath.Join(testDir, "test_prep_dir") + depDir := filepath.Join(testDir, "test_prep_dir") // writes a full deployment w/ actual resource groups - WriteDeployment(testDC, testDir, false /* overwrite */) + WriteDeployment(testDC, depDir, false /* overwrite */) // confirm existence of resource groups (beyond .ghpc dir) - files, _ := ioutil.ReadDir(realDepDir) + files, _ := ioutil.ReadDir(depDir) c.Check(len(files) > 1, Equals, true) - err := prepDepDir(realDepDir, true /* overwrite */) + err := prepDepDir(depDir, true /* overwrite */) c.Check(err, IsNil) - c.Check(isDeploymentDirPrepped(realDepDir), IsNil) + c.Check(isDeploymentDirPrepped(depDir), IsNil) // Check prev resource groups were moved - prevModuleDir := filepath.Join(testDir, "test_prep_dir", HiddenGhpcDirName, prevDeploymentGroupDirName) + prevModuleDir := filepath.Join(depDir, HiddenGhpcDirName, prevDeploymentGroupDirName) files1, _ := ioutil.ReadDir(prevModuleDir) c.Check(len(files1) > 0, Equals, true) - files2, _ := ioutil.ReadDir(realDepDir) + files2, _ := ioutil.ReadDir(depDir) c.Check(len(files2), Equals, 3) // .ghpc, 
.gitignore, and instructions file } @@ -236,16 +236,16 @@ func (s *MySuite) TestWriteDeployment(c *C) { afero.WriteFile(aferoFS, "community/modules/green/lime/main.tf", []byte("lime"), 0644) sourcereader.ModuleFS = afero.NewIOFS(aferoFS) - testDC := getDeploymentConfigForTest() + dc := getDeploymentConfigForTest() + dir := filepath.Join(testDir, "test_write_deployment") - testDC.Config.Vars.Set("deployment_name", cty.StringVal("test_write_deployment")) - err := WriteDeployment(testDC, testDir, false /* overwriteFlag */) + err := WriteDeployment(dc, dir, false /* overwriteFlag */) c.Check(err, IsNil) // Overwriting the deployment fails - err = WriteDeployment(testDC, testDir, false /* overwriteFlag */) + err = WriteDeployment(dc, dir, false /* overwriteFlag */) c.Check(err, NotNil) // Overwriting the deployment succeeds with flag - err = WriteDeployment(testDC, testDir, true /* overwriteFlag */) + err = WriteDeployment(dc, dir, true /* overwriteFlag */) c.Check(err, IsNil) } @@ -307,15 +307,6 @@ func (s *MySuite) TestCreateGroupDirs(c *C) { c.Check(err, IsNil) } -func (s *MySuite) TestWriteDeployment_BadDeploymentName(c *C) { - testDC := getDeploymentConfigForTest() - var e *config.InputValueError - - testDC.Config.Vars.Set("deployment_name", cty.NumberIntVal(100)) - err := WriteDeployment(testDC, testDir, false /* overwriteFlag */) - c.Check(errors.As(err, &e), Equals, true) -} - // tfwriter.go func (s *MySuite) TestRestoreTfState(c *C) { // set up dir structure @@ -491,7 +482,9 @@ func (s *MySuite) TestWriteMain(c *C) { // Test with modules testModule := config.Module{ - ID: "test_module", + ID: "test_module", + Kind: config.TerraformKind, + Source: "modules/network/vpc", Settings: config.NewDict(map[string]cty.Value{ "testSetting": cty.StringVal("testValue"), "passthrough": config.MustParseExpression(`"${var.deployment_name}-allow"`).AsValue(), @@ -521,25 +514,6 @@ func (s *MySuite) TestWriteMain(c *C) { exists, err = stringExistsInFile("a_bucket", mainFilePath) c.Assert(err, IsNil) c.Assert(exists, Equals, true) - - // Test with WrapSettingsWith - testModuleWithWrap := config.Module{ - ID: "test_module_with_wrap", - WrapSettingsWith: map[string][]string{ - "wrappedSetting": {"list(flatten(", "))"}, - }, - Settings: config.NewDict(map[string]cty.Value{ - "wrappedSetting": cty.TupleVal([]cty.Value{ - cty.StringVal("val1"), - cty.StringVal("val2")}), - }), - } - testModules = append(testModules, testModuleWithWrap) - err = writeMain(testModules, testBackend, testMainDir) - c.Assert(err, IsNil) - exists, err = stringExistsInFile("list(flatten(", mainFilePath) - c.Assert(err, IsNil) - c.Assert(exists, Equals, true) } func (s *MySuite) TestWriteOutputs(c *C) { @@ -687,9 +661,8 @@ func (s *MySuite) TestWriteDeploymentGroup_PackerWriter(c *C) { } testPackerModule := config.Module{ - Kind: config.PackerKind, - ID: "testPackerModule", - DeploymentSource: "testPackerModule", + Kind: config.PackerKind, + ID: "testPackerModule", } testDeploymentGroup := config.DeploymentGroup{ Name: "packerGroup", @@ -740,7 +713,7 @@ func (s *MySuite) TestWritePackerAutoVars(c *C) { func (s *MySuite) TestStringEscape(c *C) { f := func(s string) string { - toks := TokensForValue(cty.StringVal(s)) + toks := config.TokensForValue(cty.StringVal(s)) return string(toks.Bytes()) } // LiteralVariables @@ -769,44 +742,76 @@ func TestMain(m *testing.M) { func (s *MySuite) TestDeploymentSource(c *C) { { // git m := config.Module{Kind: config.TerraformKind, Source: "github.com/x/y.git"} - s, err := deploymentSource(m) + s, err 
:= DeploymentSource(m) c.Check(err, IsNil) c.Check(s, Equals, "github.com/x/y.git") } { // packer - m := config.Module{Kind: config.PackerKind, Source: "modules/packer/custom-image", ID: "custom-image"} - s, err := deploymentSource(m) + m := config.Module{Kind: config.PackerKind, Source: "modules/packer/custom-image", ID: "image-id"} + s, err := DeploymentSource(m) c.Check(err, IsNil) - c.Check(s, Equals, "custom-image") + c.Check(s, Equals, "image-id") + } + { // remote packer non-package + m := config.Module{Kind: config.PackerKind, Source: "github.com/GoogleCloudPlatform/modules/packer/custom-image", ID: "image-id"} + s, err := DeploymentSource(m) + c.Check(err, IsNil) + c.Check(s, Equals, "image-id") + } + { // remote packer package + m := config.Module{Kind: config.PackerKind, Source: "github.com/GoogleCloudPlatform//modules/packer/custom-image?ref=main", ID: "image-id"} + s, err := DeploymentSource(m) + c.Check(err, IsNil) + c.Check(s, Equals, "image-id/modules/packer/custom-image") } { // embedded core m := config.Module{Kind: config.TerraformKind, Source: "modules/x/y"} - s, err := deploymentSource(m) + s, err := DeploymentSource(m) c.Check(err, IsNil) c.Check(s, Equals, "./modules/embedded/modules/x/y") } { // embedded community m := config.Module{Kind: config.TerraformKind, Source: "community/modules/x/y"} - s, err := deploymentSource(m) + s, err := DeploymentSource(m) c.Check(err, IsNil) c.Check(s, Equals, "./modules/embedded/community/modules/x/y") } { // local rel in repo m := config.Module{Kind: config.TerraformKind, Source: "./modules/x/y"} - s, err := deploymentSource(m) + s, err := DeploymentSource(m) c.Check(err, IsNil) c.Check(s, Matches, `^\./modules/y-\w\w\w\w$`) } { // local rel m := config.Module{Kind: config.TerraformKind, Source: "./../../../../x/y"} - s, err := deploymentSource(m) + s, err := DeploymentSource(m) c.Check(err, IsNil) c.Check(s, Matches, `^\./modules/y-\w\w\w\w$`) } { // local abs m := config.Module{Kind: config.TerraformKind, Source: "/tmp/x/y"} - s, err := deploymentSource(m) + s, err := DeploymentSource(m) c.Check(err, IsNil) c.Check(s, Matches, `^\./modules/y-\w\w\w\w$`) } } + +func (s *MySuite) TestSubstituteIgcReferencesInModule(c *C) { + d := config.Dict{} + d.Set("fold", cty.TupleVal([]cty.Value{ + cty.StringVal("zebra"), + config.MustParseExpression(`module.golf.red + 6 + module.golf.green`).AsValue(), + config.MustParseExpression(`module.tennis.brown`).AsValue(), + })) + m := SubstituteIgcReferencesInModule( + config.Module{Settings: d}, + map[config.Reference]modulereader.VarInfo{ + config.ModuleRef("golf", "red"): {Name: "pink"}, + config.ModuleRef("golf", "green"): {Name: "lime"}, + }) + c.Check(m.Settings.Items(), DeepEquals, map[string]cty.Value{"fold": cty.TupleVal([]cty.Value{ + cty.StringVal("zebra"), + config.MustParseExpression(`var.pink + 6 + var.lime`).AsValue(), + config.MustParseExpression(`module.tennis.brown`).AsValue(), + })}) +} diff --git a/pkg/modulewriter/packerwriter.go b/pkg/modulewriter/packerwriter.go index b5bb6ec18d..7cc3253cc8 100644 --- a/pkg/modulewriter/packerwriter.go +++ b/pkg/modulewriter/packerwriter.go @@ -41,16 +41,15 @@ func (w *PackerWriter) addNumModules(value int) { w.numModules += value } -func printPackerInstructions(w io.Writer, modPath string, modID config.ModuleID, printImportInputs bool) { +func printPackerInstructions(w io.Writer, groupPath string, subPath string, printImportInputs bool) { fmt.Fprintln(w) - fmt.Fprintf(w, "Packer group '%s' was successfully created in directory %s\n", modID, 
modPath) + fmt.Fprintf(w, "Packer group was successfully created in directory %s\n", groupPath) fmt.Fprintln(w, "To deploy, run the following commands:") fmt.Fprintln(w) - grpPath := filepath.Clean(filepath.Join(modPath, "..")) if printImportInputs { - fmt.Fprintf(w, "ghpc import-inputs %s\n", grpPath) + fmt.Fprintf(w, "ghpc import-inputs %s\n", groupPath) } - fmt.Fprintf(w, "cd %s\n", modPath) + fmt.Fprintf(w, "cd %s\n", filepath.Join(groupPath, subPath)) fmt.Fprintln(w, "packer init .") fmt.Fprintln(w, "packer validate .") fmt.Fprintln(w, "packer build .") @@ -93,12 +92,16 @@ func (w PackerWriter) writeDeploymentGroup( return err } - modPath := filepath.Join(groupPath, mod.DeploymentSource) + ds, err := DeploymentSource(mod) + if err != nil { + return err + } + modPath := filepath.Join(groupPath, ds) if err = writePackerAutovars(av.Items(), modPath); err != nil { return err } hasIgc := len(pure.Items()) < len(mod.Settings.Items()) - printPackerInstructions(instructionsFile, modPath, mod.ID, hasIgc) + printPackerInstructions(instructionsFile, groupPath, ds, hasIgc) } return nil diff --git a/pkg/modulewriter/tfversions.go b/pkg/modulewriter/tfversions.go index 11f10d3bec..b3a1b1b31f 100644 --- a/pkg/modulewriter/tfversions.go +++ b/pkg/modulewriter/tfversions.go @@ -21,11 +21,11 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 4.65.2" + version = "~> 4.69.1" } google-beta = { source = "hashicorp/google-beta" - version = "~> 4.65.2" + version = "~> 4.69.1" } } } diff --git a/pkg/modulewriter/tfwriter.go b/pkg/modulewriter/tfwriter.go index 4280791b99..c9161e9e46 100644 --- a/pkg/modulewriter/tfwriter.go +++ b/pkg/modulewriter/tfwriter.go @@ -26,7 +26,6 @@ import ( "strings" "github.com/hashicorp/hcl/v2/ext/typeexpr" - "github.com/hashicorp/hcl/v2/hclsyntax" "github.com/hashicorp/hcl/v2/hclwrite" "github.com/zclconf/go-cty/cty" "golang.org/x/exp/maps" @@ -222,25 +221,16 @@ func writeMain( moduleBody := moduleBlock.Body() // Add source attribute - moduleBody.SetAttributeValue("source", cty.StringVal(mod.DeploymentSource)) + ds, err := DeploymentSource(mod) + if err != nil { + return err + } + moduleBody.SetAttributeValue("source", cty.StringVal(ds)) // For each Setting for _, setting := range orderKeys(mod.Settings.Items()) { value := mod.Settings.Get(setting) - if wrap, ok := mod.WrapSettingsWith[setting]; ok { - if len(wrap) != 2 { - return fmt.Errorf( - "invalid length of WrapSettingsWith for %s.%s, expected 2 got %d", - mod.ID, setting, len(wrap)) - } - toks, err := tokensForWrapped(wrap[0], value, wrap[1]) - if err != nil { - return fmt.Errorf("failed to process %s.%s: %v", mod.ID, setting, err) - } - moduleBody.SetAttributeRaw(setting, toks) - } else { - moduleBody.SetAttributeRaw(setting, TokensForValue(value)) - } + moduleBody.SetAttributeRaw(setting, config.TokensForValue(value)) } } // Write file @@ -252,30 +242,6 @@ func writeMain( return nil } -func tokensForWrapped(pref string, val cty.Value, suf string) (hclwrite.Tokens, error) { - var toks hclwrite.Tokens - if !val.Type().IsListType() && !val.Type().IsTupleType() { - return toks, fmt.Errorf( - "invalid value for wrapped setting, expected sequence, got %#v", val.Type()) - } - toks = append(toks, simpleTokens(pref)...) - - it, first := val.ElementIterator(), true - for it.Next() { - if !first { - toks = append(toks, &hclwrite.Token{ - Type: hclsyntax.TokenComma, - Bytes: []byte{','}}) - } - _, el := it.Element() - toks = append(toks, TokensForValue(el)...) 
- first = false - } - toks = append(toks, simpleTokens(suf)...) - - return toks, nil -} - var simpleTokens = hclwrite.TokensForIdentifier func writeProviders(vars map[string]cty.Value, dst string) error { diff --git a/pkg/shell/packer.go b/pkg/shell/packer.go index 74f82baefa..0d51fe3baa 100644 --- a/pkg/shell/packer.go +++ b/pkg/shell/packer.go @@ -17,8 +17,11 @@ package shell import ( + "bytes" + "io" "os" "os/exec" + "sync" ) // ConfigurePacker errors if packer is not in the user PATH @@ -38,12 +41,47 @@ func ConfigurePacker() error { func ExecPackerCmd(workingDir string, printToScreen bool, args ...string) error { cmd := exec.Command("packer", args...) cmd.Dir = workingDir + stdout, err := cmd.StdoutPipe() + if err != nil { + return err + } + stderr, err := cmd.StderrPipe() + if err != nil { + return err + } + + if err := cmd.Start(); err != nil { + return err + } + + // capture stdout/stderr; print to screen in real-time or upon error + var wg sync.WaitGroup + var outBuf io.ReadWriter + var errBuf io.ReadWriter if printToScreen { - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr + outBuf = os.Stdout + errBuf = os.Stderr + } else { + outBuf = bytes.NewBuffer([]byte{}) + errBuf = bytes.NewBuffer([]byte{}) } + wg.Add(1) + go func() { + defer wg.Done() + io.Copy(outBuf, stdout) + }() + wg.Add(1) + go func() { + defer wg.Done() + io.Copy(errBuf, stderr) + }() + wg.Wait() - if err := cmd.Run(); err != nil { + if err := cmd.Wait(); err != nil { + if !printToScreen { + io.Copy(os.Stdout, outBuf) + io.Copy(os.Stderr, errBuf) + } return err } return nil diff --git a/pkg/shell/terraform.go b/pkg/shell/terraform.go index f51a16da3b..3b8546b207 100644 --- a/pkg/shell/terraform.go +++ b/pkg/shell/terraform.go @@ -288,7 +288,7 @@ func ExportOutputs(tf *tfexec.Terraform, artifactsDir string, applyBehavior Appl func ImportInputs(deploymentGroupDir string, artifactsDir string, expandedBlueprintFile string) error { deploymentRoot := filepath.Clean(filepath.Join(deploymentGroupDir, "..")) - dc, err := config.NewDeploymentConfig(expandedBlueprintFile) + dc, _, err := config.NewDeploymentConfig(expandedBlueprintFile) if err != nil { return err } @@ -334,8 +334,12 @@ func ImportInputs(deploymentGroupDir string, artifactsDir string, expandedBluepr packerGroup := dc.Config.DeploymentGroups[thisGroupIdx] // Packer groups are enforced to have length 1 packerModule := packerGroup.Modules[0] - moduleID := string(packerModule.ID) - outfile = filepath.Join(deploymentGroupDir, moduleID, fmt.Sprintf("%s_inputs.auto.pkrvars.hcl", moduleID)) + modPath, err := modulewriter.DeploymentSource(packerModule) + if err != nil { + return err + } + outfile = filepath.Join(deploymentGroupDir, modPath, + fmt.Sprintf("%s_inputs.auto.pkrvars.hcl", packerModule.ID)) // evaluate Packer settings that contain intergroup references in the // context of deployment variables and intergroup output values diff --git a/pkg/sourcereader/embedded.go b/pkg/sourcereader/embedded.go index 920f198911..e681755950 100644 --- a/pkg/sourcereader/embedded.go +++ b/pkg/sourcereader/embedded.go @@ -103,7 +103,7 @@ func (r EmbeddedSourceReader) GetModule(modPath string, copyPath string) error { return fmt.Errorf("embedded file system is not initialized") } if !IsEmbeddedPath(modPath) { - return fmt.Errorf("Source is not valid: %s", modPath) + return fmt.Errorf("source is not valid: %s", modPath) } modDir, err := copyFSToTempDir(ModuleFS, modPath) diff --git a/pkg/sourcereader/embedded_test.go b/pkg/sourcereader/embedded_test.go index 003384d24e..3267e3921d 
100644 --- a/pkg/sourcereader/embedded_test.go +++ b/pkg/sourcereader/embedded_test.go @@ -153,7 +153,7 @@ func (s *MySuite) TestGetModule_Embedded(c *C) { // Invalid: Unsupported Module Source by EmbeddedSourceReader badSource := "gcs::https://www.googleapis.com/storage/v1/GoogleCloudPlatform/hpc-toolkit/modules" err = reader.GetModule(badSource, dest) - expectedErr = "Source is not valid: .*" + expectedErr = "source is not valid: .*" c.Assert(err, ErrorMatches, expectedErr) } diff --git a/pkg/sourcereader/git.go b/pkg/sourcereader/git.go index 2f3643c188..dcc1ef06a9 100644 --- a/pkg/sourcereader/git.go +++ b/pkg/sourcereader/git.go @@ -59,7 +59,7 @@ func copyGitModules(srcPath string, destPath string) error { // GetModule copies the git source to a provided destination (the deployment directory) func (r GitSourceReader) GetModule(modPath string, copyPath string) error { if !IsGitPath(modPath) { - return fmt.Errorf("Source is not valid: %s", modPath) + return fmt.Errorf("source is not valid: %s", modPath) } modDir, err := ioutil.TempDir("", "git-module-*") diff --git a/pkg/sourcereader/git_test.go b/pkg/sourcereader/git_test.go index 0dd321e0f8..4160c6fa72 100644 --- a/pkg/sourcereader/git_test.go +++ b/pkg/sourcereader/git_test.go @@ -62,6 +62,6 @@ func (s *MySuite) TestGetModule_Git(c *C) { // Invalid: Unsupported Module Source badSource := "gcs::https://www.googleapis.com/storage/v1/GoogleCloudPlatform/hpc-toolkit/modules" err = reader.GetModule(badSource, tfKindString) - expectedErr = "Source is not valid: .*" + expectedErr = "source is not valid: .*" c.Assert(err, ErrorMatches, expectedErr) } diff --git a/pkg/sourcereader/local.go b/pkg/sourcereader/local.go index ee68b369da..a8744cb6f5 100644 --- a/pkg/sourcereader/local.go +++ b/pkg/sourcereader/local.go @@ -25,7 +25,7 @@ type LocalSourceReader struct{} // GetModule copies the local source to a provided destination (the deployment directory) func (r LocalSourceReader) GetModule(modPath string, copyPath string) error { if !IsLocalPath(modPath) { - return fmt.Errorf("Source is not valid: %s", modPath) + return fmt.Errorf("source is not valid: %s", modPath) } if _, err := os.Stat(modPath); os.IsNotExist(err) { diff --git a/pkg/sourcereader/local_test.go b/pkg/sourcereader/local_test.go index 4e6f9d0b38..3bc1330913 100644 --- a/pkg/sourcereader/local_test.go +++ b/pkg/sourcereader/local_test.go @@ -122,6 +122,6 @@ func (s *MySuite) TestGetModule_Local(c *C) { // Invalid: Unsupported Module Source by LocalSourceReader badSource := "gcs::https://www.googleapis.com/storage/v1/GoogleCloudPlatform/hpc-toolkit/modules" err = reader.GetModule(badSource, dest) - expectedErr = "Source is not valid: .*" + expectedErr = "source is not valid: .*" c.Assert(err, ErrorMatches, expectedErr) } diff --git a/tools/cloud-build/Dockerfile b/tools/cloud-build/Dockerfile index 8da6bbff91..730ca2d214 100644 --- a/tools/cloud-build/Dockerfile +++ b/tools/cloud-build/Dockerfile @@ -50,7 +50,7 @@ WORKDIR /ghpc-tmp COPY ./ ./ RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.3/scripts/requirements.txt && \ + pip install --no-cache-dir -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.4/scripts/requirements.txt && \ pip install --no-cache-dir -r tools/cloud-build/requirements.txt && \ rm -rf ~/.cache/pip/* diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml 
index 7d78370bbf..58ff5a5f29 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml @@ -80,11 +80,14 @@ ## Setup firewall for cloud build - name: Get Builder IP - changed_when: false - ansible.builtin.shell: >- - dig TXT +short o-o.myaddr.l.google.com @ns1.google.com | - awk -F'"' '{print $2}' register: build_ip + changed_when: false + args: + executable: /bin/bash + ansible.builtin.shell: | + set -e -o pipefail + dig TXT +short o-o.myaddr.l.google.com @ns1.google.com | \ + awk -F'"' '{print $2}' - name: Create firewall rule register: fw_result changed_when: fw_result.rc == 0 @@ -159,7 +162,6 @@ minutes: 2 - name: Run Integration tests for HPC toolkit ansible.builtin.include_tasks: "{{ test }}" - run_once: true vars: remote_node: "{{ remote_node }}" deployment_name: "{{ deployment_name }}" diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml index 7a6a9d3b73..e678b025a2 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml @@ -33,18 +33,14 @@ file: tasks/create_deployment_directory.yml - name: Create Infrastructure and test block: - - name: Setup network and HTCondor install scripts - ansible.builtin.command: - cmd: "{{ item }}" - chdir: "{{ workspace }}/{{ deployment_name }}/htcondor" + - name: Execute ghpc deploy + register: deployment + changed_when: deployment.changed + ansible.builtin.command: ./ghpc deploy {{ deployment_name }} --auto-approve args: - creates: "{{ workspace }}/{{ deployment_name }}/.terraform" + chdir: "{{ workspace }}" environment: TF_IN_AUTOMATION: "TRUE" - with_items: - - terraform init - - terraform validate - - terraform apply -auto-approve -no-color - name: Print instance IDs of VMs ansible.builtin.include_tasks: file: tasks/get_instance_ids.yml @@ -52,10 +48,10 @@ register: access_ip changed_when: false args: - chdir: "{{ workspace }}/{{ deployment_name }}/htcondor" + chdir: "{{ workspace }}/{{ deployment_name }}/pool" executable: /bin/bash ansible.builtin.shell: | - set -o pipefail + set -e -o pipefail terraform output -json external_ip_htcondor_access | jq -r '.[0]' - name: Add Login node as host ansible.builtin.add_host: @@ -66,9 +62,12 @@ - name: Get Builder IP register: build_ip changed_when: false - ansible.builtin.shell: >- - dig TXT +short o-o.myaddr.l.google.com @ns1.google.com | - awk -F'"' '{print $2}' + args: + executable: /bin/bash + ansible.builtin.shell: | + set -e -o pipefail + dig TXT +short o-o.myaddr.l.google.com @ns1.google.com | \ + awk -F'"' '{print $2}' - name: Create firewall rule register: fw_result changed_when: fw_result.rc == 0 @@ -111,19 +110,25 @@ - firewall-rules - delete - "{{ deployment_name }}" - - name: Tear Down Pool - changed_when: true # assume something destroyed - failed_when: false # keep cleaning up - run_once: true - delegate_to: localhost + - name: Destroy deployment + register: ghpc_destroy + changed_when: ghpc_destroy.changed + ignore_errors: true + ansible.builtin.command: ./ghpc destroy {{ deployment_name }} --auto-approve + args: + chdir: "{{ workspace }}" environment: TF_IN_AUTOMATION: "TRUE" - ansible.builtin.command: - cmd: "{{ item }}" - chdir: "{{ workspace }}/{{ deployment_name }}/htcondor" - with_items: - - terraform init - - terraform destroy -auto-approve + - 
name: Delete VM Image + register: image_deletion + changed_when: image_deletion.changed + ignore_errors: true + ansible.builtin.shell: | + set -e -o pipefail + gcloud compute images delete --project={{ project }} --quiet $(jq -r '.builds[-1].artifact_id' packer-manifest.json | cut -d ":" -f2) + args: + chdir: "{{ workspace }}/{{ deployment_name }}/packer/custom-image" + executable: /bin/bash - name: Run Integration Tests hosts: remote_host @@ -148,7 +153,6 @@ timeout: 480 - name: Run Integration tests for HPC toolkit ansible.builtin.include_tasks: "{{ test }}" - run_once: true vars: access_point: "{{ access_point }}" loop: "{{ post_deploy_tests }}" @@ -156,11 +160,10 @@ loop_var: test always: - name: Delete Firewall Rule + delegate_to: localhost register: fw_deleted changed_when: fw_deleted.rc == 0 failed_when: false # keep cleaning up - run_once: true - delegate_to: localhost ansible.builtin.command: argv: - gcloud @@ -168,15 +171,24 @@ - firewall-rules - delete - "{{ deployment_name }}" - - name: Tear Down Pool - changed_when: true # assume something destroyed + - name: Destroy deployment delegate_to: localhost - run_once: true + register: ghpc_destroy + changed_when: ghpc_destroy.changed + ignore_errors: true + ansible.builtin.command: ./ghpc destroy {{ deployment_name }} --auto-approve + args: + chdir: "{{ workspace }}" environment: TF_IN_AUTOMATION: "TRUE" - ansible.builtin.command: - cmd: "{{ item }}" - chdir: "{{ workspace }}/{{ deployment_name }}/htcondor" - with_items: - - terraform init - - terraform destroy -auto-approve + - name: Delete VM Image + delegate_to: localhost + register: image_deletion + changed_when: image_deletion.changed + ignore_errors: true + ansible.builtin.shell: | + set -e -o pipefail + gcloud compute images delete --project={{ project }} --quiet $(jq -r '.builds[-1].artifact_id' packer-manifest.json | cut -d ":" -f2) + args: + chdir: "{{ workspace }}/{{ deployment_name }}/packer/custom-image" + executable: /bin/bash diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml index 70bf2ccc2a..0c424bb20d 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml @@ -86,11 +86,14 @@ ## Setup firewall for cloud build - name: Get Builder IP - changed_when: false - ansible.builtin.shell: >- - dig TXT +short o-o.myaddr.l.google.com @ns1.google.com | - awk -F'"' '{print $2}' register: build_ip + changed_when: false + args: + executable: /bin/bash + ansible.builtin.shell: | + set -e -o pipefail + dig TXT +short o-o.myaddr.l.google.com @ns1.google.com | \ + awk -F'"' '{print $2}' - name: Create firewall rule register: fw_created changed_when: fw_created.rc == 0 @@ -172,7 +175,6 @@ timeout: 600 - name: Run Integration tests for HPC toolkit ansible.builtin.include_tasks: "{{ test }}" - run_once: true vars: login_node: "{{ login_node }}" custom_vars: "{{ custom_vars }}" diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-htcondor-access-point.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-htcondor-access-point.yml index 6555539e56..0419c3276b 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-htcondor-access-point.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-htcondor-access-point.yml @@ -13,10 +13,18 @@ # limitations under the License. 
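For context on the path used by the image-deletion tasks above: {{ deployment_name }}/packer/custom-image is the per-module directory produced by the DeploymentSource rules earlier in this diff. For remote Packer sources, DeploymentSource splits go-getter's package notation into a repository and a subdirectory, then re-roots the subdirectory under the module ID. A minimal standalone sketch of that split, assuming an illustrative source URL and module ID (neither is taken from this patch):

package main

import (
	"fmt"
	"path/filepath"

	"github.com/hashicorp/go-getter"
)

func main() {
	// go-getter "package" notation separates the repository from a
	// subdirectory with a double slash; query parameters remain with
	// the repository half.
	src := "github.com/GoogleCloudPlatform//modules/packer/custom-image?ref=main"
	repo, subDir := getter.SourceDirSubdir(src)
	fmt.Println(repo)   // github.com/GoogleCloudPlatform?ref=main
	fmt.Println(subDir) // modules/packer/custom-image

	// As in the TestDeploymentSource cases earlier in this diff, the
	// subdirectory is re-rooted under the module ID within the group.
	fmt.Println(filepath.Join("image-id", subDir)) // image-id/modules/packer/custom-image
}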
--- -- name: Ensure schedd has joined pool +- name: Ensure schedd has joined the pool ansible.builtin.command: condor_status -schedd -autof Name register: schedd_status changed_when: False - until: ansible_fqdn == schedd_status.stdout + until: schedd_status.stdout == ansible_fqdn retries: 8 delay: 15 +- name: Ensure execute points have joined the pool + ansible.builtin.command: condor_status -collector -autoformat HostsTotal + register: hosts_total + changed_when: False + # must match total min_idle settings of execute-point modules in blueprint + until: hosts_total.stdout == "2" + retries: 20 + delay: 15 diff --git a/tools/duplicate-diff.py b/tools/duplicate-diff.py index a0730fcaef..068d2e476e 100644 --- a/tools/duplicate-diff.py +++ b/tools/duplicate-diff.py @@ -43,7 +43,12 @@ ], [ "community/modules/compute/gke-node-pool/threads_per_core_calc.tf", - "modules/compute/vm-instance/threads_per_core_calc.tf" + "modules/compute/vm-instance/threads_per_core_calc.tf", + ], + [ + "community/modules/compute/schedmd-slurm-gcp-v5-node-group/source_image_logic.tf", + "community/modules/scheduler/schedmd-slurm-gcp-v5-controller/source_image_logic.tf", + "community/modules/scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf", ], ] diff --git a/tools/enforce_coverage.pl b/tools/enforce_coverage.pl index 5e1e2eecfe..5f78aae300 100755 --- a/tools/enforce_coverage.pl +++ b/tools/enforce_coverage.pl @@ -19,9 +19,8 @@ # TODO: raise ./cmd min coverage to 80% after tests are written my $min = 80; my $cmdmin = 40; -my $shellmin = 15; +my $shellmin = 0; my $failed_coverage = 0; -my $failed_tests = 0; while (<>){ print $_; @@ -32,14 +31,8 @@ } elsif ( $_ =~ /coverage: (\d+\.\d)%/ ) { $failed_coverage++ if ($1 < $min); } - if ($_ =~ /\d+ passed, (\d+) FAILED/){ - $failed_tests += $1; - } -} -if ($failed_tests > 0) { - print STDERR "$failed_tests test(s) failed.\n"; - exit 1 } + if ($failed_coverage > 0) { print STDERR "Coverage must be above $cmdmin% for ./cmd and $min% for other packages, $failed_coverage packages were below that.\n"; exit 1 diff --git a/tools/validate_configs/golden_copies/configs/igc_pkr.yaml b/tools/validate_configs/golden_copies/configs/igc_pkr.yaml index fc119d2085..07963b6413 100644 --- a/tools/validate_configs/golden_copies/configs/igc_pkr.yaml +++ b/tools/validate_configs/golden_copies/configs/igc_pkr.yaml @@ -26,6 +26,9 @@ deployment_groups: modules: - id: network0 source: modules/network/vpc + settings: + enable_iap_rdp_ingress: true + enable_iap_winrm_ingress: true - id: homefs source: modules/file-system/filestore use: [network0] @@ -45,6 +48,10 @@ deployment_groups: content: | #!/bin/bash echo "Hello, World!" + - id: windows_startup + source: community/modules/scripts/windows-startup-script + settings: + install_nvidia_driver: true - group: one modules: @@ -54,3 +61,4 @@ deployment_groups: use: - network0 - script + - windows_startup diff --git a/tools/validate_configs/golden_copies/configs/merge_flatten.yaml b/tools/validate_configs/golden_copies/configs/merge_flatten.yaml new file mode 100644 index 0000000000..590510eb41 --- /dev/null +++ b/tools/validate_configs/golden_copies/configs/merge_flatten.yaml @@ -0,0 +1,49 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +blueprint_name: merge_flatten + +vars: + project_id: # + deployment_name: merge_flatten + region: us-east4 + zone: us-east4-c + +deployment_groups: +- group: zero + modules: + - id: network + source: modules/network/vpc + + - id: first-fs + source: modules/file-system/filestore + use: [network] + settings: + local_mount: /first + + - id: second-fs + source: modules/file-system/filestore + use: [network] + settings: + local_mount: /first + + - id: first-vm + source: modules/compute/vm-instance + use: [first-fs] + settings: + labels: {"green": "sleeves"} + + - id: second-vm + source: modules/compute/vm-instance + use: [first-fs, second-fs] diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml index 5d3066cb2e..f606099f79 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml @@ -16,26 +16,17 @@ blueprint_name: igc ghpc_version: golden validators: - validator: test_project_exists - inputs: {} skip: true - validator: test_apis_enabled - inputs: {} skip: true - validator: test_region_exists - inputs: {} skip: true - validator: test_zone_exists - inputs: {} skip: true - validator: test_zone_in_region - inputs: {} skip: true - validator: test_module_not_used - inputs: {} - skip: false - validator: test_deployment_variable_not_used - inputs: {} - skip: false vars: deployment_name: golden_copy_deployment labels: @@ -46,87 +37,65 @@ vars: zone: us-east4-c deployment_groups: - group: zero - terraform_backend: - type: "" - configuration: {} modules: - source: modules/network/vpc kind: terraform id: network0 - use: [] - wrapsettingswith: {} outputs: - name: subnetwork_name description: Automatically-generated output exported for use by later deployment groups sensitive: true settings: deployment_name: ((var.deployment_name )) + enable_iap_rdp_ingress: true + enable_iap_winrm_ingress: true project_id: ((var.project_id )) region: ((var.region )) - required_apis: - $(vars.project_id): - - compute.googleapis.com - source: modules/file-system/filestore kind: terraform id: homefs use: - network0 - wrapsettingswith: - labels: - - merge( - - ) settings: deployment_name: ((var.deployment_name )) - labels: - - ((var.labels )) - - ghpc_role: file-system + labels: |- + ((merge(var.labels, { + ghpc_role = "file-system" + }) )) local_mount: /home network_id: ((module.network0.network_id )) project_id: ((var.project_id )) region: ((var.region )) zone: ((var.zone )) - required_apis: - $(vars.project_id): - - file.googleapis.com - source: modules/file-system/filestore kind: terraform id: projectsfs use: - network0 - wrapsettingswith: - labels: - - merge( - - ) settings: deployment_name: ((var.deployment_name )) - labels: - - ((var.labels )) - - ghpc_role: file-system + labels: |- + ((merge(var.labels, { + ghpc_role = "file-system" + }) )) local_mount: /projects network_id: 
((module.network0.network_id )) project_id: ((var.project_id )) region: ((var.region )) zone: ((var.zone )) - required_apis: - $(vars.project_id): - - file.googleapis.com - source: modules/scripts/startup-script kind: terraform id: script - use: [] - wrapsettingswith: - labels: - - merge( - - ) outputs: - name: startup_script description: Automatically-generated output exported for use by later deployment groups sensitive: true settings: deployment_name: ((var.deployment_name )) - labels: - - ((var.labels )) - - ghpc_role: scripts + labels: |- + ((merge(var.labels, { + ghpc_role = "scripts" + }) )) project_id: ((var.project_id )) region: ((var.region )) runners: @@ -135,14 +104,17 @@ deployment_groups: echo "Hello, World!" destination: hello.sh type: shell - required_apis: - $(vars.project_id): - - storage.googleapis.com + - source: community/modules/scripts/windows-startup-script + kind: terraform + id: windows_startup + outputs: + - name: windows_startup_ps1 + description: Automatically-generated output exported for use by later deployment groups + sensitive: true + settings: + install_nvidia_driver: true kind: terraform - group: one - terraform_backend: - type: "" - configuration: {} modules: - source: modules/packer/custom-image kind: packer @@ -150,22 +122,16 @@ deployment_groups: use: - network0 - script - wrapsettingswith: {} + - windows_startup settings: deployment_name: ((var.deployment_name )) - labels: - ghpc_blueprint: igc - ghpc_deployment: golden_copy_deployment - ghpc_role: packer + labels: |- + ((merge(var.labels, { + ghpc_role = "packer" + }))) project_id: ((var.project_id )) startup_script: ((module.script.startup_script)) subnetwork_name: ((module.network0.subnetwork_name)) + windows_startup_ps1: ((flatten([module.windows_startup.windows_startup_ps1]))) zone: ((var.zone )) - required_apis: - $(vars.project_id): - - compute.googleapis.com - - storage.googleapis.com kind: packer -terraform_backend_defaults: - type: "" - configuration: {} diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/instructions.txt b/tools/validate_configs/golden_copies/expectations/igc_pkr/instructions.txt index 88bc6ba912..dbf1500c89 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/instructions.txt +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/instructions.txt @@ -9,7 +9,7 @@ terraform -chdir=golden_copy_deployment/zero validate terraform -chdir=golden_copy_deployment/zero apply ghpc export-outputs golden_copy_deployment/zero -Packer group 'image' was successfully created in directory golden_copy_deployment/one/image +Packer group was successfully created in directory golden_copy_deployment/one To deploy, run the following commands: ghpc import-inputs golden_copy_deployment/one diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl index 6ee3b63554..fa9a372cc5 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl @@ -18,9 +18,25 @@ locals { image_name_default = "${local.image_family}-${formatdate("YYYYMMDD't'hhmmss'z'", timestamp())}" image_name = var.image_name != null ? var.image_name : local.image_name_default + # default to explicit var.communicator, otherwise in-order: ssh/winrm/none + shell_script_communicator = length(var.shell_scripts) > 0 ? 
"ssh" : "" + ansible_playbook_communicator = length(var.ansible_playbooks) > 0 ? "ssh" : "" + powershell_script_communicator = length(var.windows_startup_ps1) > 0 ? "winrm" : "" + communicator = coalesce( + var.communicator, + local.shell_script_communicator, + local.ansible_playbook_communicator, + local.powershell_script_communicator, + "none" + ) + + # must not enable IAP when no communicator is in use + use_iap = local.communicator == "none" ? false : var.use_iap + # construct metadata from startup_script and metadata variables startup_script_metadata = var.startup_script == null ? {} : { startup-script = var.startup_script } - user_management_metadata = { + + linux_user_metadata = { block-project-ssh-keys = "TRUE" shutdown-script = <<-EOT #!/bin/bash @@ -28,23 +44,21 @@ locals { sed -i '/${var.ssh_username}/d' /var/lib/google/google_users EOT } + windows_packer_user = "packer_user" + windows_user_metadata = { + sysprep-specialize-script-cmd = "winrm quickconfig -quiet & net user /add ${local.windows_packer_user} & net localgroup administrators ${local.windows_packer_user} /add & winrm set winrm/config/service/auth @{Basic=\\\"true\\\"}" + windows-shutdown-script-cmd = "net user /delete ${local.windows_packer_user}" + } + user_metadata = local.communicator == "winrm" ? local.windows_user_metadata : local.linux_user_metadata # merge metadata such that var.metadata always overrides user management # metadata but always allow var.startup_script to override var.metadata metadata = merge( - local.user_management_metadata, + local.user_metadata, var.metadata, local.startup_script_metadata, ) - # determine communicator to use and whether to enable Identity-Aware Proxy - no_shell_scripts = length(var.shell_scripts) == 0 - no_ansible_playbooks = length(var.ansible_playbooks) == 0 - no_provisioners = local.no_shell_scripts && local.no_ansible_playbooks - communicator_default = local.no_provisioners ? "none" : "ssh" - communicator = var.communicator == null ? local.communicator_default : var.communicator - use_iap = local.communicator == "none" ? false : var.use_iap - # determine best value for on_host_maintenance if not supplied by user machine_vals = split("-", var.machine_type) machine_family = local.machine_vals[0] @@ -55,6 +69,12 @@ locals { ? var.on_host_maintenance : local.on_host_maintenance_default ) + + accelerator_type = var.accelerator_type == null ? null : "projects/${var.project_id}/zones/${var.zone}/acceleratorTypes/${var.accelerator_type}" + + winrm_username = local.communicator == "winrm" ? "packer_user" : null + winrm_insecure = local.communicator == "winrm" ? true : null + winrm_use_ssl = local.communicator == "winrm" ? 
true : null } source "googlecompute" "toolkit_image" { @@ -64,10 +84,11 @@ source "googlecompute" "toolkit_image" { image_family = local.image_family image_labels = var.labels machine_type = var.machine_type - accelerator_type = var.accelerator_type + accelerator_type = local.accelerator_type accelerator_count = var.accelerator_count on_host_maintenance = local.on_host_maintenance disk_size = var.disk_size + disk_type = var.disk_type omit_external_ip = var.omit_external_ip use_internal_ip = var.omit_external_ip subnetwork = var.subnetwork_name @@ -80,6 +101,9 @@ source "googlecompute" "toolkit_image" { tags = var.tags use_iap = local.use_iap use_os_login = var.use_os_login + winrm_username = local.winrm_username + winrm_insecure = local.winrm_insecure + winrm_use_ssl = local.winrm_use_ssl zone = var.zone labels = var.labels metadata = local.metadata @@ -107,6 +131,15 @@ build { } } + # provisioner "powershell" blocks + dynamic "provisioner" { + labels = ["powershell"] + for_each = var.windows_startup_ps1 + content { + inline = split("\n", provisioner.value) + } + } + # provisioner "ansible-local" blocks # this installs custom roles/collections from ansible-galaxy in /home/packer # which will be removed at the end; consider modifying /etc/ansible/ansible.cfg diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/variables.pkr.hcl b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/variables.pkr.hcl index 11a887bc85..9589669153 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/variables.pkr.hcl +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/variables.pkr.hcl @@ -34,6 +34,12 @@ variable "disk_size" { default = null } +variable "disk_type" { + description = "Type of persistent disk to provision" + type = string + default = "pd-balanced" +} + variable "zone" { description = "Cloud zone in which to provision image building VM" type = string @@ -150,6 +156,12 @@ variable "shell_scripts" { default = [] } +variable "windows_startup_ps1" { + description = "A list of strings containing PowerShell scripts which will customize a Windows VM image (requires WinRM communicator)" + type = list(string) + default = [] +} + variable "startup_script" { description = "Startup script (as raw string) used to build the custom Linux VM image (overridden by var.startup_script_file if both are set)" type = string diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/main.tf b/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/main.tf index d8ab74c242..d44735f911 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/main.tf +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/main.tf @@ -15,10 +15,12 @@ */ module "network0" { - source = "./modules/embedded/modules/network/vpc" - deployment_name = var.deployment_name - project_id = var.project_id - region = var.region + source = "./modules/embedded/modules/network/vpc" + deployment_name = var.deployment_name + enable_iap_rdp_ingress = true + enable_iap_winrm_ingress = true + project_id = var.project_id + region = var.region } module "homefs" { @@ -61,3 +63,8 @@ module "script" { type = "shell" }] } + +module "windows_startup" { + source = "./modules/embedded/community/modules/scripts/windows-startup-script" + install_nvidia_driver = true +} diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/outputs.tf 
b/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/outputs.tf index 2ea9eaf0af..04bef33e47 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/outputs.tf +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/outputs.tf @@ -25,3 +25,9 @@ output "startup_script_script" { value = module.script.startup_script sensitive = true } + +output "windows_startup_ps1_windows_startup" { + description = "Automatically-generated output exported for use by later deployment groups" + value = module.windows_startup.windows_startup_ps1 + sensitive = true +} diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/versions.tf b/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/versions.tf index ea77e9348f..d1bb72b8b4 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/versions.tf +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/versions.tf @@ -20,11 +20,11 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 4.65.2" + version = "~> 4.69.1" } google-beta = { source = "hashicorp/google-beta" - version = "~> 4.65.2" + version = "~> 4.69.1" } } } diff --git a/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml index a285178473..0ac4892f70 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml @@ -16,26 +16,17 @@ blueprint_name: igc ghpc_version: golden validators: - validator: test_project_exists - inputs: {} skip: true - validator: test_apis_enabled - inputs: {} skip: true - validator: test_region_exists - inputs: {} skip: true - validator: test_zone_exists - inputs: {} skip: true - validator: test_zone_in_region - inputs: {} skip: true - validator: test_module_not_used - inputs: {} - skip: false - validator: test_deployment_variable_not_used - inputs: {} - skip: false vars: deployment_name: golden_copy_deployment labels: @@ -46,15 +37,10 @@ vars: zone: us-east4-c deployment_groups: - group: zero - terraform_backend: - type: "" - configuration: {} modules: - source: modules/network/vpc kind: terraform id: network0 - use: [] - wrapsettingswith: {} outputs: - name: nat_ips - name: subnetwork_name @@ -65,39 +51,24 @@ deployment_groups: deployment_name: ((var.deployment_name )) project_id: ((var.project_id )) region: ((var.region )) - required_apis: - $(vars.project_id): - - compute.googleapis.com kind: terraform - group: one - terraform_backend: - type: "" - configuration: {} modules: - source: modules/file-system/filestore kind: terraform id: homefs use: - network0 - wrapsettingswith: - labels: - - merge( - - ) settings: deployment_name: ((var.deployment_name )) - labels: - - ((var.labels )) - - ghpc_role: file-system + labels: |- + ((merge(var.labels, { + ghpc_role = "file-system" + }) )) local_mount: /home name: ((module.network0.subnetwork_name)) network_id: ((module.network0.network_id)) project_id: ((var.project_id )) region: ((var.region )) zone: ((var.zone )) - required_apis: - $(vars.project_id): - - file.googleapis.com kind: terraform -terraform_backend_defaults: - type: "" - configuration: {} diff --git a/tools/validate_configs/golden_copies/expectations/igc_tf/one/versions.tf 
b/tools/validate_configs/golden_copies/expectations/igc_tf/one/versions.tf index ea77e9348f..d1bb72b8b4 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_tf/one/versions.tf +++ b/tools/validate_configs/golden_copies/expectations/igc_tf/one/versions.tf @@ -20,11 +20,11 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 4.65.2" + version = "~> 4.69.1" } google-beta = { source = "hashicorp/google-beta" - version = "~> 4.65.2" + version = "~> 4.69.1" } } } diff --git a/tools/validate_configs/golden_copies/expectations/igc_tf/zero/versions.tf b/tools/validate_configs/golden_copies/expectations/igc_tf/zero/versions.tf index ea77e9348f..d1bb72b8b4 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_tf/zero/versions.tf +++ b/tools/validate_configs/golden_copies/expectations/igc_tf/zero/versions.tf @@ -20,11 +20,11 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 4.65.2" + version = "~> 4.69.1" } google-beta = { source = "hashicorp/google-beta" - version = "~> 4.65.2" + version = "~> 4.69.1" } } } diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/DO_NOT_MODIFY_THIS_DIRECTORY b/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/DO_NOT_MODIFY_THIS_DIRECTORY new file mode 100644 index 0000000000..1613c718b5 --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/DO_NOT_MODIFY_THIS_DIRECTORY @@ -0,0 +1 @@ +Files in this directory are managed by ghpc. Do not modify them manually! diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/expanded_blueprint.yaml new file mode 100644 index 0000000000..9228ff8a0c --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/expanded_blueprint.yaml @@ -0,0 +1,113 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
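The merge_flatten golden copy that follows illustrates the other half of the WrapSettingsWith removal: the expanded blueprint now carries label merges as single HCL expressions (the ((merge(var.labels, { ... }) )) strings below), and tfwriter renders every setting uniformly through config.TokensForValue. A rough sketch of that style of rendering, using only the upstream hclwrite helper and made-up attribute values (the Toolkit's own wrapper additionally understands embedded expressions):

package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2/hclwrite"
	"github.com/zclconf/go-cty/cty"
)

func main() {
	// Render each setting as raw tokens for one attribute, as writeMain
	// now does for all module settings instead of special-casing
	// wrapped ones.
	f := hclwrite.NewEmptyFile()
	body := f.Body()
	body.SetAttributeRaw("local_mount", hclwrite.TokensForValue(cty.StringVal("/first")))
	body.SetAttributeRaw("labels", hclwrite.TokensForValue(cty.ObjectVal(map[string]cty.Value{
		"ghpc_role": cty.StringVal("file-system"),
	})))
	// Prints HCL along the lines of:
	//   local_mount = "/first"
	//   labels      = { ghpc_role = "file-system" }
	fmt.Print(string(f.Bytes()))
}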
+ +blueprint_name: merge_flatten +ghpc_version: golden +validators: + - validator: test_project_exists + skip: true + - validator: test_apis_enabled + skip: true + - validator: test_region_exists + skip: true + - validator: test_zone_exists + skip: true + - validator: test_zone_in_region + skip: true + - validator: test_module_not_used + - validator: test_deployment_variable_not_used +vars: + deployment_name: golden_copy_deployment + labels: + ghpc_blueprint: merge_flatten + ghpc_deployment: golden_copy_deployment + project_id: invalid-project + region: us-east4 + zone: us-east4-c +deployment_groups: + - group: zero + modules: + - source: modules/network/vpc + kind: terraform + id: network + settings: + deployment_name: ((var.deployment_name )) + project_id: ((var.project_id )) + region: ((var.region )) + - source: modules/file-system/filestore + kind: terraform + id: first-fs + use: + - network + settings: + deployment_name: ((var.deployment_name )) + labels: |- + ((merge(var.labels, { + ghpc_role = "file-system" + }) )) + local_mount: /first + network_id: ((module.network.network_id )) + project_id: ((var.project_id )) + region: ((var.region )) + zone: ((var.zone )) + - source: modules/file-system/filestore + kind: terraform + id: second-fs + use: + - network + settings: + deployment_name: ((var.deployment_name )) + labels: |- + ((merge(var.labels, { + ghpc_role = "file-system" + }) )) + local_mount: /first + network_id: ((module.network.network_id )) + project_id: ((var.project_id )) + region: ((var.region )) + zone: ((var.zone )) + - source: modules/compute/vm-instance + kind: terraform + id: first-vm + use: + - first-fs + settings: + deployment_name: ((var.deployment_name )) + labels: |- + ((merge(var.labels, { + ghpc_role = "compute" + }, { + green = "sleeves" + }) )) + network_storage: ((flatten([module.first-fs.network_storage]) )) + project_id: ((var.project_id )) + region: ((var.region )) + zone: ((var.zone )) + - source: modules/compute/vm-instance + kind: terraform + id: second-vm + use: + - first-fs + - second-fs + settings: + deployment_name: ((var.deployment_name )) + labels: |- + ((merge(var.labels, { + ghpc_role = "compute" + }) )) + network_storage: ((flatten([module.second-fs.network_storage, flatten([module.first-fs.network_storage])]) )) + project_id: ((var.project_id )) + region: ((var.region )) + zone: ((var.zone )) + kind: terraform diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/instructions.txt b/tools/validate_configs/golden_copies/expectations/merge_flatten/instructions.txt new file mode 100644 index 0000000000..119d4c7b20 --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/instructions.txt @@ -0,0 +1,23 @@ +Advanced Deployment Instructions +================================ + +Terraform group 'zero' was successfully created in directory golden_copy_deployment/zero +To deploy, run the following commands: + +terraform -chdir=golden_copy_deployment/zero init +terraform -chdir=golden_copy_deployment/zero validate +terraform -chdir=golden_copy_deployment/zero apply + +Destroying infrastructure when no longer needed +=============================================== + +Automated +--------- + +./ghpc destroy golden_copy_deployment + +Advanced / Manual +----------------- +Infrastructure should be destroyed in reverse order of creation: + +terraform -chdir=golden_copy_deployment/zero destroy diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/main.tf 
b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/main.tf new file mode 100644 index 0000000000..97498d30bb --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/main.tf @@ -0,0 +1,74 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +module "network" { + source = "./modules/embedded/modules/network/vpc" + deployment_name = var.deployment_name + project_id = var.project_id + region = var.region +} + +module "first-fs" { + source = "./modules/embedded/modules/file-system/filestore" + deployment_name = var.deployment_name + labels = merge(var.labels, { + ghpc_role = "file-system" + }) + local_mount = "/first" + network_id = module.network.network_id + project_id = var.project_id + region = var.region + zone = var.zone +} + +module "second-fs" { + source = "./modules/embedded/modules/file-system/filestore" + deployment_name = var.deployment_name + labels = merge(var.labels, { + ghpc_role = "file-system" + }) + local_mount = "/first" + network_id = module.network.network_id + project_id = var.project_id + region = var.region + zone = var.zone +} + +module "first-vm" { + source = "./modules/embedded/modules/compute/vm-instance" + deployment_name = var.deployment_name + labels = merge(var.labels, { + ghpc_role = "compute" + }, { + green = "sleeves" + }) + network_storage = flatten([module.first-fs.network_storage]) + project_id = var.project_id + region = var.region + zone = var.zone +} + +module "second-vm" { + source = "./modules/embedded/modules/compute/vm-instance" + deployment_name = var.deployment_name + labels = merge(var.labels, { + ghpc_role = "compute" + }) + network_storage = flatten([module.second-fs.network_storage, flatten([module.first-fs.network_storage])]) + project_id = var.project_id + region = var.region + zone = var.zone +} diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/providers.tf b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/providers.tf new file mode 100644 index 0000000000..ec0dc80b57 --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/providers.tf @@ -0,0 +1,27 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +provider "google" { + project = var.project_id + zone = var.zone + region = var.region +} + +provider "google-beta" { + project = var.project_id + zone = var.zone + region = var.region +} diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/terraform.tfvars b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/terraform.tfvars new file mode 100644 index 0000000000..d9130ce006 --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/terraform.tfvars @@ -0,0 +1,28 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +deployment_name = "golden_copy_deployment" + +labels = { + ghpc_blueprint = "merge_flatten" + ghpc_deployment = "golden_copy_deployment" +} + +project_id = "invalid-project" + +region = "us-east4" + +zone = "us-east4-c" diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/variables.tf b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/variables.tf new file mode 100644 index 0000000000..bed06d89e6 --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/variables.tf @@ -0,0 +1,40 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +variable "deployment_name" { + description = "Toolkit deployment variable: deployment_name" + type = string +} + +variable "labels" { + description = "Toolkit deployment variable: labels" + type = any +} + +variable "project_id" { + description = "Toolkit deployment variable: project_id" + type = string +} + +variable "region" { + description = "Toolkit deployment variable: region" + type = string +} + +variable "zone" { + description = "Toolkit deployment variable: zone" + type = string +} diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/versions.tf b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/versions.tf new file mode 100644 index 0000000000..d1bb72b8b4 --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/versions.tf @@ -0,0 +1,30 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+terraform {
+  required_version = ">= 1.2"
+
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = "~> 4.69.1"
+    }
+    google-beta = {
+      source  = "hashicorp/google-beta"
+      version = "~> 4.69.1"
+    }
+  }
+}
diff --git a/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml
index 7398085201..1b6f913cd5 100644
--- a/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml
+++ b/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml
@@ -16,26 +16,17 @@ blueprint_name: text_escape
 ghpc_version: golden
 validators:
 - validator: test_project_exists
-  inputs: {}
   skip: true
 - validator: test_apis_enabled
-  inputs: {}
   skip: true
 - validator: test_region_exists
-  inputs: {}
   skip: true
 - validator: test_zone_exists
-  inputs: {}
   skip: true
 - validator: test_zone_in_region
-  inputs: {}
   skip: true
-- validator: test_module_not_used
-  inputs: {}
-  skip: false
-- validator: test_deployment_variable_not_used
-  inputs: {}
-  skip: false
 vars:
   deployment_name: golden_copy_deployment
   labels:
@@ -46,33 +37,21 @@ vars:
   zone: us-east4-c
 deployment_groups:
 - group: zero
-  terraform_backend:
-    type: ""
-    configuration: {}
   modules:
   - source: modules/packer/custom-image
     kind: packer
     id: lime
-    use: []
-    wrapsettingswith: {}
     settings:
       deployment_name: ((var.deployment_name))
      image_family: \$(zebra/to(ad
      image_name: \((cat /dog))
-      labels:
-        brown: \$(fox)
-        ghpc_blueprint: text_escape
-        ghpc_deployment: golden_copy_deployment
-        ghpc_role: packer
-        ñred: ñblue
+      labels: |-
+        ((merge(var.labels, {
+          ghpc_role = "packer"
+          }, {
+          brown = "$(fox)"
+        })))
      project_id: ((var.project_id))
      subnetwork_name: \$(purple
      zone: ((var.zone))
-    required_apis:
-      $(vars.project_id):
-      - compute.googleapis.com
-      - storage.googleapis.com
   kind: packer
-terraform_backend_defaults:
-  type: ""
-  configuration: {}
diff --git a/tools/validate_configs/golden_copies/expectations/text_escape/instructions.txt b/tools/validate_configs/golden_copies/expectations/text_escape/instructions.txt
index c3df421bd0..49c401feca 100644
--- a/tools/validate_configs/golden_copies/expectations/text_escape/instructions.txt
+++ b/tools/validate_configs/golden_copies/expectations/text_escape/instructions.txt
@@ -1,7 +1,7 @@
 Advanced Deployment Instructions
 ================================
 
-Packer group 'lime' was successfully created in directory golden_copy_deployment/zero/lime
+Packer group was successfully created in directory golden_copy_deployment/zero
 
 To deploy, run the following commands:
 cd golden_copy_deployment/zero/lime
diff --git a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl
index 6ee3b63554..fa9a372cc5 100644
--- a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl
+++ b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl
@@ -18,9 +18,25 @@ locals {
   image_name_default = "${local.image_family}-${formatdate("YYYYMMDD't'hhmmss'z'", timestamp())}"
   image_name         = var.image_name != null ? var.image_name : local.image_name_default
 
+  # default to explicit var.communicator, otherwise in-order: ssh/winrm/none
+  shell_script_communicator      = length(var.shell_scripts) > 0 ? "ssh" : ""
+  ansible_playbook_communicator  = length(var.ansible_playbooks) > 0 ? "ssh" : ""
+  powershell_script_communicator = length(var.windows_startup_ps1) > 0 ? "winrm" : ""
+  communicator = coalesce(
+    var.communicator,
+    local.shell_script_communicator,
+    local.ansible_playbook_communicator,
+    local.powershell_script_communicator,
+    "none"
+  )
+
+  # must not enable IAP when no communicator is in use
+  use_iap = local.communicator == "none" ? false : var.use_iap
+
   # construct metadata from startup_script and metadata variables
   startup_script_metadata = var.startup_script == null ? {} : { startup-script = var.startup_script }
-  user_management_metadata = {
+
+  linux_user_metadata = {
     block-project-ssh-keys = "TRUE"
     shutdown-script        = <<-EOT
       #!/bin/bash
@@ -28,23 +44,21 @@ locals {
       sed -i '/${var.ssh_username}/d' /var/lib/google/google_users
     EOT
   }
+  windows_packer_user = "packer_user"
+  windows_user_metadata = {
+    sysprep-specialize-script-cmd = "winrm quickconfig -quiet & net user /add ${local.windows_packer_user} & net localgroup administrators ${local.windows_packer_user} /add & winrm set winrm/config/service/auth @{Basic=\\\"true\\\"}"
+    windows-shutdown-script-cmd   = "net user /delete ${local.windows_packer_user}"
+  }
+  user_metadata = local.communicator == "winrm" ? local.windows_user_metadata : local.linux_user_metadata
 
   # merge metadata such that var.metadata always overrides user management
   # metadata but always allow var.startup_script to override var.metadata
   metadata = merge(
-    local.user_management_metadata,
+    local.user_metadata,
     var.metadata,
     local.startup_script_metadata,
   )
 
-  # determine communicator to use and whether to enable Identity-Aware Proxy
-  no_shell_scripts     = length(var.shell_scripts) == 0
-  no_ansible_playbooks = length(var.ansible_playbooks) == 0
-  no_provisioners      = local.no_shell_scripts && local.no_ansible_playbooks
-  communicator_default = local.no_provisioners ? "none" : "ssh"
-  communicator         = var.communicator == null ? local.communicator_default : var.communicator
-  use_iap              = local.communicator == "none" ? false : var.use_iap
-
   # determine best value for on_host_maintenance if not supplied by user
   machine_vals   = split("-", var.machine_type)
   machine_family = local.machine_vals[0]
@@ -55,6 +69,12 @@ locals {
     ? var.on_host_maintenance
     : local.on_host_maintenance_default
   )
+
+  accelerator_type = var.accelerator_type == null ? null : "projects/${var.project_id}/zones/${var.zone}/acceleratorTypes/${var.accelerator_type}"
+
+  winrm_username = local.communicator == "winrm" ? "packer_user" : null
+  winrm_insecure = local.communicator == "winrm" ? true : null
+  winrm_use_ssl  = local.communicator == "winrm" ? true : null
 }
 
 source "googlecompute" "toolkit_image" {
@@ -64,10 +84,11 @@
   image_family        = local.image_family
   image_labels        = var.labels
   machine_type        = var.machine_type
-  accelerator_type    = var.accelerator_type
+  accelerator_type    = local.accelerator_type
   accelerator_count   = var.accelerator_count
   on_host_maintenance = local.on_host_maintenance
   disk_size           = var.disk_size
+  disk_type           = var.disk_type
   omit_external_ip    = var.omit_external_ip
   use_internal_ip     = var.omit_external_ip
   subnetwork          = var.subnetwork_name
@@ -80,6 +101,9 @@ source "googlecompute" "toolkit_image" {
   tags                = var.tags
   use_iap             = local.use_iap
   use_os_login        = var.use_os_login
+  winrm_username      = local.winrm_username
+  winrm_insecure      = local.winrm_insecure
+  winrm_use_ssl       = local.winrm_use_ssl
   zone                = var.zone
   labels              = var.labels
   metadata            = local.metadata
@@ -107,6 +131,15 @@ build {
     }
   }
 
+  # provisioner "powershell" blocks
+  dynamic "provisioner" {
+    labels   = ["powershell"]
+    for_each = var.windows_startup_ps1
+    content {
+      inline = split("\n", provisioner.value)
+    }
+  }
+
   # provisioner "ansible-local" blocks
   # this installs custom roles/collections from ansible-galaxy in /home/packer
   # which will be removed at the end; consider modifying /etc/ansible/ansible.cfg
diff --git a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/variables.pkr.hcl b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/variables.pkr.hcl
index 11a887bc85..9589669153 100644
--- a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/variables.pkr.hcl
+++ b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/variables.pkr.hcl
@@ -34,6 +34,12 @@ variable "disk_size" {
   default     = null
 }
 
+variable "disk_type" {
+  description = "Type of persistent disk to provision"
+  type        = string
+  default     = "pd-balanced"
+}
+
 variable "zone" {
   description = "Cloud zone in which to provision image building VM"
   type        = string
@@ -150,6 +156,12 @@ variable "shell_scripts" {
   default     = []
 }
 
+variable "windows_startup_ps1" {
+  description = "A list of strings containing PowerShell scripts which will customize a Windows VM image (requires WinRM communicator)"
+  type        = list(string)
+  default     = []
+}
+
 variable "startup_script" {
   description = "Startup script (as raw string) used to build the custom Linux VM image (overridden by var.startup_script_file if both are set)"
   type        = string
diff --git a/tools/validate_configs/test_configs/node-groups.yaml b/tools/validate_configs/test_configs/node-groups.yaml
index 39101fae61..8f22e60acc 100644
--- a/tools/validate_configs/test_configs/node-groups.yaml
+++ b/tools/validate_configs/test_configs/node-groups.yaml
@@ -65,7 +65,7 @@ deployment_groups:
       machine_type: c2-standard-30
       instance_image:
         family: slurm-gcp-5-7-debian-11
-        project: projects/schedmd-slurm-public/global/images/family
+        project: schedmd-slurm-public
 
   - id: node_group2
     source: community/modules/compute/schedmd-slurm-gcp-v5-node-group
@@ -74,7 +74,7 @@
       machine_type: c2-standard-60
       instance_image:
         name: slurm-gcp-dev-hpc-centos-7-1684970018
-        project: projects/schedmd-slurm-public/global/images
+        project: schedmd-slurm-public
 
   - id: node_group3
     source: community/modules/compute/schedmd-slurm-gcp-v5-node-group
@@ -83,7 +83,7 @@
       machine_type: c2d-standard-112
      instance_image:
        family: slurm-gcp-5-7-hpc-centos-7
-       project: projects/schedmd-slurm-public/global/images/family
+       project: schedmd-slurm-public
       enable_smt: true
 
   - id: node_group4
diff --git a/tools/validate_configs/test_configs/vm.yaml b/tools/validate_configs/test_configs/vm.yaml
new file mode 100644
index 0000000000..b8e194d5c7
--- /dev/null
+++ b/tools/validate_configs/test_configs/vm.yaml
@@ -0,0 +1,48 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+blueprint_name: simple-vm
+
+vars:
+  project_id: ## Set GCP Project ID Here ##
+  deployment_name: simple-vm
+  region: us-central1
+  zone: us-central1-c
+
+# Documentation for each of the modules used below can be found at
+# https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md
+
+deployment_groups:
+- group: primary
+  modules:
+  - id: network1
+    source: modules/network/pre-existing-vpc
+
+  - source: ./modules/compute/vm-instance
+    id: compute_instances
+    use: [network1]
+    settings:
+      name_prefix: client-vm
+      instance_count: 1
+      machine_type: n2-standard-2
+      instance_image:
+        project: ubuntu-os-cloud
+        family: ubuntu-2004-lts
+      # The following can be uncommented to test that changing an image definition triggers recreation.
+      # Create this image by running:
+      # gcloud compute images create myubuntu-1 --source-image-family ubuntu-2004-lts \
+      #   --source-image-project=ubuntu-os-cloud --family myubuntu --project
+        # project: $(vars.project_id)
+        # family: myubuntu
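
A note on the merge_flatten golden copies above: the expanded Terraform leans on two standard HCL functions. merge() combines maps left to right, with later arguments winning on key collisions, and flatten() recursively collapses nested lists, which is why the doubly nested flatten() in module "second-vm" still yields a flat network_storage list. A minimal standalone sketch (hypothetical values, not part of the diff; easy to check in terraform console):

locals {
  # Deployment-wide labels, mirroring the shape of var.labels above.
  base_labels = {
    ghpc_blueprint  = "merge_flatten"
    ghpc_deployment = "golden_copy_deployment"
  }

  # merge(): later maps win on key conflicts, so the module-level
  # map { green = "sleeves" } is applied last and cannot be clobbered.
  vm_labels = merge(local.base_labels, { ghpc_role = "compute" }, { green = "sleeves" })
  # => ghpc_blueprint, ghpc_deployment, ghpc_role = "compute", green = "sleeves"

  # flatten(): recursively collapses nested lists, so wrapping an
  # already-flattened list in another flatten() is a harmless no-op.
  storage = flatten([["fs-a"], flatten([["fs-b"]])])
  # => ["fs-a", "fs-b"]
}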
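Similarly, the communicator selection added to image.pkr.hcl hinges on coalesce(), which returns its first argument that is neither null nor an empty string: an explicit var.communicator always wins, then the first provisioner-derived default, then "none". A standalone sketch of that precedence with hypothetical inputs (one PowerShell script supplied, nothing else), using the same logic as the diff:

locals {
  # Hypothetical inputs standing in for the Packer variables.
  communicator_var    = null                         # var.communicator (unset)
  shell_scripts       = []                           # var.shell_scripts
  ansible_playbooks   = []                           # var.ansible_playbooks
  windows_startup_ps1 = ["Write-Output 'customize'"] # var.windows_startup_ps1

  # Each provisioner type proposes a communicator, or "" when unused.
  shell_script_communicator      = length(local.shell_scripts) > 0 ? "ssh" : ""
  ansible_playbook_communicator  = length(local.ansible_playbooks) > 0 ? "ssh" : ""
  powershell_script_communicator = length(local.windows_startup_ps1) > 0 ? "winrm" : ""

  # coalesce() skips null and "" alike, so this evaluates to "winrm"
  # here; with no scripts at all it would fall through to "none".
  communicator = coalesce(
    local.communicator_var,
    local.shell_script_communicator,
    local.ansible_playbook_communicator,
    local.powershell_script_communicator,
    "none"
  )
}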