From e6b983e4d8b081f43f89fbd92afb0a309d4d216f Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 8 Jun 2023 16:49:43 -0500 Subject: [PATCH 01/92] Improve support for Identity-Aware Proxy Modify the VPC module to support an explicit setting for enabling TCP tunneling to the WinRM port used by PowerShell for Windows remote administration. Additionally add a setting for a list of arbitrary ports to which to grant IAP IP range access. --- modules/network/vpc/README.md | 2 ++ modules/network/vpc/main.tf | 41 ++++++++++---------------------- modules/network/vpc/variables.tf | 12 ++++++++++ 3 files changed, 27 insertions(+), 28 deletions(-) diff --git a/modules/network/vpc/README.md b/modules/network/vpc/README.md index 1d74a796d9..4151217760 100644 --- a/modules/network/vpc/README.md +++ b/modules/network/vpc/README.md @@ -189,7 +189,9 @@ No resources. | [deployment\_name](#input\_deployment\_name) | The name of the current deployment | `string` | n/a | yes | | [enable\_iap\_rdp\_ingress](#input\_enable\_iap\_rdp\_ingress) | Enable a firewall rule to allow Windows Remote Desktop Protocol access using IAP tunnels | `bool` | `false` | no | | [enable\_iap\_ssh\_ingress](#input\_enable\_iap\_ssh\_ingress) | Enable a firewall rule to allow SSH access using IAP tunnels | `bool` | `true` | no | +| [enable\_iap\_winrm\_ingress](#input\_enable\_iap\_winrm\_ingress) | Enable a firewall rule to allow Windows Remote Management (WinRM) access using IAP tunnels | `bool` | `false` | no | | [enable\_internal\_traffic](#input\_enable\_internal\_traffic) | Enable a firewall rule to allow all internal TCP, UDP, and ICMP traffic within the network | `bool` | `true` | no | +| [extra\_iap\_ports](#input\_extra\_iap\_ports) | A list of TCP ports for which to create firewall rules that enable IAP for TCP forwarding (use dedicated enable\_iap variables for standard ports) | `list(string)` | `[]` | no | | [firewall\_rules](#input\_firewall\_rules) | List of firewall rules | `any` | `[]` | no | 
| [ips\_per\_nat](#input\_ips\_per\_nat) | The number of IP addresses to allocate for each regional Cloud NAT (set to 0 to disable NAT) | `number` | `2` | no | | [mtu](#input\_mtu) | The network MTU (default: 8896). Recommended values: 0 (use Compute Engine default), 1460 (default outside HPC environments), 1500 (Internet default), or 8896 (for Jumbo packets). Allowed are all values in the range 1300 to 8896, inclusively. | `number` | `8896` | no | diff --git a/modules/network/vpc/main.tf b/modules/network/vpc/main.tf index d672a5baac..cc3c273b66 100644 --- a/modules/network/vpc/main.tf +++ b/modules/network/vpc/main.tf @@ -75,9 +75,15 @@ locals { output_primary_subnetwork_self_link = local.output_primary_subnetwork.self_link output_primary_subnetwork_ip_cidr_range = local.output_primary_subnetwork.ip_cidr_range - allow_iap_ssh_ingress = { - name = "${local.network_name}-fw-allow-iap-ssh-ingress" - description = "allow SSH access via Identity-Aware Proxy" + iap_ports = distinct(concat(compact([ + var.enable_iap_rdp_ingress ? "3389" : "", + var.enable_iap_ssh_ingress ? "22" : "", + var.enable_iap_winrm_ingress ? 
"5986" : "", + ]), var.extra_iap_ports)) + + allow_iap_ingress = { + name = "${local.network_name}-fw-allow-iap-ingress" + description = "allow TCP access via Identity-Aware Proxy" direction = "INGRESS" priority = null ranges = ["35.235.240.0/20"] @@ -87,7 +93,7 @@ locals { target_service_accounts = null allow = [{ protocol = "tcp" - ports = ["22"] + ports = local.iap_ports }] deny = [] log_config = { @@ -95,27 +101,6 @@ locals { } } - allow_iap_rdp_ingress = { - name = "${local.network_name}-fw-allow-iap-rdp-ingress" - description = "allow Windows remote desktop access via Identity-Aware Proxy" - direction = "INGRESS" - priority = null - ranges = ["35.235.240.0/20"] - source_tags = null - source_service_accounts = null - target_tags = null - target_service_accounts = null - allow = [{ - protocol = "tcp" - ports = ["3389"] - }] - deny = [] - log_config = { - metadata = "INCLUDE_ALL_METADATA" - } - } - - allow_internal_traffic = { name = "${local.network_name}-fw-allow-internal-traffic" priority = null @@ -143,10 +128,10 @@ locals { } } - firewall_rules = concat(var.firewall_rules, + firewall_rules = concat( + var.firewall_rules, var.enable_internal_traffic ? [local.allow_internal_traffic] : [], - var.enable_iap_rdp_ingress ? [local.allow_iap_rdp_ingress] : [], - var.enable_iap_ssh_ingress ? [local.allow_iap_ssh_ingress] : [], + length(local.iap_ports) > 0 ? 
[local.allow_iap_ingress] : [] ) } diff --git a/modules/network/vpc/variables.tf b/modules/network/vpc/variables.tf index cad16b009f..139582c60a 100644 --- a/modules/network/vpc/variables.tf +++ b/modules/network/vpc/variables.tf @@ -190,12 +190,24 @@ variable "enable_iap_rdp_ingress" { default = false } +variable "enable_iap_winrm_ingress" { + type = bool + description = "Enable a firewall rule to allow Windows Remote Management (WinRM) access using IAP tunnels" + default = false +} + variable "enable_internal_traffic" { type = bool description = "Enable a firewall rule to allow all internal TCP, UDP, and ICMP traffic within the network" default = true } +variable "extra_iap_ports" { + type = list(string) + description = "A list of TCP ports for which to create firewall rules that enable IAP for TCP forwarding (use dedicated enable_iap variables for standard ports)" + default = [] +} + variable "firewall_rules" { type = any description = "List of firewall rules" From f821e1480e0728d6367dd612e195f14152c9c288 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 8 Jun 2023 16:49:43 -0500 Subject: [PATCH 02/92] Add firewall rule for SSH from arbitrary IP ranges --- modules/network/vpc/README.md | 1 + modules/network/vpc/main.tf | 21 +++++++++++++++++++++ modules/network/vpc/variables.tf | 11 +++++++++++ 3 files changed, 33 insertions(+) diff --git a/modules/network/vpc/README.md b/modules/network/vpc/README.md index 4151217760..2dbc21e02e 100644 --- a/modules/network/vpc/README.md +++ b/modules/network/vpc/README.md @@ -184,6 +184,7 @@ No resources. 
| Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [additional\_subnetworks](#input\_additional\_subnetworks) | DEPRECATED: please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions | `list(map(string))` | `null` | no | +| [allowed\_ssh\_ip\_ranges](#input\_allowed\_ssh\_ip\_ranges) | A list of CIDR IP ranges from which to allow ssh access | `list(string)` | `[]` | no | | [default\_primary\_subnetwork\_size](#input\_default\_primary\_subnetwork\_size) | The size, in CIDR bits, of the default primary subnetwork unless explicitly defined in var.subnetworks | `number` | `15` | no | | [delete\_default\_internet\_gateway\_routes](#input\_delete\_default\_internet\_gateway\_routes) | If set, ensure that all routes within the network specified whose names begin with 'default-route' and with a next hop of 'default-internet-gateway' are deleted | `bool` | `false` | no | | [deployment\_name](#input\_deployment\_name) | The name of the current deployment | `string` | n/a | yes | diff --git a/modules/network/vpc/main.tf b/modules/network/vpc/main.tf index cc3c273b66..657816b507 100644 --- a/modules/network/vpc/main.tf +++ b/modules/network/vpc/main.tf @@ -101,6 +101,26 @@ locals { } } + allow_ssh_ingress = { + name = "${local.network_name}-fw-allow-ssh-ingress" + description = "allow SSH access" + direction = "INGRESS" + priority = null + ranges = var.allowed_ssh_ip_ranges + source_tags = null + source_service_accounts = null + target_tags = null + target_service_accounts = null + allow = [{ + protocol = "tcp" + ports = ["22"] + }] + deny = [] + log_config = { + metadata = "INCLUDE_ALL_METADATA" + } + } + allow_internal_traffic = { name = "${local.network_name}-fw-allow-internal-traffic" priority = null @@ -130,6 +150,7 @@ locals { firewall_rules = concat( var.firewall_rules, + length(var.allowed_ssh_ip_ranges) > 0 ? [local.allow_ssh_ingress] : [], var.enable_internal_traffic ? 
[local.allow_internal_traffic] : [], length(local.iap_ports) > 0 ? [local.allow_iap_ingress] : [] ) diff --git a/modules/network/vpc/variables.tf b/modules/network/vpc/variables.tf index 139582c60a..f88e5aca01 100644 --- a/modules/network/vpc/variables.tf +++ b/modules/network/vpc/variables.tf @@ -208,6 +208,17 @@ variable "extra_iap_ports" { default = [] } +variable "allowed_ssh_ip_ranges" { + type = list(string) + description = "A list of CIDR IP ranges from which to allow ssh access" + default = [] + + validation { + condition = alltrue([for r in var.allowed_ssh_ip_ranges : can(cidrhost(r, 32))]) + error_message = "Each element of var.allowed_ssh_ip_ranges must be a valid CIDR-formatted IPv4 range." + } +} + variable "firewall_rules" { type = any description = "List of firewall rules" From c20fd95358a86027c145132940122ab260f375fb Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 8 Jun 2023 19:56:52 -0700 Subject: [PATCH 03/92] Add option to gke-node-pool for static node count --- .../modules/compute/gke-node-pool/README.md | 7 ++-- .../modules/compute/gke-node-pool/main.tf | 20 +++++++++--- .../compute/gke-node-pool/variables.tf | 32 +++++++++++++++++-- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/community/modules/compute/gke-node-pool/README.md b/community/modules/compute/gke-node-pool/README.md index ea8d952c4d..f67f21820e 100644 --- a/community/modules/compute/gke-node-pool/README.md +++ b/community/modules/compute/gke-node-pool/README.md @@ -174,6 +174,8 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [auto\_upgrade](#input\_auto\_upgrade) | Whether the nodes will be automatically upgraded. | `bool` | `false` | no | +| [autoscaling\_total\_max\_nodes](#input\_autoscaling\_total\_max\_nodes) | Total maximum number of nodes in the NodePool. 
| `number` | `1000` | no | +| [autoscaling\_total\_min\_nodes](#input\_autoscaling\_total\_min\_nodes) | Total minimum number of nodes in the NodePool. | `number` | `0` | no | | [cluster\_id](#input\_cluster\_id) | projects/{{project}}/locations/{{location}}/clusters/{{cluster}} | `string` | n/a | yes | | [compact\_placement](#input\_compact\_placement) | Places node pool's nodes in a closer physical proximity in order to reduce network latency between nodes. | `bool` | `false` | no | | [disk\_size\_gb](#input\_disk\_size\_gb) | Size of disk for each node. | `number` | `100` | no | @@ -186,10 +188,11 @@ No modules. | [project\_id](#input\_project\_id) | The project ID to host the cluster in. | `string` | n/a | yes | | [service\_account](#input\_service\_account) | Service account to use with the system node pool |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/cloud-platform"
]
}
| no | | [spot](#input\_spot) | Provision VMs using discounted Spot pricing, allowing for preemption | `bool` | `false` | no | +| [static\_node\_count](#input\_static\_node\_count) | The static number of nodes in the node pool. If set, autoscaling will be disabled. | `number` | `null` | no | | [taints](#input\_taints) | Taints to be applied to the system node pool. |
list(object({
key = string
value = any
effect = string
}))
|
[
{
"effect": "NO_SCHEDULE",
"key": "user-workload",
"value": true
}
]
| no | | [threads\_per\_core](#input\_threads\_per\_core) | Sets the number of threads per physical core. By setting threads\_per\_core
to 2, Simultaneous Multithreading (SMT) is enabled extending the total number
of virtual cores. For example, a machine of type c2-standard-60 will have 60
virtual cores with threads\_per\_core equal to 2. With threads\_per\_core equal
to 1 (SMT turned off), only the 30 physical cores will be available on the VM.

The default value of \"0\" will turn off SMT for supported machine types, and
will fall back to GCE defaults for unsupported machine types (t2d, shared-core
instances, or instances with less than 2 vCPU).

Disabling SMT can be more performant in many HPC workloads, therefore it is
disabled by default where compatible.

null = SMT configuration will use the GCE defaults for the machine type
0 = SMT will be disabled where compatible (default)
1 = SMT will always be disabled (will fail on incompatible machine types)
2 = SMT will always be enabled (will fail on incompatible machine types) | `number` | `0` | no | -| [total\_max\_nodes](#input\_total\_max\_nodes) | Total maximum number of nodes in the NodePool. | `number` | `1000` | no | -| [total\_min\_nodes](#input\_total\_min\_nodes) | Total minimum number of nodes in the NodePool. | `number` | `0` | no | +| [total\_max\_nodes](#input\_total\_max\_nodes) | DEPRECATED: Use autoscaling\_total\_max\_nodes. | `number` | `null` | no | +| [total\_min\_nodes](#input\_total\_min\_nodes) | DEPRECATED: Use autoscaling\_total\_min\_nodes. | `number` | `null` | no | | [zones](#input\_zones) | A list of zones to be used. Zones must be in region of cluster. If null, cluster zones will be inherited. Note `zones` not `zone`; does not work with `zone` deployment variable. | `list(string)` | `null` | no | ## Outputs diff --git a/community/modules/compute/gke-node-pool/main.tf b/community/modules/compute/gke-node-pool/main.tf index 5dbff2f8ce..2161f4716f 100644 --- a/community/modules/compute/gke-node-pool/main.tf +++ b/community/modules/compute/gke-node-pool/main.tf @@ -28,6 +28,9 @@ locals { value = "present" effect = "NO_SCHEDULE" }] : [] + + autoscale_set = var.autoscaling_total_min_nodes != 0 || var.autoscaling_total_max_nodes != 1000 + static_node_set = var.static_node_count != null } data "google_compute_default_service_account" "default_sa" { @@ -40,10 +43,15 @@ resource "google_container_node_pool" "node_pool" { name = var.name == null ? var.machine_type : var.name cluster = var.cluster_id node_locations = var.zones - autoscaling { - total_min_node_count = var.total_min_nodes - total_max_node_count = var.total_max_nodes - location_policy = "ANY" + + node_count = var.static_node_count + dynamic "autoscaling" { + for_each = local.static_node_set ? 
[] : [1] + content { + total_min_node_count = var.autoscaling_total_min_nodes + total_max_node_count = var.autoscaling_total_max_nodes + location_policy = "ANY" + } } management { @@ -113,6 +121,10 @@ resource "google_container_node_pool" "node_pool" { ignore_changes = [ node_config[0].labels, ] + precondition { + condition = !local.static_node_set || !local.autoscale_set + error_message = "static_node_count cannot be set with either autoscaling_total_min_nodes or autoscaling_total_max_nodes." + } } } diff --git a/community/modules/compute/gke-node-pool/variables.tf b/community/modules/compute/gke-node-pool/variables.tf index 343fbc002f..293ff86cec 100644 --- a/community/modules/compute/gke-node-pool/variables.tf +++ b/community/modules/compute/gke-node-pool/variables.tf @@ -74,19 +74,24 @@ variable "image_type" { default = "COS_CONTAINERD" } -# TODO -variable "total_min_nodes" { +variable "autoscaling_total_min_nodes" { description = "Total minimum number of nodes in the NodePool." type = number default = 0 } -variable "total_max_nodes" { +variable "autoscaling_total_max_nodes" { description = "Total maximum number of nodes in the NodePool." type = number default = 1000 } +variable "static_node_count" { + description = "The static number of nodes in the node pool. If set, autoscaling will be disabled." + type = number + default = null +} + variable "auto_upgrade" { description = "Whether the nodes will be automatically upgraded." type = bool @@ -164,3 +169,24 @@ variable "labels" { description = "GCE resource labels to be applied to resources. Key-value pairs." type = map(string) } + +# Deprecated +variable "total_min_nodes" { + description = "DEPRECATED: Use autoscaling_total_min_nodes." 
+ type = number + default = null + validation { + condition = var.total_min_nodes == null + error_message = "total_min_nodes was renamed to autoscaling_total_min_nodes and is deprecated; use autoscaling_total_min_nodes" + } +} + +variable "total_max_nodes" { + description = "DEPRECATED: Use autoscaling_total_max_nodes." + type = number + default = null + validation { + condition = var.total_max_nodes == null + error_message = "total_max_nodes was renamed to autoscaling_total_max_nodes and is deprecated; use autoscaling_total_max_nodes" + } +} From 33ec74d134812a4e3aea37179d4adf52d9b7b84b Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 8 Jun 2023 22:15:20 -0700 Subject: [PATCH 04/92] Expose the option on gke-cluster to not create a system node pool --- community/modules/scheduler/gke-cluster/README.md | 1 + community/modules/scheduler/gke-cluster/main.tf | 1 + community/modules/scheduler/gke-cluster/variables.tf | 6 ++++++ 3 files changed, 8 insertions(+) diff --git a/community/modules/scheduler/gke-cluster/README.md b/community/modules/scheduler/gke-cluster/README.md index 2b29505398..378d7134f8 100644 --- a/community/modules/scheduler/gke-cluster/README.md +++ b/community/modules/scheduler/gke-cluster/README.md @@ -133,6 +133,7 @@ No modules. | [service\_account](#input\_service\_account) | Service account to use with the system node pool |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/cloud-platform"
]
}
| no | | [services\_ip\_range\_name](#input\_services\_ip\_range\_name) | The name of the secondary subnet range to use for services. | `string` | `"services"` | no | | [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | The self link of the subnetwork to host the cluster in. | `string` | n/a | yes | +| [system\_node\_pool\_enabled](#input\_system\_node\_pool\_enabled) | Create a system node pool. | `bool` | `true` | no | | [system\_node\_pool\_machine\_type](#input\_system\_node\_pool\_machine\_type) | Machine type for the system node pool. | `string` | `"e2-standard-4"` | no | | [system\_node\_pool\_name](#input\_system\_node\_pool\_name) | Name of the system node pool. | `string` | `"system"` | no | | [system\_node\_pool\_node\_count](#input\_system\_node\_pool\_node\_count) | The total min and max nodes to be maintained in the system node pool. |
object({
total_min_nodes = number
total_max_nodes = number
})
|
{
"total_max_nodes": 10,
"total_min_nodes": 2
}
| no | diff --git a/community/modules/scheduler/gke-cluster/main.tf b/community/modules/scheduler/gke-cluster/main.tf index d31db4fa13..f45ff43af8 100644 --- a/community/modules/scheduler/gke-cluster/main.tf +++ b/community/modules/scheduler/gke-cluster/main.tf @@ -169,6 +169,7 @@ resource "google_container_cluster" "gke_cluster" { # having to destroy the entire cluster. resource "google_container_node_pool" "system_node_pools" { provider = google-beta + count = var.system_node_pool_enabled ? 1 : 0 project = var.project_id name = var.system_node_pool_name diff --git a/community/modules/scheduler/gke-cluster/variables.tf b/community/modules/scheduler/gke-cluster/variables.tf index 1fc8ecef8a..f3cb005131 100644 --- a/community/modules/scheduler/gke-cluster/variables.tf +++ b/community/modules/scheduler/gke-cluster/variables.tf @@ -127,6 +127,12 @@ variable "enable_persistent_disk_csi" { default = true } +variable "system_node_pool_enabled" { + description = "Create a system node pool." + type = bool + default = true +} + variable "system_node_pool_name" { description = "Name of the system node pool." 
type = string From 9506290da353af7c74c6c0e92ea33f8cbe421c37 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 8 Jun 2023 22:44:51 -0700 Subject: [PATCH 05/92] Add options for create and update timeouts to gke modules --- community/modules/compute/gke-node-pool/README.md | 2 ++ community/modules/compute/gke-node-pool/main.tf | 5 +++++ community/modules/compute/gke-node-pool/variables.tf | 12 ++++++++++++ community/modules/scheduler/gke-cluster/README.md | 2 ++ community/modules/scheduler/gke-cluster/main.tf | 5 +++++ community/modules/scheduler/gke-cluster/variables.tf | 12 ++++++++++++ 6 files changed, 38 insertions(+) diff --git a/community/modules/compute/gke-node-pool/README.md b/community/modules/compute/gke-node-pool/README.md index f67f21820e..1429a72e54 100644 --- a/community/modules/compute/gke-node-pool/README.md +++ b/community/modules/compute/gke-node-pool/README.md @@ -191,6 +191,8 @@ No modules. | [static\_node\_count](#input\_static\_node\_count) | The static number of nodes in the node pool. If set, autoscaling will be disabled. | `number` | `null` | no | | [taints](#input\_taints) | Taints to be applied to the system node pool. |
list(object({
key = string
value = any
effect = string
}))
|
[
{
"effect": "NO_SCHEDULE",
"key": "user-workload",
"value": true
}
]
| no | | [threads\_per\_core](#input\_threads\_per\_core) | Sets the number of threads per physical core. By setting threads\_per\_core
to 2, Simultaneous Multithreading (SMT) is enabled extending the total number
of virtual cores. For example, a machine of type c2-standard-60 will have 60
virtual cores with threads\_per\_core equal to 2. With threads\_per\_core equal
to 1 (SMT turned off), only the 30 physical cores will be available on the VM.

The default value of \"0\" will turn off SMT for supported machine types, and
will fall back to GCE defaults for unsupported machine types (t2d, shared-core
instances, or instances with less than 2 vCPU).

Disabling SMT can be more performant in many HPC workloads, therefore it is
disabled by default where compatible.

null = SMT configuration will use the GCE defaults for the machine type
0 = SMT will be disabled where compatible (default)
1 = SMT will always be disabled (will fail on incompatible machine types)
2 = SMT will always be enabled (will fail on incompatible machine types) | `number` | `0` | no | +| [timeout\_create](#input\_timeout\_create) | Timeout for creating a node pool | `string` | `null` | no | +| [timeout\_update](#input\_timeout\_update) | Timeout for updating a node pool | `string` | `null` | no | | [total\_max\_nodes](#input\_total\_max\_nodes) | DEPRECATED: Use autoscaling\_total\_max\_nodes. | `number` | `null` | no | | [total\_min\_nodes](#input\_total\_min\_nodes) | DEPRECATED: Use autoscaling\_total\_min\_nodes. | `number` | `null` | no | | [zones](#input\_zones) | A list of zones to be used. Zones must be in region of cluster. If null, cluster zones will be inherited. Note `zones` not `zone`; does not work with `zone` deployment variable. | `list(string)` | `null` | no | diff --git a/community/modules/compute/gke-node-pool/main.tf b/community/modules/compute/gke-node-pool/main.tf index 2161f4716f..ac3809fa34 100644 --- a/community/modules/compute/gke-node-pool/main.tf +++ b/community/modules/compute/gke-node-pool/main.tf @@ -117,6 +117,11 @@ resource "google_container_node_pool" "node_pool" { } } + timeouts { + create = var.timeout_create + update = var.timeout_update + } + lifecycle { ignore_changes = [ node_config[0].labels, diff --git a/community/modules/compute/gke-node-pool/variables.tf b/community/modules/compute/gke-node-pool/variables.tf index 293ff86cec..6d74f5e1b7 100644 --- a/community/modules/compute/gke-node-pool/variables.tf +++ b/community/modules/compute/gke-node-pool/variables.tf @@ -170,6 +170,18 @@ variable "labels" { type = map(string) } +variable "timeout_create" { + description = "Timeout for creating a node pool" + type = string + default = null +} + +variable "timeout_update" { + description = "Timeout for updating a node pool" + type = string + default = null +} + # Deprecated variable "total_min_nodes" { description = "DEPRECATED: Use autoscaling_total_min_nodes." 
diff --git a/community/modules/scheduler/gke-cluster/README.md b/community/modules/scheduler/gke-cluster/README.md index 378d7134f8..359a4b370f 100644 --- a/community/modules/scheduler/gke-cluster/README.md +++ b/community/modules/scheduler/gke-cluster/README.md @@ -138,6 +138,8 @@ No modules. | [system\_node\_pool\_name](#input\_system\_node\_pool\_name) | Name of the system node pool. | `string` | `"system"` | no | | [system\_node\_pool\_node\_count](#input\_system\_node\_pool\_node\_count) | The total min and max nodes to be maintained in the system node pool. |
object({
total_min_nodes = number
total_max_nodes = number
})
|
{
"total_max_nodes": 10,
"total_min_nodes": 2
}
| no | | [system\_node\_pool\_taints](#input\_system\_node\_pool\_taints) | Taints to be applied to the system node pool. |
list(object({
key = string
value = any
effect = string
}))
|
[
{
"effect": "NO_SCHEDULE",
"key": "components.gke.io/gke-managed-components",
"value": true
}
]
| no | +| [timeout\_create](#input\_timeout\_create) | Timeout for creating a node pool | `string` | `null` | no | +| [timeout\_update](#input\_timeout\_update) | Timeout for updating a node pool | `string` | `null` | no | ## Outputs diff --git a/community/modules/scheduler/gke-cluster/main.tf b/community/modules/scheduler/gke-cluster/main.tf index f45ff43af8..1494eb4157 100644 --- a/community/modules/scheduler/gke-cluster/main.tf +++ b/community/modules/scheduler/gke-cluster/main.tf @@ -154,6 +154,11 @@ resource "google_container_cluster" "gke_cluster" { } } + timeouts { + create = var.timeout_create + update = var.timeout_update + } + lifecycle { # Ignore all changes to the default node pool. It's being removed after creation. ignore_changes = [ diff --git a/community/modules/scheduler/gke-cluster/variables.tf b/community/modules/scheduler/gke-cluster/variables.tf index f3cb005131..a114ab3443 100644 --- a/community/modules/scheduler/gke-cluster/variables.tf +++ b/community/modules/scheduler/gke-cluster/variables.tf @@ -238,3 +238,15 @@ variable "labels" { description = "GCE resource labels to be applied to resources. Key-value pairs." 
type = map(string) } + +variable "timeout_create" { + description = "Timeout for creating a node pool" + type = string + default = null +} + +variable "timeout_update" { + description = "Timeout for updating a node pool" + type = string + default = null +} From 82874033b4b2289e1c83ad10ba03c1bfea5ee44e Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Fri, 9 Jun 2023 15:12:51 -0700 Subject: [PATCH 06/92] Add option to enable gcfs on gke-node-pool --- community/modules/compute/gke-node-pool/README.md | 1 + community/modules/compute/gke-node-pool/main.tf | 7 +++++++ community/modules/compute/gke-node-pool/variables.tf | 6 ++++++ 3 files changed, 14 insertions(+) diff --git a/community/modules/compute/gke-node-pool/README.md b/community/modules/compute/gke-node-pool/README.md index 1429a72e54..00af5df3f7 100644 --- a/community/modules/compute/gke-node-pool/README.md +++ b/community/modules/compute/gke-node-pool/README.md @@ -180,6 +180,7 @@ No modules. | [compact\_placement](#input\_compact\_placement) | Places node pool's nodes in a closer physical proximity in order to reduce network latency between nodes. | `bool` | `false` | no | | [disk\_size\_gb](#input\_disk\_size\_gb) | Size of disk for each node. | `number` | `100` | no | | [disk\_type](#input\_disk\_type) | Disk type for each node. | `string` | `"pd-standard"` | no | +| [enable\_gcfs](#input\_enable\_gcfs) | Enable the Google Container Filesystem (GCFS). See [restrictions](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/container_cluster#gcfs_config). | `bool` | `false` | no | | [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = string
count = number
gpu_partition_size = string
gpu_sharing_config = list(object({
gpu_sharing_strategy = string
max_shared_clients_per_gpu = number
}))
}))
| `null` | no | | [image\_type](#input\_image\_type) | The default image type used by NAP once a new node pool is being created. Use either COS\_CONTAINERD or UBUNTU\_CONTAINERD. | `string` | `"COS_CONTAINERD"` | no | | [labels](#input\_labels) | GCE resource labels to be applied to resources. Key-value pairs. | `map(string)` | n/a | yes | diff --git a/community/modules/compute/gke-node-pool/main.tf b/community/modules/compute/gke-node-pool/main.tf index ac3809fa34..915feff21a 100644 --- a/community/modules/compute/gke-node-pool/main.tf +++ b/community/modules/compute/gke-node-pool/main.tf @@ -89,6 +89,13 @@ resource "google_container_node_pool" "node_pool" { enable_integrity_monitoring = true } + dynamic "gcfs_config" { + for_each = var.enable_gcfs ? [1] : [] + content { + enabled = true + } + } + gvnic { enabled = true } diff --git a/community/modules/compute/gke-node-pool/variables.tf b/community/modules/compute/gke-node-pool/variables.tf index 6d74f5e1b7..eeb5fd1a99 100644 --- a/community/modules/compute/gke-node-pool/variables.tf +++ b/community/modules/compute/gke-node-pool/variables.tf @@ -54,6 +54,12 @@ variable "disk_type" { default = "pd-standard" } +variable "enable_gcfs" { + description = "Enable the Google Container Filesystem (GCFS). See [restrictions](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/container_cluster#gcfs_config)." + type = bool + default = false +} + variable "guest_accelerator" { description = "List of the type and count of accelerator cards attached to the instance." 
type = list(object({ From 3212a7c040b51f66e9c8229e51fc55f2106957d9 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Mon, 12 Jun 2023 12:34:06 -0500 Subject: [PATCH 07/92] Add disk_type support to Packer module --- modules/packer/custom-image/README.md | 1 + modules/packer/custom-image/image.pkr.hcl | 1 + modules/packer/custom-image/variables.pkr.hcl | 6 ++++++ 3 files changed, 8 insertions(+) diff --git a/modules/packer/custom-image/README.md b/modules/packer/custom-image/README.md index 3e7f692518..4999f8d642 100644 --- a/modules/packer/custom-image/README.md +++ b/modules/packer/custom-image/README.md @@ -241,6 +241,7 @@ No resources. | [communicator](#input\_communicator) | Communicator to use for provisioners that require access to VM ("ssh" or "winrm") | `string` | `null` | no | | [deployment\_name](#input\_deployment\_name) | HPC Toolkit deployment name | `string` | n/a | yes | | [disk\_size](#input\_disk\_size) | Size of disk image in GB | `number` | `null` | no | +| [disk\_type](#input\_disk\_type) | Type of persistent disk to provision | `string` | `"pd-balanced"` | no | | [image\_family](#input\_image\_family) | The family name of the image to be built. Defaults to `deployment_name` | `string` | `null` | no | | [image\_name](#input\_image\_name) | The name of the image to be built. If not supplied, it will be set to image\_family-$ISO\_TIMESTAMP | `string` | `null` | no | | [image\_storage\_locations](#input\_image\_storage\_locations) | Storage location, either regional or multi-regional, where snapshot content is to be stored and only accepts 1 value.
See https://developer.hashicorp.com/packer/plugins/builders/googlecompute#image_storage_locations | `list(string)` | `null` | no | diff --git a/modules/packer/custom-image/image.pkr.hcl b/modules/packer/custom-image/image.pkr.hcl index 6ee3b63554..8e0672ba75 100644 --- a/modules/packer/custom-image/image.pkr.hcl +++ b/modules/packer/custom-image/image.pkr.hcl @@ -68,6 +68,7 @@ source "googlecompute" "toolkit_image" { accelerator_count = var.accelerator_count on_host_maintenance = local.on_host_maintenance disk_size = var.disk_size + disk_type = var.disk_type omit_external_ip = var.omit_external_ip use_internal_ip = var.omit_external_ip subnetwork = var.subnetwork_name diff --git a/modules/packer/custom-image/variables.pkr.hcl b/modules/packer/custom-image/variables.pkr.hcl index 11a887bc85..3218db65a3 100644 --- a/modules/packer/custom-image/variables.pkr.hcl +++ b/modules/packer/custom-image/variables.pkr.hcl @@ -34,6 +34,12 @@ variable "disk_size" { default = null } +variable "disk_type" { + description = "Type of persistent disk to provision" + type = string + default = "pd-balanced" +} + variable "zone" { description = "Cloud zone in which to provision image building VM" type = string From 9673974d73e9be2872c1372b0741474b4153fdf5 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Mon, 12 Jun 2023 12:34:30 -0500 Subject: [PATCH 08/92] Add Powershell script support to Packer module --- modules/packer/custom-image/README.md | 1 + modules/packer/custom-image/image.pkr.hcl | 38 +++++++++++++++---- modules/packer/custom-image/variables.pkr.hcl | 6 +++ 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/modules/packer/custom-image/README.md b/modules/packer/custom-image/README.md index 4999f8d642..827f42ac9b 100644 --- a/modules/packer/custom-image/README.md +++ b/modules/packer/custom-image/README.md @@ -252,6 +252,7 @@ No resources. 
| [network\_project\_id](#input\_network\_project\_id) | Project ID of Shared VPC network | `string` | `null` | no | | [omit\_external\_ip](#input\_omit\_external\_ip) | Provision the image building VM without a public IP address | `bool` | `true` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Describes maintenance behavior for the instance. If left blank this will default to `MIGRATE` except the use of GPUs requires it to be `TERMINATE` | `string` | `null` | no | +| [powershell\_scripts](#input\_powershell\_scripts) | A list of paths to local powershell scripts which will be uploaded to customize the Windows VM image (requires WinRM communicator) | `list(string)` | `[]` | no | | [project\_id](#input\_project\_id) | Project in which to create VM and image | `string` | n/a | yes | | [scopes](#input\_scopes) | Service account scopes to attach to the instance. See
https://cloud.google.com/compute/docs/access/service-accounts. | `list(string)` |
[
"https://www.googleapis.com/auth/userinfo.email",
"https://www.googleapis.com/auth/compute",
"https://www.googleapis.com/auth/devstorage.full_control",
"https://www.googleapis.com/auth/logging.write"
]
| no | | [service\_account\_email](#input\_service\_account\_email) | The service account email to use. If null or 'default', then the default Compute Engine service account will be used. | `string` | `null` | no | diff --git a/modules/packer/custom-image/image.pkr.hcl b/modules/packer/custom-image/image.pkr.hcl index 8e0672ba75..bbcd601ec5 100644 --- a/modules/packer/custom-image/image.pkr.hcl +++ b/modules/packer/custom-image/image.pkr.hcl @@ -37,13 +37,20 @@ locals { local.startup_script_metadata, ) - # determine communicator to use and whether to enable Identity-Aware Proxy - no_shell_scripts = length(var.shell_scripts) == 0 - no_ansible_playbooks = length(var.ansible_playbooks) == 0 - no_provisioners = local.no_shell_scripts && local.no_ansible_playbooks - communicator_default = local.no_provisioners ? "none" : "ssh" - communicator = var.communicator == null ? local.communicator_default : var.communicator - use_iap = local.communicator == "none" ? false : var.use_iap + # default to explicit var.communicator, otherwise in-order: ssh/winrm/none + shell_script_communicator = length(var.shell_scripts) > 0 ? "ssh" : "" + ansible_playbook_communicator = length(var.ansible_playbooks) > 0 ? "ssh" : "" + powershell_script_communicator = length(var.powershell_scripts) > 0 ? "winrm" : "" + communicator = coalesce( + var.communicator, + local.shell_script_communicator, + local.ansible_playbook_communicator, + local.powershell_script_communicator, + "none" + ) + + # must not enable IAP when no communicator is in use + use_iap = local.communicator == "none" ? false : var.use_iap # determine best value for on_host_maintenance if not supplied by user machine_vals = split("-", var.machine_type) @@ -55,6 +62,10 @@ locals { ? var.on_host_maintenance : local.on_host_maintenance_default ) + + winrm_username = local.communicator == "winrm" ? "packer_user" : null + winrm_insecure = local.communicator == "winrm" ? true : null + winrm_use_ssl = local.communicator == "winrm" ? 
true : null } source "googlecompute" "toolkit_image" { @@ -81,6 +92,9 @@ source "googlecompute" "toolkit_image" { tags = var.tags use_iap = local.use_iap use_os_login = var.use_os_login + winrm_username = local.winrm_username + winrm_insecure = local.winrm_insecure + winrm_use_ssl = local.winrm_use_ssl zone = var.zone labels = var.labels metadata = local.metadata @@ -108,6 +122,16 @@ build { } } + # provisioner "powershell" blocks + dynamic "provisioner" { + labels = ["powershell"] + for_each = var.powershell_scripts + content { + elevated_user = "Administrator" + script = provisioner.value + } + } + # provisioner "ansible-local" blocks # this installs custom roles/collections from ansible-galaxy in /home/packer # which will be removed at the end; consider modifying /etc/ansible/ansible.cfg diff --git a/modules/packer/custom-image/variables.pkr.hcl b/modules/packer/custom-image/variables.pkr.hcl index 3218db65a3..49c31edc78 100644 --- a/modules/packer/custom-image/variables.pkr.hcl +++ b/modules/packer/custom-image/variables.pkr.hcl @@ -156,6 +156,12 @@ variable "shell_scripts" { default = [] } +variable "powershell_scripts" { + description = "A list of paths to local powershell scripts which will be uploaded to customize the Windows VM image (requires WinRM communicator)" + type = list(string) + default = [] +} + variable "startup_script" { description = "Startup script (as raw string) used to build the custom Linux VM image (overridden by var.startup_script_file if both are set)" type = string From 21f5b5eb4b8acdf9d596bb29d717a654ae059ae7 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Mon, 12 Jun 2023 13:15:59 -0500 Subject: [PATCH 09/92] Update golden copies of Packer module --- .../igc_pkr/one/image/image.pkr.hcl | 39 +++++++++++++++---- .../igc_pkr/one/image/variables.pkr.hcl | 12 ++++++ .../text_escape/zero/lime/image.pkr.hcl | 39 +++++++++++++++---- .../text_escape/zero/lime/variables.pkr.hcl | 12 ++++++ 4 files changed, 88 insertions(+), 14 deletions(-) 
diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl index 6ee3b63554..bbcd601ec5 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl @@ -37,13 +37,20 @@ locals { local.startup_script_metadata, ) - # determine communicator to use and whether to enable Identity-Aware Proxy - no_shell_scripts = length(var.shell_scripts) == 0 - no_ansible_playbooks = length(var.ansible_playbooks) == 0 - no_provisioners = local.no_shell_scripts && local.no_ansible_playbooks - communicator_default = local.no_provisioners ? "none" : "ssh" - communicator = var.communicator == null ? local.communicator_default : var.communicator - use_iap = local.communicator == "none" ? false : var.use_iap + # default to explicit var.communicator, otherwise in-order: ssh/winrm/none + shell_script_communicator = length(var.shell_scripts) > 0 ? "ssh" : "" + ansible_playbook_communicator = length(var.ansible_playbooks) > 0 ? "ssh" : "" + powershell_script_communicator = length(var.powershell_scripts) > 0 ? "winrm" : "" + communicator = coalesce( + var.communicator, + local.shell_script_communicator, + local.ansible_playbook_communicator, + local.powershell_script_communicator, + "none" + ) + + # must not enable IAP when no communicator is in use + use_iap = local.communicator == "none" ? false : var.use_iap # determine best value for on_host_maintenance if not supplied by user machine_vals = split("-", var.machine_type) @@ -55,6 +62,10 @@ locals { ? var.on_host_maintenance : local.on_host_maintenance_default ) + + winrm_username = local.communicator == "winrm" ? "packer_user" : null + winrm_insecure = local.communicator == "winrm" ? true : null + winrm_use_ssl = local.communicator == "winrm" ? 
true : null } source "googlecompute" "toolkit_image" { @@ -68,6 +79,7 @@ source "googlecompute" "toolkit_image" { accelerator_count = var.accelerator_count on_host_maintenance = local.on_host_maintenance disk_size = var.disk_size + disk_type = var.disk_type omit_external_ip = var.omit_external_ip use_internal_ip = var.omit_external_ip subnetwork = var.subnetwork_name @@ -80,6 +92,9 @@ source "googlecompute" "toolkit_image" { tags = var.tags use_iap = local.use_iap use_os_login = var.use_os_login + winrm_username = local.winrm_username + winrm_insecure = local.winrm_insecure + winrm_use_ssl = local.winrm_use_ssl zone = var.zone labels = var.labels metadata = local.metadata @@ -107,6 +122,16 @@ build { } } + # provisioner "powershell" blocks + dynamic "provisioner" { + labels = ["powershell"] + for_each = var.powershell_scripts + content { + elevated_user = "Administrator" + script = provisioner.value + } + } + # provisioner "ansible-local" blocks # this installs custom roles/collections from ansible-galaxy in /home/packer # which will be removed at the end; consider modifying /etc/ansible/ansible.cfg diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/variables.pkr.hcl b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/variables.pkr.hcl index 11a887bc85..49c31edc78 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/variables.pkr.hcl +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/variables.pkr.hcl @@ -34,6 +34,12 @@ variable "disk_size" { default = null } +variable "disk_type" { + description = "Type of persistent disk to provision" + type = string + default = "pd-balanced" +} + variable "zone" { description = "Cloud zone in which to provision image building VM" type = string @@ -150,6 +156,12 @@ variable "shell_scripts" { default = [] } +variable "powershell_scripts" { + description = "A list of paths to local powershell scripts which will be uploaded to 
customize the Windows VM image (requires WinRM communicator)" + type = list(string) + default = [] +} + variable "startup_script" { description = "Startup script (as raw string) used to build the custom Linux VM image (overridden by var.startup_script_file if both are set)" type = string diff --git a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl index 6ee3b63554..bbcd601ec5 100644 --- a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl +++ b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl @@ -37,13 +37,20 @@ locals { local.startup_script_metadata, ) - # determine communicator to use and whether to enable Identity-Aware Proxy - no_shell_scripts = length(var.shell_scripts) == 0 - no_ansible_playbooks = length(var.ansible_playbooks) == 0 - no_provisioners = local.no_shell_scripts && local.no_ansible_playbooks - communicator_default = local.no_provisioners ? "none" : "ssh" - communicator = var.communicator == null ? local.communicator_default : var.communicator - use_iap = local.communicator == "none" ? false : var.use_iap + # default to explicit var.communicator, otherwise in-order: ssh/winrm/none + shell_script_communicator = length(var.shell_scripts) > 0 ? "ssh" : "" + ansible_playbook_communicator = length(var.ansible_playbooks) > 0 ? "ssh" : "" + powershell_script_communicator = length(var.powershell_scripts) > 0 ? "winrm" : "" + communicator = coalesce( + var.communicator, + local.shell_script_communicator, + local.ansible_playbook_communicator, + local.powershell_script_communicator, + "none" + ) + + # must not enable IAP when no communicator is in use + use_iap = local.communicator == "none" ? false : var.use_iap # determine best value for on_host_maintenance if not supplied by user machine_vals = split("-", var.machine_type) @@ -55,6 +62,10 @@ locals { ? 
var.on_host_maintenance : local.on_host_maintenance_default ) + + winrm_username = local.communicator == "winrm" ? "packer_user" : null + winrm_insecure = local.communicator == "winrm" ? true : null + winrm_use_ssl = local.communicator == "winrm" ? true : null } source "googlecompute" "toolkit_image" { @@ -68,6 +79,7 @@ source "googlecompute" "toolkit_image" { accelerator_count = var.accelerator_count on_host_maintenance = local.on_host_maintenance disk_size = var.disk_size + disk_type = var.disk_type omit_external_ip = var.omit_external_ip use_internal_ip = var.omit_external_ip subnetwork = var.subnetwork_name @@ -80,6 +92,9 @@ source "googlecompute" "toolkit_image" { tags = var.tags use_iap = local.use_iap use_os_login = var.use_os_login + winrm_username = local.winrm_username + winrm_insecure = local.winrm_insecure + winrm_use_ssl = local.winrm_use_ssl zone = var.zone labels = var.labels metadata = local.metadata @@ -107,6 +122,16 @@ build { } } + # provisioner "powershell" blocks + dynamic "provisioner" { + labels = ["powershell"] + for_each = var.powershell_scripts + content { + elevated_user = "Administrator" + script = provisioner.value + } + } + # provisioner "ansible-local" blocks # this installs custom roles/collections from ansible-galaxy in /home/packer # which will be removed at the end; consider modifying /etc/ansible/ansible.cfg diff --git a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/variables.pkr.hcl b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/variables.pkr.hcl index 11a887bc85..49c31edc78 100644 --- a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/variables.pkr.hcl +++ b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/variables.pkr.hcl @@ -34,6 +34,12 @@ variable "disk_size" { default = null } +variable "disk_type" { + description = "Type of persistent disk to provision" + type = string + default = "pd-balanced" +} + variable "zone" { 
description = "Cloud zone in which to provision image building VM" type = string @@ -150,6 +156,12 @@ variable "shell_scripts" { default = [] } +variable "powershell_scripts" { + description = "A list of paths to local powershell scripts which will be uploaded to customize the Windows VM image (requires WinRM communicator)" + type = list(string) + default = [] +} + variable "startup_script" { description = "Startup script (as raw string) used to build the custom Linux VM image (overridden by var.startup_script_file if both are set)" type = string From 5fdb6428f0356d7b55fbf5209b1186c7fdfba46c Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Mon, 12 Jun 2023 15:28:44 -0500 Subject: [PATCH 10/92] Add support for fixed version of HTCondor installation --- community/modules/scripts/htcondor-install/README.md | 1 + .../scripts/htcondor-install/files/install-htcondor.yaml | 2 +- community/modules/scripts/htcondor-install/main.tf | 5 ++++- community/modules/scripts/htcondor-install/variables.tf | 6 ++++++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/community/modules/scripts/htcondor-install/README.md b/community/modules/scripts/htcondor-install/README.md index 7025bf93b4..4224055085 100644 --- a/community/modules/scripts/htcondor-install/README.md +++ b/community/modules/scripts/htcondor-install/README.md @@ -124,6 +124,7 @@ No resources. 
| Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [condor\_version](#input\_condor\_version) | Yum/DNF-compatible version string; leave unset to default to 10.x series (examples: "10.5.1","10.*")) | `string` | `"10.*"` | no | | [enable\_docker](#input\_enable\_docker) | Install and enable docker daemon alongside HTCondor | `bool` | `true` | no | ## Outputs diff --git a/community/modules/scripts/htcondor-install/files/install-htcondor.yaml b/community/modules/scripts/htcondor-install/files/install-htcondor.yaml index 70c0055cb4..d4426a4c78 100644 --- a/community/modules/scripts/htcondor-install/files/install-htcondor.yaml +++ b/community/modules/scripts/htcondor-install/files/install-htcondor.yaml @@ -50,7 +50,7 @@ priority: "90" - name: Install HTCondor ansible.builtin.package: - name: condor + name: condor-{{ condor_version | default("10.*") | string }} state: present - name: Ensure token directory ansible.builtin.file: diff --git a/community/modules/scripts/htcondor-install/main.tf b/community/modules/scripts/htcondor-install/main.tf index 6659a1965a..afd7430dd9 100644 --- a/community/modules/scripts/htcondor-install/main.tf +++ b/community/modules/scripts/htcondor-install/main.tf @@ -19,7 +19,10 @@ locals { "type" = "ansible-local" "source" = "${path.module}/files/install-htcondor.yaml" "destination" = "install-htcondor.yaml" - "args" = "-e enable_docker=${var.enable_docker}" + "args" = join(" ", [ + "-e enable_docker=${var.enable_docker}", + "-e condor_version=${var.condor_version}", + ]) } runner_install_autoscaler_deps = { diff --git a/community/modules/scripts/htcondor-install/variables.tf b/community/modules/scripts/htcondor-install/variables.tf index 2ccd9d1156..0334d62d52 100644 --- a/community/modules/scripts/htcondor-install/variables.tf +++ b/community/modules/scripts/htcondor-install/variables.tf @@ -19,3 +19,9 @@ variable "enable_docker" { type = bool default = true } + +variable "condor_version" 
{ + description = "Yum/DNF-compatible version string; leave unset to default to 10.x series (examples: \"10.5.1\",\"10.*\"))" + type = string + default = "10.*" +} From ff0c2685a77d327343c6d4183a8eebb8afe18823 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 8 Jun 2023 23:15:59 -0700 Subject: [PATCH 11/92] Update gke service account variable to separate email and scopes --- .../modules/compute/gke-node-pool/README.md | 4 ++- .../modules/compute/gke-node-pool/main.tf | 6 ++-- .../compute/gke-node-pool/variables.tf | 34 +++++++++++++------ .../modules/scheduler/gke-cluster/README.md | 4 ++- .../modules/scheduler/gke-cluster/main.tf | 6 ++-- .../scheduler/gke-cluster/variables.tf | 34 +++++++++++++------ 6 files changed, 60 insertions(+), 28 deletions(-) diff --git a/community/modules/compute/gke-node-pool/README.md b/community/modules/compute/gke-node-pool/README.md index 1429a72e54..39b526296f 100644 --- a/community/modules/compute/gke-node-pool/README.md +++ b/community/modules/compute/gke-node-pool/README.md @@ -186,7 +186,9 @@ No modules. | [machine\_type](#input\_machine\_type) | The name of a Google Compute Engine machine type. | `string` | `"c2-standard-60"` | no | | [name](#input\_name) | The name of the node pool. If left blank, will default to the machine type. | `string` | `null` | no | | [project\_id](#input\_project\_id) | The project ID to host the cluster in. | `string` | n/a | yes | -| [service\_account](#input\_service\_account) | Service account to use with the system node pool |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/cloud-platform"
]
}
| no | +| [service\_account](#input\_service\_account) | DEPRECATED: use service\_account\_email and scopes. |
object({
email = string,
scopes = set(string)
})
| `null` | no | +| [service\_account\_email](#input\_service\_account\_email) | Service account e-mail address to use with the node pool | `string` | `null` | no | +| [service\_account\_scopes](#input\_service\_account\_scopes) | Scopes to use with the node pool. | `set(string)` |
[
"https://www.googleapis.com/auth/cloud-platform"
]
| no | | [spot](#input\_spot) | Provision VMs using discounted Spot pricing, allowing for preemption | `bool` | `false` | no | | [static\_node\_count](#input\_static\_node\_count) | The static number of nodes in the node pool. If set, autoscaling will be disabled. | `number` | `null` | no | | [taints](#input\_taints) | Taints to be applied to the system node pool. |
list(object({
key = string
value = any
effect = string
}))
|
[
{
"effect": "NO_SCHEDULE",
"key": "user-workload",
"value": true
}
]
| no | diff --git a/community/modules/compute/gke-node-pool/main.tf b/community/modules/compute/gke-node-pool/main.tf index ac3809fa34..f8364ed929 100644 --- a/community/modules/compute/gke-node-pool/main.tf +++ b/community/modules/compute/gke-node-pool/main.tf @@ -20,7 +20,7 @@ locals { } locals { - sa_email = var.service_account.email != null ? var.service_account.email : data.google_compute_default_service_account.default_sa.email + sa_email = var.service_account_email != null ? var.service_account_email : data.google_compute_default_service_account.default_sa.email has_gpu = var.guest_accelerator != null || contains(["a2", "g2"], local.machine_family) gpu_taint = local.has_gpu ? [{ @@ -76,8 +76,8 @@ resource "google_container_node_pool" "node_pool" { disk_size_gb = var.disk_size_gb disk_type = var.disk_type resource_labels = local.labels - service_account = var.service_account.email - oauth_scopes = var.service_account.scopes + service_account = var.service_account_email + oauth_scopes = var.service_account_scopes machine_type = var.machine_type spot = var.spot taint = concat(var.taints, local.gpu_taint) diff --git a/community/modules/compute/gke-node-pool/variables.tf b/community/modules/compute/gke-node-pool/variables.tf index 6d74f5e1b7..41e07a9c5a 100644 --- a/community/modules/compute/gke-node-pool/variables.tf +++ b/community/modules/compute/gke-node-pool/variables.tf @@ -139,16 +139,16 @@ variable "compact_placement" { default = false } -variable "service_account" { - description = "Service account to use with the system node pool" - type = object({ - email = string, - scopes = set(string) - }) - default = { - email = null - scopes = ["https://www.googleapis.com/auth/cloud-platform"] - } +variable "service_account_email" { + description = "Service account e-mail address to use with the node pool" + type = string + default = null +} + +variable "service_account_scopes" { + description = "Scopes to to use with the node pool." 
+ type = set(string) + default = ["https://www.googleapis.com/auth/cloud-platform"] } variable "taints" { @@ -183,6 +183,7 @@ variable "timeout_update" { } # Deprecated + variable "total_min_nodes" { description = "DEPRECATED: Use autoscaling_total_min_nodes." type = number @@ -202,3 +203,16 @@ variable "total_max_nodes" { error_message = "total_max_nodes was renamed to autoscaling_total_max_nodes and is deprecated; use autoscaling_total_max_nodes" } } + +variable "service_account" { + description = "DEPRECATED: use service_account_email and scopes." + type = object({ + email = string, + scopes = set(string) + }) + default = null + validation { + condition = var.service_account == null + error_message = "service_account is deprecated and replaced with service_account_email and scopes." + } +} diff --git a/community/modules/scheduler/gke-cluster/README.md b/community/modules/scheduler/gke-cluster/README.md index 359a4b370f..6e9811c105 100644 --- a/community/modules/scheduler/gke-cluster/README.md +++ b/community/modules/scheduler/gke-cluster/README.md @@ -130,7 +130,9 @@ No modules. | [project\_id](#input\_project\_id) | The project ID to host the cluster in. | `string` | n/a | yes | | [region](#input\_region) | The region to host the cluster in. | `string` | n/a | yes | | [release\_channel](#input\_release\_channel) | The release channel of this cluster. Accepted values are `UNSPECIFIED`, `RAPID`, `REGULAR` and `STABLE`. | `string` | `"UNSPECIFIED"` | no | -| [service\_account](#input\_service\_account) | Service account to use with the system node pool |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/cloud-platform"
]
}
| no | +| [service\_account](#input\_service\_account) | DEPRECATED: use service\_account\_email and scopes. |
object({
email = string,
scopes = set(string)
})
| `null` | no | +| [service\_account\_email](#input\_service\_account\_email) | Service account e-mail address to use with the system node pool | `string` | `null` | no | +| [service\_account\_scopes](#input\_service\_account\_scopes) | Scopes to use with the system node pool. | `set(string)` |
[
"https://www.googleapis.com/auth/cloud-platform"
]
| no | | [services\_ip\_range\_name](#input\_services\_ip\_range\_name) | The name of the secondary subnet range to use for services. | `string` | `"services"` | no | | [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | The self link of the subnetwork to host the cluster in. | `string` | n/a | yes | | [system\_node\_pool\_enabled](#input\_system\_node\_pool\_enabled) | Create a system node pool. | `bool` | `true` | no | diff --git a/community/modules/scheduler/gke-cluster/main.tf b/community/modules/scheduler/gke-cluster/main.tf index 1494eb4157..c03d2874d2 100644 --- a/community/modules/scheduler/gke-cluster/main.tf +++ b/community/modules/scheduler/gke-cluster/main.tf @@ -29,7 +29,7 @@ locals { security_group = var.authenticator_security_group }] - sa_email = var.service_account.email != null ? var.service_account.email : data.google_compute_default_service_account.default_sa.email + sa_email = var.service_account_email != null ? var.service_account_email : data.google_compute_default_service_account.default_sa.email } data "google_compute_default_service_account" "default_sa" { @@ -196,8 +196,8 @@ resource "google_container_node_pool" "system_node_pools" { node_config { resource_labels = local.labels - service_account = var.service_account.email - oauth_scopes = var.service_account.scopes + service_account = var.service_account_email + oauth_scopes = var.service_account_scopes machine_type = var.system_node_pool_machine_type taint = var.system_node_pool_taints diff --git a/community/modules/scheduler/gke-cluster/variables.tf b/community/modules/scheduler/gke-cluster/variables.tf index a114ab3443..a4f284e2ec 100644 --- a/community/modules/scheduler/gke-cluster/variables.tf +++ b/community/modules/scheduler/gke-cluster/variables.tf @@ -204,16 +204,16 @@ variable "master_authorized_networks" { default = [] } -variable "service_account" { - description = "Service account to use with the system node pool" - type = object({ - email = string, - scopes = 
set(string) - }) - default = { - email = null - scopes = ["https://www.googleapis.com/auth/cloud-platform"] - } +variable "service_account_email" { + description = "Service account e-mail address to use with the system node pool" + type = string + default = null +} + +variable "service_account_scopes" { + description = "Scopes to to use with the system node pool." + type = set(string) + default = ["https://www.googleapis.com/auth/cloud-platform"] } variable "autoscaling_profile" { @@ -250,3 +250,17 @@ variable "timeout_update" { type = string default = null } + +# Deprecated +variable "service_account" { + description = "DEPRECATED: use service_account_email and scopes." + type = object({ + email = string, + scopes = set(string) + }) + default = null + validation { + condition = var.service_account == null + error_message = "service_account is deprecated and replaced with service_account_email and scopes." + } +} From c64004e80470658dcff6bbc156a8a2d47dceedb6 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Mon, 12 Jun 2023 15:29:04 -0500 Subject: [PATCH 12/92] Upgrade HTCondor example to use Rocky Linux 8 --- community/examples/htc-htcondor.yaml | 4 ++++ .../htcondor-configure/files/htcondor_configure.yml | 1 - .../htcondor-install/files/install-htcondor.yaml | 10 ++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/community/examples/htc-htcondor.yaml b/community/examples/htc-htcondor.yaml index f5460480da..355a39c16d 100644 --- a/community/examples/htc-htcondor.yaml +++ b/community/examples/htc-htcondor.yaml @@ -20,6 +20,10 @@ vars: deployment_name: htcondor-001 region: us-central1 zone: us-central1-c + # Only CentOS 7 and Rocky 8 are supported; for CentOS 7, remove next 2 lines + instance_image: + project: cloud-hpc-image-public + family: hpc-rocky-linux-8 # Documentation for each of the modules used below can be found at # https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md diff --git 
a/community/modules/scheduler/htcondor-configure/files/htcondor_configure.yml b/community/modules/scheduler/htcondor-configure/files/htcondor_configure.yml index 1b69a8149b..faa8b348a7 100644 --- a/community/modules/scheduler/htcondor-configure/files/htcondor_configure.yml +++ b/community/modules/scheduler/htcondor-configure/files/htcondor_configure.yml @@ -41,7 +41,6 @@ - name: Set HTCondor Pool password (token signing key) ansible.builtin.shell: | set -e -o pipefail - export CLOUDSDK_PYTHON=/usr/bin/python POOL_PASSWORD=$(gcloud secrets versions access latest --secret={{ password_id }}) echo -n "$POOL_PASSWORD" | sh -c "condor_store_cred add -c -i -" args: diff --git a/community/modules/scripts/htcondor-install/files/install-htcondor.yaml b/community/modules/scripts/htcondor-install/files/install-htcondor.yaml index d4426a4c78..bf0d8bc6b4 100644 --- a/community/modules/scripts/htcondor-install/files/install-htcondor.yaml +++ b/community/modules/scripts/htcondor-install/files/install-htcondor.yaml @@ -28,6 +28,11 @@ ansible.builtin.package: name: - epel-release + # adding key explicitly addresses https://github.com/ansible/ansible/pull/71640 + - name: Add HTCondor RPM key + ansible.builtin.rpm_key: + state: present + key: https://research.cs.wisc.edu/htcondor/repo/keys/HTCondor-10.x-Key - name: Enable HTCondor Feature Release repository ansible.builtin.yum_repository: name: htcondor-feature @@ -61,6 +66,11 @@ - name: Install Docker and configure HTCondor to use it when: enable_docker | bool # allows string to be passed at CLI block: + # adding key explicitly addresses https://github.com/ansible/ansible/pull/71640 + - name: Add Docker RPM key + ansible.builtin.rpm_key: + state: present + key: https://download.docker.com/linux/centos/gpg - name: Setup Docker repo ansible.builtin.yum_repository: name: docker-ce-stable From ccb0dabc189f59fad3c8c4156b152937f5e50ec5 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Mon, 12 Jun 2023 18:12:30 -0500 Subject: [PATCH 13/92] 
Update HTCondor execute point module - use Rocky Linux 8 by default - align var.instance_image with guidelines in #1423 --- community/modules/compute/htcondor-execute-point/README.md | 2 +- community/modules/compute/htcondor-execute-point/main.tf | 4 ++-- community/modules/compute/htcondor-execute-point/variables.tf | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/community/modules/compute/htcondor-execute-point/README.md b/community/modules/compute/htcondor-execute-point/README.md index 723fe0bafd..68d327df4f 100644 --- a/community/modules/compute/htcondor-execute-point/README.md +++ b/community/modules/compute/htcondor-execute-point/README.md @@ -182,7 +182,7 @@ No resources. | [deployment\_name](#input\_deployment\_name) | HPC Toolkit deployment name. HTCondor cloud resource names will include this value. | `string` | n/a | yes | | [disk\_size\_gb](#input\_disk\_size\_gb) | Boot disk size in GB | `number` | `100` | no | | [enable\_oslogin](#input\_enable\_oslogin) | Enable or Disable OS Login with "ENABLE" or "DISABLE". Set to "INHERIT" to inherit project OS Login setting. | `string` | `"ENABLE"` | no | -| [image](#input\_image) | HTCondor execute point VM image |
object({
family = string,
project = string
})
|
{
"family": "hpc-centos-7",
"project": "cloud-hpc-image-public"
}
| no | +| [instance\_image](#input\_instance\_image) | HTCondor execute point VM image |
object({
family = string,
project = string
})
|
{
"family": "hpc-rocky-linux-8",
"project": "cloud-hpc-image-public"
}
| no | | [labels](#input\_labels) | Labels to add to HTConodr execute points | `map(string)` | n/a | yes | | [machine\_type](#input\_machine\_type) | Machine type to use for HTCondor execute points | `string` | `"n2-standard-4"` | no | | [max\_size](#input\_max\_size) | Maximum size of the HTCondor execute point pool. | `number` | `100` | no | diff --git a/community/modules/compute/htcondor-execute-point/main.tf b/community/modules/compute/htcondor-execute-point/main.tf index 37c0b5e5b1..091e329a8f 100644 --- a/community/modules/compute/htcondor-execute-point/main.tf +++ b/community/modules/compute/htcondor-execute-point/main.tf @@ -63,8 +63,8 @@ module "execute_point_instance_template" { preemptible = var.spot startup_script = var.startup_script metadata = local.metadata - source_image_family = var.image.family - source_image_project = var.image.project + source_image_family = var.instance_image.family + source_image_project = var.instance_image.project } module "mig" { diff --git a/community/modules/compute/htcondor-execute-point/variables.tf b/community/modules/compute/htcondor-execute-point/variables.tf index 683c89f4d6..f1a21bb2c7 100644 --- a/community/modules/compute/htcondor-execute-point/variables.tf +++ b/community/modules/compute/htcondor-execute-point/variables.tf @@ -65,14 +65,14 @@ variable "network_storage" { default = [] } -variable "image" { +variable "instance_image" { description = "HTCondor execute point VM image" type = object({ family = string, project = string }) default = { - family = "hpc-centos-7" + family = "hpc-rocky-linux-8" project = "cloud-hpc-image-public" } } From 09d7658f5c59901cd8af7df939f16a378b5d4ba4 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Mon, 12 Jun 2023 23:14:05 -0500 Subject: [PATCH 14/92] Improve resilience of HTCondor installation - yum-cron.service or dnf-automatic.service can often start execution in the middle of google-startup-scripts. 
This can cause problems executing package or GPG key operations through ansible. Resolve by adopting ansible.builtin.yum (which uses dnf as a backend when available) and adding a default lock_timeout of 5 minutes. - Remove use of ansible.builtin.gpg_key as it does not honor any timeout and it doesn't appear to solve the problem it was thought to solve. --- .../files/install-htcondor.yaml | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/community/modules/scripts/htcondor-install/files/install-htcondor.yaml b/community/modules/scripts/htcondor-install/files/install-htcondor.yaml index bf0d8bc6b4..7aecf62b9a 100644 --- a/community/modules/scripts/htcondor-install/files/install-htcondor.yaml +++ b/community/modules/scripts/htcondor-install/files/install-htcondor.yaml @@ -23,16 +23,14 @@ vars: enable_docker: true become: true + module_defaults: + ansible.builtin.yum: + lock_timeout: 300 tasks: - name: Enable EPEL repository - ansible.builtin.package: + ansible.builtin.yum: name: - epel-release - # adding key explicitly addresses https://github.com/ansible/ansible/pull/71640 - - name: Add HTCondor RPM key - ansible.builtin.rpm_key: - state: present - key: https://research.cs.wisc.edu/htcondor/repo/keys/HTCondor-10.x-Key - name: Enable HTCondor Feature Release repository ansible.builtin.yum_repository: name: htcondor-feature @@ -54,7 +52,7 @@ repo_gpgcheck: true priority: "90" - name: Install HTCondor - ansible.builtin.package: + ansible.builtin.yum: name: condor-{{ condor_version | default("10.*") | string }} state: present - name: Ensure token directory @@ -66,11 +64,6 @@ - name: Install Docker and configure HTCondor to use it when: enable_docker | bool # allows string to be passed at CLI block: - # adding key explicitly addresses https://github.com/ansible/ansible/pull/71640 - - name: Add Docker RPM key - ansible.builtin.rpm_key: - state: present - key: https://download.docker.com/linux/centos/gpg - name: Setup Docker repo 
ansible.builtin.yum_repository: name: docker-ce-stable @@ -80,7 +73,7 @@ gpgcheck: yes gpgkey: https://download.docker.com/linux/centos/gpg - name: Install Docker - ansible.builtin.package: + ansible.builtin.yum: name: - docker-ce - docker-ce-cli From fdac3ac54eed184d39519347ae574bbb953b033f Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Mon, 12 Jun 2023 23:14:05 -0500 Subject: [PATCH 15/92] Convert HTCondor example to multigroup deployment - Improve HTCondor example through building a custom image - Modify integration test to use deploy/destroy commands - Modify example to autoscale 2 idle nodes and add a test to check --- community/examples/htc-htcondor.yaml | 49 ++++++++--- .../htcondor-integration-test.yml | 86 +++++++++++-------- .../test-htcondor-access-point.yml | 11 ++- 3 files changed, 97 insertions(+), 49 deletions(-) diff --git a/community/examples/htc-htcondor.yaml b/community/examples/htc-htcondor.yaml index 355a39c16d..9a2f9b4aeb 100644 --- a/community/examples/htc-htcondor.yaml +++ b/community/examples/htc-htcondor.yaml @@ -17,19 +17,17 @@ blueprint_name: htc-htcondor vars: project_id: ## Set GCP Project ID Here ## - deployment_name: htcondor-001 + deployment_name: htcondor-pool region: us-central1 zone: us-central1-c - # Only CentOS 7 and Rocky 8 are supported; for CentOS 7, remove next 2 lines - instance_image: - project: cloud-hpc-image-public - family: hpc-rocky-linux-8 + disk_size_gb: 100 + image_family: htcondor-10x # Documentation for each of the modules used below can be found at # https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md deployment_groups: -- group: htcondor +- group: primary modules: - id: network1 source: modules/network/vpc @@ -39,6 +37,28 @@ deployment_groups: - id: htcondor_install source: community/modules/scripts/htcondor-install + - id: htcondor_install_script + source: modules/scripts/startup-script + settings: + runners: + - $(htcondor_install.install_htcondor_runner) + - 
$(htcondor_install.install_autoscaler_deps_runner) + +- group: packer + modules: + - id: custom-image + source: modules/packer/custom-image + kind: packer + use: + - network1 + - htcondor_install_script + settings: + disk_size: $(vars.disk_size_gb) + source_image_family: hpc-rocky-linux-8 + image_family: $(vars.image_family) + +- group: pool + modules: - id: htcondor_configure source: community/modules/scheduler/htcondor-configure use: @@ -48,7 +68,6 @@ deployment_groups: source: modules/scripts/startup-script settings: runners: - - $(htcondor_install.install_htcondor_runner) - $(htcondor_configure.central_manager_runner) - id: htcondor_cm @@ -58,6 +77,9 @@ deployment_groups: - htcondor_startup_central_manager settings: name_prefix: cm + instance_image: + project: $(vars.project_id) + family: $(vars.image_family) add_deployment_name_before_prefix: true machine_type: c2-standard-4 disable_public_ips: true @@ -83,7 +105,6 @@ deployment_groups: source: modules/scripts/startup-script settings: runners: - - $(htcondor_install.install_htcondor_runner) - $(htcondor_configure.execute_point_runner) # the HTCondor modules support up to 2 execute points per blueprint @@ -95,6 +116,10 @@ deployment_groups: - network1 - htcondor_startup_execute_point settings: + instance_image: + project: $(vars.project_id) + family: $(vars.image_family) + min_idle: 2 service_account: email: $(htcondor_configure.execute_point_service_account) scopes: @@ -107,6 +132,9 @@ deployment_groups: - htcondor_startup_execute_point settings: spot: true + instance_image: + project: $(vars.project_id) + family: $(vars.image_family) service_account: email: $(htcondor_configure.execute_point_service_account) scopes: @@ -116,8 +144,6 @@ deployment_groups: source: modules/scripts/startup-script settings: runners: - - $(htcondor_install.install_htcondor_runner) - - $(htcondor_install.install_autoscaler_deps_runner) - $(htcondor_install.install_autoscaler_runner) - $(htcondor_configure.access_point_runner) - 
$(htcondor_execute_point.configure_autoscaler_runner) @@ -143,6 +169,9 @@ deployment_groups: - htcondor_startup_access_point settings: name_prefix: ap + instance_image: + project: $(vars.project_id) + family: $(vars.image_family) add_deployment_name_before_prefix: true machine_type: c2-standard-4 service_account: diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml index 7a6a9d3b73..e678b025a2 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml @@ -33,18 +33,14 @@ file: tasks/create_deployment_directory.yml - name: Create Infrastructure and test block: - - name: Setup network and HTCondor install scripts - ansible.builtin.command: - cmd: "{{ item }}" - chdir: "{{ workspace }}/{{ deployment_name }}/htcondor" + - name: Execute ghpc deploy + register: deployment + changed_when: deployment.changed + ansible.builtin.command: ./ghpc deploy {{ deployment_name }} --auto-approve args: - creates: "{{ workspace }}/{{ deployment_name }}/.terraform" + chdir: "{{ workspace }}" environment: TF_IN_AUTOMATION: "TRUE" - with_items: - - terraform init - - terraform validate - - terraform apply -auto-approve -no-color - name: Print instance IDs of VMs ansible.builtin.include_tasks: file: tasks/get_instance_ids.yml @@ -52,10 +48,10 @@ register: access_ip changed_when: false args: - chdir: "{{ workspace }}/{{ deployment_name }}/htcondor" + chdir: "{{ workspace }}/{{ deployment_name }}/pool" executable: /bin/bash ansible.builtin.shell: | - set -o pipefail + set -e -o pipefail terraform output -json external_ip_htcondor_access | jq -r '.[0]' - name: Add Login node as host ansible.builtin.add_host: @@ -66,9 +62,12 @@ - name: Get Builder IP register: build_ip changed_when: false - ansible.builtin.shell: >- - dig TXT +short o-o.myaddr.l.google.com 
@ns1.google.com | - awk -F'"' '{print $2}' + args: + executable: /bin/bash + ansible.builtin.shell: | + set -e -o pipefail + dig TXT +short o-o.myaddr.l.google.com @ns1.google.com | \ + awk -F'"' '{print $2}' - name: Create firewall rule register: fw_result changed_when: fw_result.rc == 0 @@ -111,19 +110,25 @@ - firewall-rules - delete - "{{ deployment_name }}" - - name: Tear Down Pool - changed_when: true # assume something destroyed - failed_when: false # keep cleaning up - run_once: true - delegate_to: localhost + - name: Destroy deployment + register: ghpc_destroy + changed_when: ghpc_destroy.changed + ignore_errors: true + ansible.builtin.command: ./ghpc destroy {{ deployment_name }} --auto-approve + args: + chdir: "{{ workspace }}" environment: TF_IN_AUTOMATION: "TRUE" - ansible.builtin.command: - cmd: "{{ item }}" - chdir: "{{ workspace }}/{{ deployment_name }}/htcondor" - with_items: - - terraform init - - terraform destroy -auto-approve + - name: Delete VM Image + register: image_deletion + changed_when: image_deletion.changed + ignore_errors: true + ansible.builtin.shell: | + set -e -o pipefail + gcloud compute images delete --project={{ project }} --quiet $(jq -r '.builds[-1].artifact_id' packer-manifest.json | cut -d ":" -f2) + args: + chdir: "{{ workspace }}/{{ deployment_name }}/packer/custom-image" + executable: /bin/bash - name: Run Integration Tests hosts: remote_host @@ -148,7 +153,6 @@ timeout: 480 - name: Run Integration tests for HPC toolkit ansible.builtin.include_tasks: "{{ test }}" - run_once: true vars: access_point: "{{ access_point }}" loop: "{{ post_deploy_tests }}" @@ -156,11 +160,10 @@ loop_var: test always: - name: Delete Firewall Rule + delegate_to: localhost register: fw_deleted changed_when: fw_deleted.rc == 0 failed_when: false # keep cleaning up - run_once: true - delegate_to: localhost ansible.builtin.command: argv: - gcloud @@ -168,15 +171,24 @@ - firewall-rules - delete - "{{ deployment_name }}" - - name: Tear Down Pool - 
changed_when: true # assume something destroyed + - name: Destroy deployment delegate_to: localhost - run_once: true + register: ghpc_destroy + changed_when: ghpc_destroy.changed + ignore_errors: true + ansible.builtin.command: ./ghpc destroy {{ deployment_name }} --auto-approve + args: + chdir: "{{ workspace }}" environment: TF_IN_AUTOMATION: "TRUE" - ansible.builtin.command: - cmd: "{{ item }}" - chdir: "{{ workspace }}/{{ deployment_name }}/htcondor" - with_items: - - terraform init - - terraform destroy -auto-approve + - name: Delete VM Image + delegate_to: localhost + register: image_deletion + changed_when: image_deletion.changed + ignore_errors: true + ansible.builtin.shell: | + set -e -o pipefail + gcloud compute images delete --project={{ project }} --quiet $(jq -r '.builds[-1].artifact_id' packer-manifest.json | cut -d ":" -f2) + args: + chdir: "{{ workspace }}/{{ deployment_name }}/packer/custom-image" + executable: /bin/bash diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-htcondor-access-point.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-htcondor-access-point.yml index 6555539e56..7d13d958ee 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-htcondor-access-point.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-htcondor-access-point.yml @@ -13,10 +13,17 @@ # limitations under the License. 
--- -- name: Ensure schedd has joined pool +- name: Ensure schedd has joined the pool ansible.builtin.command: condor_status -schedd -autof Name register: schedd_status changed_when: False - until: ansible_fqdn == schedd_status.stdout + until: schedd_status.stdout == ansible_fqdn retries: 8 delay: 15 +- name: Ensure execute points have joined the pool + ansible.builtin.command: condor_status -collector -autoformat HostsTotal + register: hosts_total + changed_when: False + until: hosts_total.stdout == "2" + retries: 20 + delay: 15 From a36d78fbfb02fae4823c1f73eb0de0db81cd4682 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Sun, 11 Jun 2023 15:19:16 -0700 Subject: [PATCH 16/92] Conditionally exclude nodeSelector when not needed --- community/modules/compute/gke-job-template/main.tf | 3 +++ .../compute/gke-job-template/templates/gke-job-base.yaml.tftpl | 2 ++ 2 files changed, 5 insertions(+) diff --git a/community/modules/compute/gke-job-template/main.tf b/community/modules/compute/gke-job-template/main.tf index 025b344aed..be35046e8d 100644 --- a/community/modules/compute/gke-job-template/main.tf +++ b/community/modules/compute/gke-job-template/main.tf @@ -48,6 +48,8 @@ locals { suffix = var.random_name_sufix ? 
"-${random_id.resource_name_suffix.hex}" : "" + has_node_selector = var.machine_family != null || length(var.node_selectors) != 0 + job_template_contents = templatefile( "${path.module}/templates/gke-job-base.yaml.tftpl", { @@ -59,6 +61,7 @@ locals { machine_family = var.machine_family node_pool_names = var.node_pool_name node_selectors = var.node_selectors + has_node_selector = local.has_node_selector should_request_cpu = local.should_request_cpu full_node_request = local.full_node_request millicpu_request = "${local.millicpu}m" diff --git a/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl b/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl index 4a8f21d079..cced1a9d30 100644 --- a/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl +++ b/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl @@ -25,6 +25,7 @@ spec: - ${node_pool} %{~ endfor ~} %{~ endif ~} + %{~ if has_node_selector ~} nodeSelector: %{~ if machine_family != null ~} cloud.google.com/machine-family: ${machine_family} @@ -32,6 +33,7 @@ spec: %{~ for key, val in node_selectors ~} ${key}: ${val} %{~ endfor ~} + %{~ endif ~} tolerations: %{~ for toleration in tolerations ~} - key: ${toleration.key} From 86c2df91c51c6d30c0c405c8a333064fcd0d1805 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 13 Jun 2023 14:14:23 -0500 Subject: [PATCH 17/92] Address feedback from #1440 --- community/examples/htc-htcondor.yaml | 12 ++++++------ community/modules/scripts/htcondor-install/README.md | 10 ++++++---- .../ansible_playbooks/test-htcondor-access-point.yml | 1 + 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/community/examples/htc-htcondor.yaml b/community/examples/htc-htcondor.yaml index 9a2f9b4aeb..5c3d477efa 100644 --- a/community/examples/htc-htcondor.yaml +++ b/community/examples/htc-htcondor.yaml @@ -21,7 +21,7 @@ vars: region: us-central1 zone: us-central1-c disk_size_gb: 100 - 
image_family: htcondor-10x + new_image_family: htcondor-10x # Documentation for each of the modules used below can be found at # https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md @@ -55,7 +55,7 @@ deployment_groups: settings: disk_size: $(vars.disk_size_gb) source_image_family: hpc-rocky-linux-8 - image_family: $(vars.image_family) + image_family: $(vars.new_image_family) - group: pool modules: @@ -79,7 +79,7 @@ deployment_groups: name_prefix: cm instance_image: project: $(vars.project_id) - family: $(vars.image_family) + family: $(vars.new_image_family) add_deployment_name_before_prefix: true machine_type: c2-standard-4 disable_public_ips: true @@ -118,7 +118,7 @@ deployment_groups: settings: instance_image: project: $(vars.project_id) - family: $(vars.image_family) + family: $(vars.new_image_family) min_idle: 2 service_account: email: $(htcondor_configure.execute_point_service_account) @@ -134,7 +134,7 @@ deployment_groups: spot: true instance_image: project: $(vars.project_id) - family: $(vars.image_family) + family: $(vars.new_image_family) service_account: email: $(htcondor_configure.execute_point_service_account) scopes: @@ -171,7 +171,7 @@ deployment_groups: name_prefix: ap instance_image: project: $(vars.project_id) - family: $(vars.image_family) + family: $(vars.new_image_family) add_deployment_name_before_prefix: true machine_type: c2-standard-4 service_account: diff --git a/community/modules/scripts/htcondor-install/README.md b/community/modules/scripts/htcondor-install/README.md index 4224055085..7ba8d16fd1 100644 --- a/community/modules/scripts/htcondor-install/README.md +++ b/community/modules/scripts/htcondor-install/README.md @@ -1,12 +1,14 @@ ## Description This module creates a Toolkit runner that will install HTCondor on RedHat 7 or -derivative operating systems such as the CentOS 7 release in the [HPC VM -Image][hpcvmimage]. 
It should also function on RedHat or Rocky Linux releases 8 -and 9, however it is not yet supported. Please report any [issues] on these -platforms. +8 and its derivative operating systems. These include the CentOS 7 and Rocky +Linux 8 releases of the [HPC VM Image][hpcvmimage]. It may also function on +RedHat 9 and derivatives, however it is not yet supported. Please report any +[issues] on these 3 distributions or open a [discussion] to request support on +Debian or Ubuntu distributions. [issues]: https://github.com/GoogleCloudPlatform/hpc-toolkit/issues +[discussion]: https://github.com/GoogleCloudPlatform/hpc-toolkit/discussions It also exports a list of Google Cloud APIs which must be enabled prior to provisioning an HTCondor Pool. diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-htcondor-access-point.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-htcondor-access-point.yml index 7d13d958ee..0419c3276b 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/test-htcondor-access-point.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-htcondor-access-point.yml @@ -24,6 +24,7 @@ ansible.builtin.command: condor_status -collector -autoformat HostsTotal register: hosts_total changed_when: False + # must match total min_idle settings of execute-point modules in blueprint until: hosts_total.stdout == "2" retries: 20 delay: 15 From d62362c11579b0d7afb72b0f19d7f42122fd2c61 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 13 Jun 2023 14:14:53 -0500 Subject: [PATCH 18/92] Eliminate all ansible-lint errors for base and Slurm integration tests --- .../ansible_playbooks/base-integration-test.yml | 12 +++++++----- .../ansible_playbooks/slurm-integration-test.yml | 12 +++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml index 7d78370bbf..58ff5a5f29 100644 
--- a/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml @@ -80,11 +80,14 @@ ## Setup firewall for cloud build - name: Get Builder IP - changed_when: false - ansible.builtin.shell: >- - dig TXT +short o-o.myaddr.l.google.com @ns1.google.com | - awk -F'"' '{print $2}' register: build_ip + changed_when: false + args: + executable: /bin/bash + ansible.builtin.shell: | + set -e -o pipefail + dig TXT +short o-o.myaddr.l.google.com @ns1.google.com | \ + awk -F'"' '{print $2}' - name: Create firewall rule register: fw_result changed_when: fw_result.rc == 0 @@ -159,7 +162,6 @@ minutes: 2 - name: Run Integration tests for HPC toolkit ansible.builtin.include_tasks: "{{ test }}" - run_once: true vars: remote_node: "{{ remote_node }}" deployment_name: "{{ deployment_name }}" diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml index 70bf2ccc2a..0c424bb20d 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml @@ -86,11 +86,14 @@ ## Setup firewall for cloud build - name: Get Builder IP - changed_when: false - ansible.builtin.shell: >- - dig TXT +short o-o.myaddr.l.google.com @ns1.google.com | - awk -F'"' '{print $2}' register: build_ip + changed_when: false + args: + executable: /bin/bash + ansible.builtin.shell: | + set -e -o pipefail + dig TXT +short o-o.myaddr.l.google.com @ns1.google.com | \ + awk -F'"' '{print $2}' - name: Create firewall rule register: fw_created changed_when: fw_created.rc == 0 @@ -172,7 +175,6 @@ timeout: 600 - name: Run Integration tests for HPC toolkit ansible.builtin.include_tasks: "{{ test }}" - run_once: true vars: login_node: "{{ login_node }}" custom_vars: "{{ custom_vars }}" From 8c1863a2efb6a6aaa60ec6261f4ac87127004e91 
Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 13 Jun 2023 14:49:40 -0500 Subject: [PATCH 19/92] Update internal use of startup-script to latest --- community/modules/compute/pbspro-execution/README.md | 2 +- community/modules/compute/pbspro-execution/main.tf | 2 +- .../modules/remote-desktop/chrome-remote-desktop/README.md | 2 +- community/modules/remote-desktop/chrome-remote-desktop/main.tf | 2 +- community/modules/scheduler/pbspro-client/README.md | 2 +- community/modules/scheduler/pbspro-client/main.tf | 2 +- community/modules/scheduler/pbspro-server/README.md | 2 +- community/modules/scheduler/pbspro-server/main.tf | 2 +- modules/compute/vm-instance/README.md | 2 +- modules/compute/vm-instance/startup_from_network_storage.tf | 2 +- modules/scheduler/batch-job-template/README.md | 2 +- .../batch-job-template/startup_from_network_storage.tf | 2 +- modules/scheduler/batch-login-node/README.md | 2 +- modules/scheduler/batch-login-node/main.tf | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git a/community/modules/compute/pbspro-execution/README.md b/community/modules/compute/pbspro-execution/README.md index 4d0952accb..d01233508c 100644 --- a/community/modules/compute/pbspro-execution/README.md +++ b/community/modules/compute/pbspro-execution/README.md @@ -74,7 +74,7 @@ No providers. 
| Name | Source | Version | |------|--------|---------| -| [execution\_startup\_script](#module\_execution\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 34bb7250 | +| [execution\_startup\_script](#module\_execution\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 185837b5 | | [pbs\_execution](#module\_pbs\_execution) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/compute/vm-instance | 6c6b9e0a | | [pbs\_install](#module\_pbs\_install) | github.com/GoogleCloudPlatform/hpc-toolkit//community/modules/scripts/pbspro-install | 6c6b9e0a | diff --git a/community/modules/compute/pbspro-execution/main.tf b/community/modules/compute/pbspro-execution/main.tf index a15cb90583..4d7c6a6d88 100644 --- a/community/modules/compute/pbspro-execution/main.tf +++ b/community/modules/compute/pbspro-execution/main.tf @@ -53,7 +53,7 @@ module "pbs_install" { } module "execution_startup_script" { - source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=34bb7250" + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=185837b5" deployment_name = var.deployment_name project_id = var.project_id diff --git a/community/modules/remote-desktop/chrome-remote-desktop/README.md b/community/modules/remote-desktop/chrome-remote-desktop/README.md index dce66471fa..941e7a813c 100644 --- a/community/modules/remote-desktop/chrome-remote-desktop/README.md +++ b/community/modules/remote-desktop/chrome-remote-desktop/README.md @@ -63,7 +63,7 @@ No providers. 
| Name | Source | Version | |------|--------|---------| -| [client\_startup\_script](#module\_client\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 34bb7250 | +| [client\_startup\_script](#module\_client\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 185837b5 | | [instances](#module\_instances) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/compute/vm-instance | 69848ab | ## Resources diff --git a/community/modules/remote-desktop/chrome-remote-desktop/main.tf b/community/modules/remote-desktop/chrome-remote-desktop/main.tf index 9c5fa761a6..93ac14bd08 100644 --- a/community/modules/remote-desktop/chrome-remote-desktop/main.tf +++ b/community/modules/remote-desktop/chrome-remote-desktop/main.tf @@ -55,7 +55,7 @@ locals { } module "client_startup_script" { - source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=34bb7250" + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=185837b5" deployment_name = var.deployment_name project_id = var.project_id diff --git a/community/modules/scheduler/pbspro-client/README.md b/community/modules/scheduler/pbspro-client/README.md index ee1ec11597..4e90e39b12 100644 --- a/community/modules/scheduler/pbspro-client/README.md +++ b/community/modules/scheduler/pbspro-client/README.md @@ -74,7 +74,7 @@ No providers. 
| Name | Source | Version | |------|--------|---------| -| [client\_startup\_script](#module\_client\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 34bb7250 | +| [client\_startup\_script](#module\_client\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 185837b5 | | [pbs\_client](#module\_pbs\_client) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/compute/vm-instance | 6c6b9e0a | | [pbs\_install](#module\_pbs\_install) | github.com/GoogleCloudPlatform/hpc-toolkit//community/modules/scripts/pbspro-install | 6c6b9e0a | diff --git a/community/modules/scheduler/pbspro-client/main.tf b/community/modules/scheduler/pbspro-client/main.tf index 5b42f5e2a1..75db8bfaa9 100644 --- a/community/modules/scheduler/pbspro-client/main.tf +++ b/community/modules/scheduler/pbspro-client/main.tf @@ -43,7 +43,7 @@ module "pbs_install" { } module "client_startup_script" { - source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=34bb7250" + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=185837b5" deployment_name = var.deployment_name project_id = var.project_id diff --git a/community/modules/scheduler/pbspro-server/README.md b/community/modules/scheduler/pbspro-server/README.md index afc0b82965..7d29a5f456 100644 --- a/community/modules/scheduler/pbspro-server/README.md +++ b/community/modules/scheduler/pbspro-server/README.md @@ -72,7 +72,7 @@ No providers. 
| [pbs\_install](#module\_pbs\_install) | github.com/GoogleCloudPlatform/hpc-toolkit//community/modules/scripts/pbspro-install | 6c6b9e0a | | [pbs\_qmgr](#module\_pbs\_qmgr) | github.com/GoogleCloudPlatform/hpc-toolkit//community/modules/scripts/pbspro-qmgr | 6c6b9e0a | | [pbs\_server](#module\_pbs\_server) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/compute/vm-instance | 6c6b9e0a | -| [server\_startup\_script](#module\_server\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 34bb7250 | +| [server\_startup\_script](#module\_server\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 185837b5 | ## Resources diff --git a/community/modules/scheduler/pbspro-server/main.tf b/community/modules/scheduler/pbspro-server/main.tf index a0603f4a93..34d8767bd6 100644 --- a/community/modules/scheduler/pbspro-server/main.tf +++ b/community/modules/scheduler/pbspro-server/main.tf @@ -55,7 +55,7 @@ module "pbs_qmgr" { } module "server_startup_script" { - source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=34bb7250" + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=185837b5" deployment_name = var.deployment_name project_id = var.project_id diff --git a/modules/compute/vm-instance/README.md b/modules/compute/vm-instance/README.md index 62c8869662..da47291117 100644 --- a/modules/compute/vm-instance/README.md +++ b/modules/compute/vm-instance/README.md @@ -158,7 +158,7 @@ limitations under the License. 
| Name | Source | Version | |------|--------|---------| -| [netstorage\_startup\_script](#module\_netstorage\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 34bb7250 | +| [netstorage\_startup\_script](#module\_netstorage\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 185837b5 | ## Resources diff --git a/modules/compute/vm-instance/startup_from_network_storage.tf b/modules/compute/vm-instance/startup_from_network_storage.tf index f9e53f44a6..46f913334c 100644 --- a/modules/compute/vm-instance/startup_from_network_storage.tf +++ b/modules/compute/vm-instance/startup_from_network_storage.tf @@ -55,7 +55,7 @@ locals { } module "netstorage_startup_script" { - source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=34bb7250" + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=185837b5" labels = local.labels project_id = var.project_id diff --git a/modules/scheduler/batch-job-template/README.md b/modules/scheduler/batch-job-template/README.md index f469e95322..06459e8f6e 100644 --- a/modules/scheduler/batch-job-template/README.md +++ b/modules/scheduler/batch-job-template/README.md @@ -135,7 +135,7 @@ limitations under the License. 
| Name | Source | Version | |------|--------|---------| | [instance\_template](#module\_instance\_template) | terraform-google-modules/vm/google//modules/instance_template | ~> 8.0 | -| [netstorage\_startup\_script](#module\_netstorage\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 34bb7250 | +| [netstorage\_startup\_script](#module\_netstorage\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 185837b5 | ## Resources diff --git a/modules/scheduler/batch-job-template/startup_from_network_storage.tf b/modules/scheduler/batch-job-template/startup_from_network_storage.tf index f9e53f44a6..46f913334c 100644 --- a/modules/scheduler/batch-job-template/startup_from_network_storage.tf +++ b/modules/scheduler/batch-job-template/startup_from_network_storage.tf @@ -55,7 +55,7 @@ locals { } module "netstorage_startup_script" { - source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=34bb7250" + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=185837b5" labels = local.labels project_id = var.project_id diff --git a/modules/scheduler/batch-login-node/README.md b/modules/scheduler/batch-login-node/README.md index 122d574350..8be40bd1cb 100644 --- a/modules/scheduler/batch-login-node/README.md +++ b/modules/scheduler/batch-login-node/README.md @@ -89,7 +89,7 @@ limitations under the License. 
| Name | Source | Version | |------|--------|---------| -| [login\_startup\_script](#module\_login\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 34bb7250 | +| [login\_startup\_script](#module\_login\_startup\_script) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | 185837b5 | ## Resources diff --git a/modules/scheduler/batch-login-node/main.tf b/modules/scheduler/batch-login-node/main.tf index 3ecbca3b97..8cd6840d14 100644 --- a/modules/scheduler/batch-login-node/main.tf +++ b/modules/scheduler/batch-login-node/main.tf @@ -104,7 +104,7 @@ locals { } module "login_startup_script" { - source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=34bb7250" + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script?ref=185837b5" labels = local.labels project_id = var.project_id deployment_name = var.deployment_name From c0a4cc25aa1e5053d8448b665335ad36d90aa93d Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Tue, 13 Jun 2023 12:45:15 -0700 Subject: [PATCH 20/92] Refacot gke-job-template to remove duplicate information in templating --- .../modules/compute/gke-job-template/main.tf | 43 +++++++++---------- .../templates/gke-job-base.yaml.tftpl | 17 +++----- 2 files changed, 28 insertions(+), 32 deletions(-) diff --git a/community/modules/compute/gke-job-template/main.tf b/community/modules/compute/gke-job-template/main.tf index be35046e8d..e9e80b1671 100644 --- a/community/modules/compute/gke-job-template/main.tf +++ b/community/modules/compute/gke-job-template/main.tf @@ -38,39 +38,38 @@ locals { ) ) millicpu = floor(local.cpu_request * 1000) - should_request_cpu = local.millicpu >= 0 + cpu_request_string = local.millicpu >= 0 ? "${local.millicpu}m" : null full_node_request = local.min_allocatable_cpu >= 0 && var.requested_cpu_per_pod < 0 - should_request_gpu = alltrue(var.has_gpu) # arbitrarily, user can edit in template. 
# May come from node pool in future. - gpu_limit = 1 + gpu_limit_string = alltrue(var.has_gpu) ? "1" : null suffix = var.random_name_sufix ? "-${random_id.resource_name_suffix.hex}" : "" - has_node_selector = var.machine_family != null || length(var.node_selectors) != 0 + machine_family_node_selector = var.machine_family != null ? [{ + key = "cloud.google.com/machine-family" + value = var.machine_family + }] : [] + node_selectors = concat(local.machine_family_node_selector, var.node_selectors) job_template_contents = templatefile( "${path.module}/templates/gke-job-base.yaml.tftpl", { - name = var.name - suffix = local.suffix - image = var.image - command = var.command - node_count = var.node_count - machine_family = var.machine_family - node_pool_names = var.node_pool_name - node_selectors = var.node_selectors - has_node_selector = local.has_node_selector - should_request_cpu = local.should_request_cpu - full_node_request = local.full_node_request - millicpu_request = "${local.millicpu}m" - should_request_gpu = local.should_request_gpu - gpu_limit = local.gpu_limit - restart_policy = var.restart_policy - backoff_limit = var.backoff_limit - tolerations = distinct(var.tolerations) - labels = local.labels + name = var.name + suffix = local.suffix + image = var.image + command = var.command + node_count = var.node_count + node_pool_names = var.node_pool_name + node_selectors = local.node_selectors + full_node_request = local.full_node_request + cpu_request = local.cpu_request_string + gpu_limit = local.gpu_limit_string + restart_policy = var.restart_policy + backoff_limit = var.backoff_limit + tolerations = distinct(var.tolerations) + labels = local.labels } ) diff --git a/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl b/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl index cced1a9d30..09f609424f 100644 --- a/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl +++ 
b/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl @@ -25,13 +25,10 @@ spec: - ${node_pool} %{~ endfor ~} %{~ endif ~} - %{~ if has_node_selector ~} + %{~ if length(node_selectors) > 0 ~} nodeSelector: - %{~ if machine_family != null ~} - cloud.google.com/machine-family: ${machine_family} - %{~ endif ~} - %{~ for key, val in node_selectors ~} - ${key}: ${val} + %{~ for selector in node_selectors ~} + ${selector.key}: ${selector.value} %{~ endfor ~} %{~ endif ~} tolerations: @@ -45,20 +42,20 @@ spec: - name: ${name}-container image: ${image} command: [%{~ for s in command ~}"${s}",%{~ endfor ~}] - %{~ if should_request_cpu || should_request_gpu ~} + %{~ if gpu_limit != null || cpu_request != null ~} resources: - %{~ if should_request_gpu ~} + %{~ if gpu_limit != null ~} limits: # GPUs should only be specified as limits # https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/ nvidia.com/gpu: ${gpu_limit} %{~ endif ~} - %{~ if should_request_cpu ~} + %{~ if cpu_request != null ~} requests: %{~ if full_node_request ~} # cpu request attempts full node per pod %{~ endif ~} - cpu: ${millicpu_request} + cpu: ${cpu_request} %{~ endif ~} %{~ endif ~} restartPolicy: ${restart_policy} From 93b876b7761b819c5262a9d11269d9e53595f28d Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 13 Jun 2023 13:17:15 -0700 Subject: [PATCH 21/92] `resreader.go` code clean up (#1445) --- pkg/modulereader/resreader.go | 20 ++++---------------- pkg/modulereader/resreader_test.go | 10 ---------- 2 files changed, 4 insertions(+), 26 deletions(-) diff --git a/pkg/modulereader/resreader.go b/pkg/modulereader/resreader.go index ec10a45252..6b11fc7e3c 100644 --- a/pkg/modulereader/resreader.go +++ b/pkg/modulereader/resreader.go @@ -185,25 +185,13 @@ var kinds = map[string]ModReader{ "packer": NewPackerReader(), } -// IsValidReaderKind returns true if the kind input is valid -func IsValidReaderKind(input string) bool { - for k := range kinds { - if k == input { - 
return true - } - } - return false -} - // Factory returns a ModReader of type 'kind' func Factory(kind string) ModReader { - for k, v := range kinds { - if kind == k { - return v - } + r, ok := kinds[kind] + if !ok { + log.Fatalf("Invalid request to create a reader of kind %s", kind) } - log.Fatalf("Invalid request to create a reader of kind %s", kind) - return nil + return r } func defaultAPIList(source string) []string { diff --git a/pkg/modulereader/resreader_test.go b/pkg/modulereader/resreader_test.go index d5be2e3b6d..6aeae64127 100644 --- a/pkg/modulereader/resreader_test.go +++ b/pkg/modulereader/resreader_test.go @@ -72,16 +72,6 @@ func Test(t *testing.T) { TestingT(t) } -// modulereader.go -func (s *MySuite) TestIsValidKind(c *C) { - c.Assert(IsValidReaderKind(pkrKindString), Equals, true) - c.Assert(IsValidReaderKind(tfKindString), Equals, true) - c.Assert(IsValidReaderKind("Packer"), Equals, false) - c.Assert(IsValidReaderKind("Terraform"), Equals, false) - c.Assert(IsValidReaderKind("META"), Equals, false) - c.Assert(IsValidReaderKind(""), Equals, false) -} - func (s *MySuite) TestGetOutputsAsMap(c *C) { // Simple: empty outputs modInfo := ModuleInfo{} From 9b085a1565f2e9bc3ec972805a264405310e2169 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 13 Jun 2023 14:32:58 -0700 Subject: [PATCH 22/92] Do not use `log.Fatal` in `pkg/config` (#1444) Motivation: * `pkg/config` is a library, it should control lifyce of the application; * Improve tests failure investigation. 
--- cmd/root.go | 3 +++ pkg/config/config.go | 55 ++++++++++++++++++++++----------------- pkg/config/expand.go | 18 +++++-------- pkg/config/expand_test.go | 3 +-- pkg/config/validate.go | 16 ++++-------- 5 files changed, 47 insertions(+), 48 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index f0c23dfea0..220d829336 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -57,6 +57,9 @@ HPC deployments on the Google Cloud Platform.`, // Execute the root command func Execute() error { + // Don't prefix messages with data & time to improve readability. + // See https://pkg.go.dev/log#pkg-constants + log.SetFlags(0) mismatch, branch, hash, dir := checkGitHashMismatch() if mismatch { diff --git a/pkg/config/config.go b/pkg/config/config.go index f4c477ffae..f660368327 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -350,9 +350,15 @@ func (dc *DeploymentConfig) ExpandConfig() error { } dc.Config.setGlobalLabels() dc.Config.addKindToModules() - dc.validateConfig() - dc.expand() - dc.validate() + if err := dc.validateConfig(); err != nil { + return err + } + if err := dc.expand(); err != nil { + return err + } + if err := dc.validate(); err != nil { + return err + } return nil } @@ -602,45 +608,46 @@ func checkBackends(bp Blueprint) error { } // validateConfig runs a set of simple early checks on the imported input YAML -func (dc *DeploymentConfig) validateConfig() { - _, err := dc.Config.DeploymentName() - if err != nil { - log.Fatal(err) +func (dc *DeploymentConfig) validateConfig() error { + + if _, err := dc.Config.DeploymentName(); err != nil { + return err } - err = dc.Config.checkBlueprintName() - if err != nil { - log.Fatal(err) + + if err := dc.Config.checkBlueprintName(); err != nil { + return err } - if err = dc.validateVars(); err != nil { - log.Fatal(err) + if err := dc.validateVars(); err != nil { + return err } - if err = dc.Config.checkModulesInfo(); err != nil { - log.Fatal(err) + if err := dc.Config.checkModulesInfo(); err != nil { + return 
err } - if err = checkModulesAndGroups(dc.Config.DeploymentGroups); err != nil { - log.Fatal(err) + if err := checkModulesAndGroups(dc.Config.DeploymentGroups); err != nil { + return err } // checkPackerGroups must come after checkModulesAndGroups, in which group // Kind is set and aligned with module Kinds - if err = checkPackerGroups(dc.Config.DeploymentGroups); err != nil { - log.Fatal(err) + if err := checkPackerGroups(dc.Config.DeploymentGroups); err != nil { + return err } - if err = checkUsedModuleNames(dc.Config); err != nil { - log.Fatal(err) + if err := checkUsedModuleNames(dc.Config); err != nil { + return err } - if err = checkBackends(dc.Config); err != nil { - log.Fatal(err) + if err := checkBackends(dc.Config); err != nil { + return err } - if err = checkModuleSettings(dc.Config); err != nil { - log.Fatal(err) + if err := checkModuleSettings(dc.Config); err != nil { + return err } + return nil } // SkipValidator marks validator(s) as skipped, diff --git a/pkg/config/expand.go b/pkg/config/expand.go index 5f0ee19eb6..d0328e42c4 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -45,37 +45,33 @@ var ( // expand expands variables and strings in the yaml config. Used directly by // ExpandConfig for the create and expand commands. 
-func (dc *DeploymentConfig) expand() { +func (dc *DeploymentConfig) expand() error { if err := dc.addMetadataToModules(); err != nil { log.Printf("could not determine required APIs: %v", err) } if err := dc.expandBackends(); err != nil { - log.Fatalf("failed to apply default backend to deployment groups: %v", err) + return fmt.Errorf("failed to apply default backend to deployment groups: %v", err) } if err := dc.addDefaultValidators(); err != nil { - log.Fatalf( - "failed to update validators when expanding the config: %v", err) + return fmt.Errorf("failed to update validators when expanding the config: %v", err) } if err := dc.combineLabels(); err != nil { - log.Fatalf( - "failed to update module labels when expanding the config: %v", err) + return fmt.Errorf("failed to update module labels when expanding the config: %v", err) } if err := dc.applyUseModules(); err != nil { - log.Fatalf( - "failed to apply \"use\" modules when expanding the config: %v", err) + return fmt.Errorf("failed to apply \"use\" modules when expanding the config: %v", err) } if err := dc.applyGlobalVariables(); err != nil { - log.Fatalf( - "failed to apply deployment variables in modules when expanding the config: %v", - err) + return fmt.Errorf("failed to apply deployment variables in modules when expanding the config: %v", err) } dc.Config.populateOutputs() + return nil } func (dc *DeploymentConfig) addMetadataToModules() error { diff --git a/pkg/config/expand_test.go b/pkg/config/expand_test.go index 1d1c8309e7..513f59c770 100644 --- a/pkg/config/expand_test.go +++ b/pkg/config/expand_test.go @@ -24,8 +24,7 @@ import ( func (s *MySuite) TestExpand(c *C) { dc := getDeploymentConfigForTest() - fmt.Println("TEST_DEBUG: If tests die without report, check TestExpand") - dc.expand() + c.Check(dc.expand(), IsNil) } func (s *MySuite) TestExpandBackends(c *C) { diff --git a/pkg/config/validate.go b/pkg/config/validate.go index e9aea870a1..9207191724 100644 --- a/pkg/config/validate.go +++ 
b/pkg/config/validate.go @@ -50,24 +50,18 @@ func (err *InvalidSettingError) Error() string { } // validate is the top-level function for running the validation suite. -func (dc DeploymentConfig) validate() { - // Drop the flags for log to improve readability only for running the validation suite - log.SetFlags(0) - +func (dc DeploymentConfig) validate() error { // variables should be validated before running validators if err := dc.executeValidators(); err != nil { - log.Fatal(err) + return err } - if err := dc.validateModules(); err != nil { - log.Fatal(err) + return err } if err := dc.validateModuleSettings(); err != nil { - log.Fatal(err) + return err } - - // Set it back to the initial value - log.SetFlags(log.LstdFlags) + return nil } // performs validation of global variables From 3646ab0c907e0babe40739d66eb57da29e1c4fb3 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 13 Jun 2023 15:14:03 -0700 Subject: [PATCH 23/92] Remove `Module.RequiredApis` (#1446) * Remove `Module.RequiredApis`; * Figure out list of APIs during validation. --- modules/README.md | 27 +------- pkg/config/config.go | 1 - pkg/config/config_test.go | 12 ++-- pkg/config/expand.go | 24 ------- pkg/config/validate.go | 64 ++++--------------- pkg/config/validator_test.go | 50 --------------- .../.ghpc/artifacts/expanded_blueprint.yaml | 16 ----- .../.ghpc/artifacts/expanded_blueprint.yaml | 6 -- .../.ghpc/artifacts/expanded_blueprint.yaml | 4 -- 9 files changed, 21 insertions(+), 183 deletions(-) diff --git a/modules/README.md b/modules/README.md index 9809b8e473..b53aaeee2c 100644 --- a/modules/README.md +++ b/modules/README.md @@ -433,34 +433,11 @@ in the project used by the HPC environment. For example, the [creation of VMs](compute/vm-instance/) requires the Compute Engine API (compute.googleapis.com). The [startup-script](scripts/startup-script/) module requires the Cloud Storage API (storage.googleapis.com) for storage of the -scripts themselves. 
Each module includes in the Toolkit source code describes -its required APIs internally. The Toolkit will merge the requiements from all +scripts themselves. Each module included in the Toolkit source code describes +its required APIs internally. The Toolkit will merge the requirements from all modules and [automatically validate](../README.md#blueprint-validation) that all APIs are enabled in the project specified by `$(vars.project_id)`. -For advanced multi-project use cases and for modules not included with the -Toolkit, you may manually add required APIs to each module with the following -format: - -```yaml -deployment_groups: -- group: primary - modules: - ... - - id: examplevm - source: modules/example/module - required_apis: - $(vars.project_id): - - compute.googleapis.com - - storage.googleapis.com - $(vars.other_project_id): - - storage.googleapis.com - explicit-project-id: - - file.googleapis.com - settings: - ... -``` - ## Common Settings The following common naming conventions should be used to decrease the verbosity diff --git a/pkg/config/config.go b/pkg/config/config.go index f660368327..0338d0b885 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -303,7 +303,6 @@ type Module struct { WrapSettingsWith map[string][]string Outputs []modulereader.OutputInfo `yaml:"outputs,omitempty"` Settings Dict - RequiredApis map[string][]string `yaml:"required_apis"` } // createWrapSettingsWith ensures WrapSettingsWith field is not nil, if it is diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 23279f184f..8b445cf8cb 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -204,8 +204,7 @@ func getDeploymentConfigForTest() DeploymentConfig { setTestModuleInfo(testModule, testModuleInfo) setTestModuleInfo(testModuleWithLabels, testModuleInfo) - // the next two steps simulate relevant steps in ghpc expand - dc.addMetadataToModules() + // the next step simulates relevant step in ghpc expand dc.addDefaultValidators() 
return dc @@ -224,6 +223,7 @@ func getBasicDeploymentConfigWithTestModule() DeploymentConfig { }, }, } + return DeploymentConfig{ Config: Blueprint{ BlueprintName: "simple", @@ -356,7 +356,6 @@ func getMultiGroupDeploymentConfig() DeploymentConfig { }, } - dc.addMetadataToModules() dc.addDefaultValidators() return dc } @@ -393,7 +392,12 @@ func getDeploymentConfigWithTestModuleEmptyKind() DeploymentConfig { // config.go func (s *MySuite) TestExpandConfig(c *C) { dc := getBasicDeploymentConfigWithTestModule() - dc.ExpandConfig() + for v := range dc.getValidators() { // skip all validators + dc.Config.Validators = append( + dc.Config.Validators, + validatorConfig{Validator: v, Skip: true}) + } + c.Check(dc.ExpandConfig(), IsNil) } func (s *MySuite) TestCheckModulesAndGroups(c *C) { diff --git a/pkg/config/expand.go b/pkg/config/expand.go index d0328e42c4..fe8344bf46 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -16,7 +16,6 @@ package config import ( "fmt" - "log" "regexp" "strings" @@ -46,10 +45,6 @@ var ( // expand expands variables and strings in the yaml config. Used directly by // ExpandConfig for the create and expand commands. 
func (dc *DeploymentConfig) expand() error { - if err := dc.addMetadataToModules(); err != nil { - log.Printf("could not determine required APIs: %v", err) - } - if err := dc.expandBackends(); err != nil { return fmt.Errorf("failed to apply default backend to deployment groups: %v", err) } @@ -74,25 +69,6 @@ func (dc *DeploymentConfig) expand() error { return nil } -func (dc *DeploymentConfig) addMetadataToModules() error { - return dc.Config.WalkModules(func(mod *Module) error { - if mod.RequiredApis != nil { - return nil - } - if dc.Config.Vars.Get("project_id").Type() != cty.String { - return fmt.Errorf("global variable project_id must be defined") - } - requiredAPIs := mod.InfoOrDie().RequiredApis - if requiredAPIs == nil { - requiredAPIs = []string{} - } - mod.RequiredApis = map[string][]string{ - "$(vars.project_id)": requiredAPIs, - } - return nil - }) -} - func (dc *DeploymentConfig) expandBackends() error { // 1. DEFAULT: use TerraformBackend configuration (if supplied) in each // resource group diff --git a/pkg/config/validate.go b/pkg/config/validate.go index 9207191724..41e209741a 100644 --- a/pkg/config/validate.go +++ b/pkg/config/validate.go @@ -28,7 +28,6 @@ import ( "github.com/pkg/errors" "github.com/zclconf/go-cty/cty" "golang.org/x/exp/maps" - "golang.org/x/exp/slices" "gopkg.in/yaml.v3" ) @@ -316,67 +315,26 @@ func (dc *DeploymentConfig) getValidators() map[string]func(validatorConfig) err return allValidators } -// The expected use case of this function is to merge blueprint requirements -// that are maps from project_id to string slices containing APIs or IAM roles -// required for provisioning. It will remove duplicate elements and ensure that -// the output is sorted for easy visual and automatic comparison. 
-// Solution: merge []string of new[key] into []string of base[key], removing -// duplicate elements and sorting the result -func mergeBlueprintRequirements(base map[string][]string, new map[string][]string) map[string][]string { - dest := make(map[string][]string) - maps.Copy(dest, base) - - // sort each value in dest in-place to ensure output is sorted when new map - // does not contain all keys in base - for _, v := range dest { - slices.Sort(v) - } - - for newProject, newRequirements := range new { - // this code is safe even if dest[newProject] has not yet been populated - dest[newProject] = append(dest[newProject], newRequirements...) - slices.Sort(dest[newProject]) - dest[newProject] = slices.Compact(dest[newProject]) - } - return dest -} - func (dc *DeploymentConfig) testApisEnabled(c validatorConfig) error { if err := c.check(testApisEnabledName, []string{}); err != nil { return err } - requiredApis := make(map[string][]string) - for _, grp := range dc.Config.DeploymentGroups { - for _, mod := range grp.Modules { - requiredApis = mergeBlueprintRequirements(requiredApis, mod.RequiredApis) - } + pv := dc.Config.Vars.Get("project_id") + if pv.Type() != cty.String { + return fmt.Errorf("the deployment variable `project_id` is either not set or is not a string") } - var errored bool - for project, apis := range requiredApis { - if hasVariable(project) { - expr, err := SimpleVarToExpression(project) - if err != nil { - return err - } - v, err := expr.Eval(dc.Config) - if err != nil { - return err - } - if v.Type() != cty.String { - return fmt.Errorf("the deployment variable %s is not a string", project) - } - project = v.AsString() - } - err := validators.TestApisEnabled(project, apis) - if err != nil { - log.Println(err) - errored = true + apis := map[string]bool{} + dc.Config.WalkModules(func(m *Module) error { + for _, api := range m.InfoOrDie().RequiredApis { + apis[api] = true } - } + return nil + }) - if errored { + if err := 
validators.TestApisEnabled(pv.AsString(), maps.Keys(apis)); err != nil { + log.Println(err) return fmt.Errorf(funcErrorMsgTemplate, testApisEnabledName.String()) } return nil diff --git a/pkg/config/validator_test.go b/pkg/config/validator_test.go index 10ed07ce5a..38809ae811 100644 --- a/pkg/config/validator_test.go +++ b/pkg/config/validator_test.go @@ -17,14 +17,11 @@ package config import ( "fmt" "path/filepath" - "sort" "hpc-toolkit/pkg/modulereader" "github.com/pkg/errors" "github.com/zclconf/go-cty/cty" - "golang.org/x/exp/maps" - "golang.org/x/exp/slices" . "gopkg.in/check.v1" ) @@ -192,53 +189,6 @@ func (s *MySuite) TestAddDefaultValidators(c *C) { c.Assert(dc.Config.Validators, HasLen, 7) } -func (s *MySuite) TestMergeBlueprintRequirements(c *C) { - map1 := make(map[string][]string) - map2 := make(map[string][]string) - - // each expected value should individually be sorted and have no duplicate - // elements, although different values may share elements - expectedValues1 := []string{"bar", "bat"} - expectedValues2 := []string{"value2", "value3"} - - reversedValues1 := slices.Clone(expectedValues1) - sort.Sort(sort.Reverse(sort.StringSlice(reversedValues1))) - - // TEST: merge with identical keys and duplicate elements in values - map1["key1"] = slices.Clone(reversedValues1) - map2["key1"] = []string{expectedValues1[0], expectedValues1[0]} - map3 := mergeBlueprintRequirements(map1, map2) - - // expected value (duplicates removed and sorted) - expectedMap := map[string][]string{ - "key1": expectedValues1, - } - c.Assert(maps.EqualFunc(map3, expectedMap, slices.Equal[string]), Equals, true) - - // unexpected value (duplicates removed and reverse sorted) - unexpectedMap := map[string][]string{ - "key1": reversedValues1, - } - c.Assert(maps.EqualFunc(map3, unexpectedMap, slices.Equal[string]), Equals, false) - - // TEST: merge with additional key in 1st map - map1["key2"] = []string{expectedValues2[1], expectedValues2[0]} - map3 = 
mergeBlueprintRequirements(map1, map2) - - // test the expected value (duplicates removed and sorted) - expectedMap = map[string][]string{ - "key1": slices.Clone(expectedValues1), - "key2": slices.Clone(expectedValues2), - } - c.Assert(maps.EqualFunc(map3, expectedMap, slices.Equal[string]), Equals, true) - - // TEST: merge with additional key in 2nd map (expected value unchanged!) - delete(map1, "key2") - map2["key2"] = slices.Clone(expectedValues2) - map3 = mergeBlueprintRequirements(map1, map2) - c.Assert(maps.EqualFunc(map3, expectedMap, slices.Equal[string]), Equals, true) -} - func (s *MySuite) TestExecuteValidators(c *C) { dc := getDeploymentConfigForTest() dc.Config.Validators = []validatorConfig{ diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml index 5d3066cb2e..80d461a90b 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml @@ -63,9 +63,6 @@ deployment_groups: deployment_name: ((var.deployment_name )) project_id: ((var.project_id )) region: ((var.region )) - required_apis: - $(vars.project_id): - - compute.googleapis.com - source: modules/file-system/filestore kind: terraform id: homefs @@ -85,9 +82,6 @@ deployment_groups: project_id: ((var.project_id )) region: ((var.region )) zone: ((var.zone )) - required_apis: - $(vars.project_id): - - file.googleapis.com - source: modules/file-system/filestore kind: terraform id: projectsfs @@ -107,9 +101,6 @@ deployment_groups: project_id: ((var.project_id )) region: ((var.region )) zone: ((var.zone )) - required_apis: - $(vars.project_id): - - file.googleapis.com - source: modules/scripts/startup-script kind: terraform id: script @@ -135,9 +126,6 @@ deployment_groups: echo "Hello, World!" 
destination: hello.sh type: shell - required_apis: - $(vars.project_id): - - storage.googleapis.com kind: terraform - group: one terraform_backend: @@ -161,10 +149,6 @@ deployment_groups: startup_script: ((module.script.startup_script)) subnetwork_name: ((module.network0.subnetwork_name)) zone: ((var.zone )) - required_apis: - $(vars.project_id): - - compute.googleapis.com - - storage.googleapis.com kind: packer terraform_backend_defaults: type: "" diff --git a/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml index a285178473..6f82f91e27 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml @@ -65,9 +65,6 @@ deployment_groups: deployment_name: ((var.deployment_name )) project_id: ((var.project_id )) region: ((var.region )) - required_apis: - $(vars.project_id): - - compute.googleapis.com kind: terraform - group: one terraform_backend: @@ -94,9 +91,6 @@ deployment_groups: project_id: ((var.project_id )) region: ((var.region )) zone: ((var.zone )) - required_apis: - $(vars.project_id): - - file.googleapis.com kind: terraform terraform_backend_defaults: type: "" diff --git a/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml index 7398085201..90dbabb5ae 100644 --- a/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml @@ -68,10 +68,6 @@ deployment_groups: project_id: ((var.project_id)) subnetwork_name: \$(purple zone: ((var.zone)) - required_apis: - 
$(vars.project_id): - - compute.googleapis.com - - storage.googleapis.com kind: packer terraform_backend_defaults: type: "" From 5cd81595be333ca4746668cb8278ba45909fa1b7 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 13 Jun 2023 22:46:18 -0700 Subject: [PATCH 24/92] Add `docs/module-guidelines.md` (#1423) --- docs/module-guidelines.md | 185 ++++++++++++++++++++++++++++++++++++++ modules/README.md | 42 +-------- 2 files changed, 187 insertions(+), 40 deletions(-) create mode 100644 docs/module-guidelines.md diff --git a/docs/module-guidelines.md b/docs/module-guidelines.md new file mode 100644 index 0000000000..19a681e61b --- /dev/null +++ b/docs/module-guidelines.md @@ -0,0 +1,185 @@ +# Module authoring guidelines + +Modules should adhere to following guidelines. + +## Terraform Requirements + +The module source field must point to a single terraform module. We recommend +the following structure: + +* `main.tf` file composing the terraform resources using provided variables. +* `variables.tf` file defining the variables used. +* (Optional) `outputs.tf` file defining any exported outputs used (if any). +* (Optional) `modules/` sub-directory pointing to submodules needed to create the + top level module. + +## General Best Practices + +* Variables for environment-specific values (like `project_id`) should not be + given defaults. This forces the calling module to provide meaningful values. +* Variables should only have zero-value defaults, such as null (preferred) or empty string, + where leaving the variable empty is a valid preference which will not be + rejected by the underlying API(s). +* Set good defaults wherever possible. Be opinionated about HPC use cases. +* Follow common variable [naming conventions](#use-common-names-and-types-for-common-variables). +* If there are common hpc-toolkit variables already defined, then do not set defaults (`region`, `zone`, `project_id`, `deployment_name`, etc.) +* All files should contain a license header. 
Headers can be added automatically using [addlicense](https://github.com/google/addlicense),
+  or `make add-google-license` if adding a Google License.
+* No `provider` blocks should be defined in re-usable modules. It is OK to impose a range of acceptable provider versions.
+  In the case of conflicts, the root module will configure all providers and pass alternatives as an alias. See:
+https://developer.hashicorp.com/terraform/language/modules/develop/providers#implicit-provider-inheritance
+
+## Terraform Coding Standards
+
+Any Terraform based modules in the HPC Toolkit should implement the following
+standards:
+
+* `terraform-docs` is used to generate `README` files for each module.
+* The order for parameters in inputs should be:
+  * `description`
+  * `type`
+  * `default`
+* The order for parameters in outputs should be:
+  * `description`
+  * `value`
+
+## Do not create resources that can be passed externally
+
+Do not create resources that can be passed externally unless:
+* resource has to be owned uniquely by the module;
+* resource has to conform to module specific constraints (e.g. vm-instance with particular image, or firewall rule to serve needs of this module);
+* the resource cannot possibly be (re)used outside of this module.
+
+Example resources already provided by core toolkit modules:
+
+* [vm-instance](https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/modules/compute/vm-instance)
+* [vpc & subnetworks](https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/modules/network)
+* [filestore](https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/modules/file-system/filestore)
+
+File systems, networks, service accounts, GCS buckets can all be passed into the module and should not be created within the module.
+
+## Prefer FQN over ambiguous formats
+
+For instance, in networks `network_self_link` over `network_name`, `subnetwork_self_link` over `subnetwork_name`, as these immediately work with shared VPCs, and already specify `region/zone/project_ids`.
+
+## All resources should be labeled
+
+The module, if it creates any resource, should take a variable called `labels` and apply it to every resource.
+
+```hcl
+variable "labels" {
+  description = "Labels to add to the resources. Key-value pairs."
+  type        = map(string)
+}
+```
+
+If the module creates its own labels, then we recommend merging user-provided labels into the module’s labels:
+
+```hcl
+locals {
+  labels = merge(var.labels, { ghpc_module = "my-module" })
+}
+```
+
+## Use common names and types for common variables
+
+Matching names allow variables to be implicitly injected into the module.
+
+* `project_id {type=string}` - the GCP project ID in which to create the GCP resources;
+* `labels {type=map(string)}` - [labels](https://cloud.google.com/resource-manager/docs/creating-managing-labels) added to the module. In order to include any module in advanced
+  monitoring, labels must be exposed. We strongly recommend that all modules
+  expose this variable. It also makes it easy for customers to filter resources on the cloud console and billing;
+* `region {type=string}` - the GCP
+  [region](https://cloud.google.com/compute/docs/regions-zones) the module will be created in;
+* `zone {type=string}` - the GCP [zone](https://cloud.google.com/compute/docs/regions-zones)
+  the module will be created in;
+* `deployment_name {type=string}` - the name of the current deployment of a blueprint. This
+  can help to avoid naming conflicts of modules when multiple deployments are
+  created from the same blueprint. `deployment_name` is often used to determine default resource names, or a prefix to the resource names e.g.
[`modules/filestore.deploy_name`](../modules/file-system/filestore/README.md#inputs); + +### `instance_image {type=object({family=string,project=string})}` + +To take/return information about instance image use variable `instance_image`. If it's critical for the module to include the `name` use `type=map(string)` instead. + +### `enable_oslogin {type=string}` + +When relevant, Enable or Disable OS Login with `"ENABLE"` or `"DISABLE"`. Set to `"INHERIT"` to inherit the project OS Login setting. . Note this ongoing development is not yet fully homogenized in the Cloud HPC Toolkit. + +### Network + +Properties of networks are represented by scattered variables: +* `network_name` - name of the network (avoid using this); +* `network_id` - ID of the network; +* `network_self_link` - URI of the VPC (preferred); +* `subnetwork_name` - the name of the primary subnetwork; +* `subnetwork_self_link` - self-link to the primary subnetwork (preferred); +* `subnetwork_address` - address range of the primary subnetwork. + +### Network Storage + +If your module provides a mountable network storage it should output `network_storage` of type: + +```hcl +object({ + server_ip = string + remote_mount = string + local_mount = string + fs_type = string + mount_options = string + client_install_runner = map(string) # Runner for installing client + mount_runner = map(string) # Runner to mount the file-system +}) +``` + +If a module returns multiple "storages" it should output `network_storage` of type `list(object(... same as above...))`. + +If a module consumes network storage it should have a variable `network_storage` of type `list(object(... any subset of fields from above ...))`. + +## Use startup-script module + +If there is a need to execute shell script, ansible playbook or just upload file to the vm-instance, consider using `startup-script` [module](https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/modules/scripts/startup-script) as a first option. 
`startup-script` module takes care of uploading local files to the GCS and downloading files to the vm-instance, and installing ansible if needed, configuring ssh, and executing requested scripts. + +To represent a script to execute HPC Toolkit modules use "runners". Runner is a `map(string)` with following expected fields: + +* `destination`: (Required) The name of the file at the destination VM; +* `type`: (Required) One of the following: shell, ansible-local, and data; +* `content`: (Optional) Content to be uploaded and executed; +* `source`: (Optional) A path to the file or data you want to upload; +* `args`: (Optional) Arguments to be passed to the executable. + +If your module consumes/produces scripts to run vm-instances, please adhere to this format. + +`startup-script` module example of usage: + +```hcl +variable "extra_runner" { + description = "Custom script provided by user to run on vm-instance" + type = map(string) +} +... +locals { + common_runner = { # some setup required by your module + "type" = "shell" + "content" = "echo Hello" + "destination" = "say_hi.sh" + } + runners = [local.common_runner, var.extra_runner] +} +... +module "startup_script" { + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script" + deployment_name = var.deployment_name + project_id = var.project_id + region = var.region + labels = local.labels + runners = local.runners +} +... +module "vm" { + source = "github.com/GoogleCloudPlatform/hpc-toolkit//modules/compute/vm-instance" + ... + startup_script = module.startup_script.startup_script +} +``` + +For more information see [startup-script/README](https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/modules/scripts/startup-script#readme). 
diff --git a/modules/README.md b/modules/README.md index b53aaeee2c..931a210756 100644 --- a/modules/README.md +++ b/modules/README.md @@ -449,7 +449,7 @@ For example, if all modules are to be created in a single region, that region can be defined as a deployment variable named `region`, which is shared between all modules without an explicit setting. Similarly, if many modules need to be connected to the same VPC network, they all can add the vpc module ID to their -`use` list so that `network_name` would be inferred from that vpc module rather +`use` list so that `network_self_link` would be inferred from that vpc module rather than having to set it manually. * **project_id**: The GCP project ID in which to create the GCP resources. @@ -461,7 +461,6 @@ than having to set it manually. will be created in. * **zone**: The GCP [zone](https://cloud.google.com/compute/docs/regions-zones) the module will be created in. -* **network_name**: The name of the network a module will use or connect to. * **labels**: [Labels](https://cloud.google.com/resource-manager/docs/creating-managing-labels) added to the module. In order to include any module in advanced @@ -470,42 +469,5 @@ than having to set it manually. ## Writing Custom HPC Modules -Modules are flexible by design, however we do define some best practices when +Modules are flexible by design, however we do define some [best practices](../docs/module-guidelines.md) when creating a new module meant to be used with the HPC Toolkit. - -### Terraform Requirements - -The module source field must point to a single terraform module. We recommend -the following structure: - -* main.tf file composing the terraform resources using provided variables. -* variables.tf file defining the variables used. -* (Optional) outputs.tf file defining any exported outputs used (if any). -* (Optional) modules/ sub-directory pointing to submodules needed to create the - top level module. 
- -### General Best Practices - -* Variables for environment-specific values (like project_id) should not be - given defaults. This forces the calling module to provide meaningful values. -* Variables should only have zero-value defaults (like null or empty strings) - where leaving the variable empty is a valid preference which will not be - rejected by the underlying API(s). -* Set good defaults wherever possible. Be opinionated about HPC use cases. -* Follow common variable [naming conventions](#common-settings). - -### Terraform Coding Standards - -Any Terraform based modules in the HPC Toolkit should implement the following -standards: - -* terraform-docs is used to generate README files for each module. -* The first parameter listed under a module should be source (when referring to - an external implementation). -* The order for parameters in inputs should be: - * description - * type - * default -* The order for parameters in outputs should be: - * description - * value From c9fd7cb4e396008f2e869a11fb3f9fec96372810 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 14 Jun 2023 08:54:30 -0500 Subject: [PATCH 25/92] Improve Packer experience for Windows Automatically set metadata to create, configure, and delete the user needed to run Powershell scripts. Because this user is already an administrator, it is not necessary to use the elevated_user attribute on the Powershell provisioner. 
--- modules/packer/custom-image/image.pkr.hcl | 44 +++++++++++-------- .../igc_pkr/one/image/image.pkr.hcl | 44 +++++++++++-------- .../text_escape/zero/lime/image.pkr.hcl | 44 +++++++++++-------- 3 files changed, 75 insertions(+), 57 deletions(-) diff --git a/modules/packer/custom-image/image.pkr.hcl b/modules/packer/custom-image/image.pkr.hcl index bbcd601ec5..9098d32b05 100644 --- a/modules/packer/custom-image/image.pkr.hcl +++ b/modules/packer/custom-image/image.pkr.hcl @@ -18,9 +18,25 @@ locals { image_name_default = "${local.image_family}-${formatdate("YYYYMMDD't'hhmmss'z'", timestamp())}" image_name = var.image_name != null ? var.image_name : local.image_name_default + # default to explicit var.communicator, otherwise in-order: ssh/winrm/none + shell_script_communicator = length(var.shell_scripts) > 0 ? "ssh" : "" + ansible_playbook_communicator = length(var.ansible_playbooks) > 0 ? "ssh" : "" + powershell_script_communicator = length(var.powershell_scripts) > 0 ? "winrm" : "" + communicator = coalesce( + var.communicator, + local.shell_script_communicator, + local.ansible_playbook_communicator, + local.powershell_script_communicator, + "none" + ) + + # must not enable IAP when no communicator is in use + use_iap = local.communicator == "none" ? false : var.use_iap + # construct metadata from startup_script and metadata variables startup_script_metadata = var.startup_script == null ? 
{} : { startup-script = var.startup_script } - user_management_metadata = { + + linux_user_metadata = { block-project-ssh-keys = "TRUE" shutdown-script = <<-EOT #!/bin/bash @@ -28,30 +44,21 @@ locals { sed -i '/${var.ssh_username}/d' /var/lib/google/google_users EOT } + windows_packer_user = "packer_user" + windows_user_metadata = { + sysprep-specialize-script-cmd = "winrm quickconfig -quiet & net user /add ${local.windows_packer_user} & net localgroup administrators ${local.windows_packer_user} /add & winrm set winrm/config/service/auth @{Basic=\\\"true\\\"}" + windows-shutdown-script-cmd = "net user /delete ${local.windows_packer_user}" + } + user_metadata = local.communicator == "winrm" ? local.windows_user_metadata : local.linux_user_metadata # merge metadata such that var.metadata always overrides user management # metadata but always allow var.startup_script to override var.metadata metadata = merge( - local.user_management_metadata, + local.user_metadata, var.metadata, local.startup_script_metadata, ) - # default to explicit var.communicator, otherwise in-order: ssh/winrm/none - shell_script_communicator = length(var.shell_scripts) > 0 ? "ssh" : "" - ansible_playbook_communicator = length(var.ansible_playbooks) > 0 ? "ssh" : "" - powershell_script_communicator = length(var.powershell_scripts) > 0 ? "winrm" : "" - communicator = coalesce( - var.communicator, - local.shell_script_communicator, - local.ansible_playbook_communicator, - local.powershell_script_communicator, - "none" - ) - - # must not enable IAP when no communicator is in use - use_iap = local.communicator == "none" ? 
false : var.use_iap - # determine best value for on_host_maintenance if not supplied by user machine_vals = split("-", var.machine_type) machine_family = local.machine_vals[0] @@ -127,8 +134,7 @@ build { labels = ["powershell"] for_each = var.powershell_scripts content { - elevated_user = "Administrator" - script = provisioner.value + script = provisioner.value } } diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl index bbcd601ec5..9098d32b05 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl @@ -18,9 +18,25 @@ locals { image_name_default = "${local.image_family}-${formatdate("YYYYMMDD't'hhmmss'z'", timestamp())}" image_name = var.image_name != null ? var.image_name : local.image_name_default + # default to explicit var.communicator, otherwise in-order: ssh/winrm/none + shell_script_communicator = length(var.shell_scripts) > 0 ? "ssh" : "" + ansible_playbook_communicator = length(var.ansible_playbooks) > 0 ? "ssh" : "" + powershell_script_communicator = length(var.powershell_scripts) > 0 ? "winrm" : "" + communicator = coalesce( + var.communicator, + local.shell_script_communicator, + local.ansible_playbook_communicator, + local.powershell_script_communicator, + "none" + ) + + # must not enable IAP when no communicator is in use + use_iap = local.communicator == "none" ? false : var.use_iap + # construct metadata from startup_script and metadata variables startup_script_metadata = var.startup_script == null ? 
{} : { startup-script = var.startup_script } - user_management_metadata = { + + linux_user_metadata = { block-project-ssh-keys = "TRUE" shutdown-script = <<-EOT #!/bin/bash @@ -28,30 +44,21 @@ locals { sed -i '/${var.ssh_username}/d' /var/lib/google/google_users EOT } + windows_packer_user = "packer_user" + windows_user_metadata = { + sysprep-specialize-script-cmd = "winrm quickconfig -quiet & net user /add ${local.windows_packer_user} & net localgroup administrators ${local.windows_packer_user} /add & winrm set winrm/config/service/auth @{Basic=\\\"true\\\"}" + windows-shutdown-script-cmd = "net user /delete ${local.windows_packer_user}" + } + user_metadata = local.communicator == "winrm" ? local.windows_user_metadata : local.linux_user_metadata # merge metadata such that var.metadata always overrides user management # metadata but always allow var.startup_script to override var.metadata metadata = merge( - local.user_management_metadata, + local.user_metadata, var.metadata, local.startup_script_metadata, ) - # default to explicit var.communicator, otherwise in-order: ssh/winrm/none - shell_script_communicator = length(var.shell_scripts) > 0 ? "ssh" : "" - ansible_playbook_communicator = length(var.ansible_playbooks) > 0 ? "ssh" : "" - powershell_script_communicator = length(var.powershell_scripts) > 0 ? "winrm" : "" - communicator = coalesce( - var.communicator, - local.shell_script_communicator, - local.ansible_playbook_communicator, - local.powershell_script_communicator, - "none" - ) - - # must not enable IAP when no communicator is in use - use_iap = local.communicator == "none" ? 
false : var.use_iap - # determine best value for on_host_maintenance if not supplied by user machine_vals = split("-", var.machine_type) machine_family = local.machine_vals[0] @@ -127,8 +134,7 @@ build { labels = ["powershell"] for_each = var.powershell_scripts content { - elevated_user = "Administrator" - script = provisioner.value + script = provisioner.value } } diff --git a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl index bbcd601ec5..9098d32b05 100644 --- a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl +++ b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl @@ -18,9 +18,25 @@ locals { image_name_default = "${local.image_family}-${formatdate("YYYYMMDD't'hhmmss'z'", timestamp())}" image_name = var.image_name != null ? var.image_name : local.image_name_default + # default to explicit var.communicator, otherwise in-order: ssh/winrm/none + shell_script_communicator = length(var.shell_scripts) > 0 ? "ssh" : "" + ansible_playbook_communicator = length(var.ansible_playbooks) > 0 ? "ssh" : "" + powershell_script_communicator = length(var.powershell_scripts) > 0 ? "winrm" : "" + communicator = coalesce( + var.communicator, + local.shell_script_communicator, + local.ansible_playbook_communicator, + local.powershell_script_communicator, + "none" + ) + + # must not enable IAP when no communicator is in use + use_iap = local.communicator == "none" ? false : var.use_iap + # construct metadata from startup_script and metadata variables startup_script_metadata = var.startup_script == null ? 
{} : { startup-script = var.startup_script } - user_management_metadata = { + + linux_user_metadata = { block-project-ssh-keys = "TRUE" shutdown-script = <<-EOT #!/bin/bash @@ -28,30 +44,21 @@ locals { sed -i '/${var.ssh_username}/d' /var/lib/google/google_users EOT } + windows_packer_user = "packer_user" + windows_user_metadata = { + sysprep-specialize-script-cmd = "winrm quickconfig -quiet & net user /add ${local.windows_packer_user} & net localgroup administrators ${local.windows_packer_user} /add & winrm set winrm/config/service/auth @{Basic=\\\"true\\\"}" + windows-shutdown-script-cmd = "net user /delete ${local.windows_packer_user}" + } + user_metadata = local.communicator == "winrm" ? local.windows_user_metadata : local.linux_user_metadata # merge metadata such that var.metadata always overrides user management # metadata but always allow var.startup_script to override var.metadata metadata = merge( - local.user_management_metadata, + local.user_metadata, var.metadata, local.startup_script_metadata, ) - # default to explicit var.communicator, otherwise in-order: ssh/winrm/none - shell_script_communicator = length(var.shell_scripts) > 0 ? "ssh" : "" - ansible_playbook_communicator = length(var.ansible_playbooks) > 0 ? "ssh" : "" - powershell_script_communicator = length(var.powershell_scripts) > 0 ? "winrm" : "" - communicator = coalesce( - var.communicator, - local.shell_script_communicator, - local.ansible_playbook_communicator, - local.powershell_script_communicator, - "none" - ) - - # must not enable IAP when no communicator is in use - use_iap = local.communicator == "none" ? 
false : var.use_iap - # determine best value for on_host_maintenance if not supplied by user machine_vals = split("-", var.machine_type) machine_family = local.machine_vals[0] @@ -127,8 +134,7 @@ build { labels = ["powershell"] for_each = var.powershell_scripts content { - elevated_user = "Administrator" - script = provisioner.value + script = provisioner.value } } From 2cdb9e7d0f6ca5265a7deff3be2c582436fcaf67 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Wed, 14 Jun 2023 11:42:43 -0700 Subject: [PATCH 26/92] Add inert `Module.RequiresApis` for backward compatibility (#1450) --- pkg/config/config.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/config/config.go b/pkg/config/config.go index 0338d0b885..361f3070fb 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -303,6 +303,8 @@ type Module struct { WrapSettingsWith map[string][]string Outputs []modulereader.OutputInfo `yaml:"outputs,omitempty"` Settings Dict + // DEPRECATED fields, keep in the struct for backwards compatibility + RequiredApis interface{} `yaml:"required_apis,omitempty"` } // createWrapSettingsWith ensures WrapSettingsWith field is not nil, if it is From 3bd16d83feac7141df36d4b02d129de23d259564 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 14 Jun 2023 15:46:58 -0700 Subject: [PATCH 27/92] Bump google.golang.org/api from 0.125.0 to 0.126.0 (#1430) Bumps [google.golang.org/api](https://github.com/googleapis/google-api-go-client) from 0.125.0 to 0.126.0. - [Release notes](https://github.com/googleapis/google-api-go-client/releases) - [Changelog](https://github.com/googleapis/google-api-go-client/blob/main/CHANGES.md) - [Commits](https://github.com/googleapis/google-api-go-client/compare/v0.125.0...v0.126.0) --- updated-dependencies: - dependency-name: google.golang.org/api dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index e329dff200..d313946c92 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/google/go-cmp v0.5.9 github.com/hashicorp/terraform-exec v0.18.1 github.com/zclconf/go-cty-debug v0.0.0-20191215020915-b22d67c1ba0b - google.golang.org/api v0.125.0 + google.golang.org/api v0.126.0 ) require ( diff --git a/go.sum b/go.sum index 37d522970d..9a4ebce3eb 100644 --- a/go.sum +++ b/go.sum @@ -855,8 +855,8 @@ google.golang.org/api v0.96.0/go.mod h1:w7wJQLTM+wvQpNf5JyEcBoxK0RH7EDrh/L4qfsuJ google.golang.org/api v0.97.0/go.mod h1:w7wJQLTM+wvQpNf5JyEcBoxK0RH7EDrh/L4qfsuJ13s= google.golang.org/api v0.98.0/go.mod h1:w7wJQLTM+wvQpNf5JyEcBoxK0RH7EDrh/L4qfsuJ13s= google.golang.org/api v0.100.0/go.mod h1:ZE3Z2+ZOr87Rx7dqFsdRQkRBk36kDtp/h+QpHbB7a70= -google.golang.org/api v0.125.0 h1:7xGvEY4fyWbhWMHf3R2/4w7L4fXyfpRGE9g6lp8+DCk= -google.golang.org/api v0.125.0/go.mod h1:mBwVAtz+87bEN6CbA1GtZPDOqY2R5ONPqJeIlvyo4Aw= +google.golang.org/api v0.126.0 h1:q4GJq+cAdMAC7XP7njvQ4tvohGLiSlytuL4BQxbIZ+o= +google.golang.org/api v0.126.0/go.mod h1:mBwVAtz+87bEN6CbA1GtZPDOqY2R5ONPqJeIlvyo4Aw= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= From eabc3340831b8d34af297c1b4881eca1999311bf Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Wed, 14 Jun 2023 16:10:28 -0700 Subject: [PATCH 28/92] Fix regex for validating GroupName (#1449) * Fix regex for validating GroupName * Address comments --- pkg/config/config.go | 28 ++++++++++++---------------- pkg/config/config_test.go | 20 ++++++++++++++++++++ pkg/config/validate.go | 4 ---- 3 files changed, 
32 insertions(+), 20 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index 361f3070fb..ccb187b20f 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -47,18 +47,14 @@ var errorMessages = map[string]string{ "yamlMarshalError": "failed to export the configuration to a blueprint yaml file", "fileSaveError": "failed to write the expanded yaml", // expand - "missingSetting": "a required setting is missing from a module", - "globalLabelType": "deployment variable 'labels' are not a map", - "settingsLabelType": "labels in module settings are not a map", - "invalidVar": "invalid variable definition in", - "invalidMod": "invalid module reference", - "invalidDeploymentRef": "invalid deployment-wide reference (only \"vars\") is supported)", - "varNotFound": "Could not find source of variable", - "intergroupOrder": "References to outputs from other groups must be to earlier groups", - "referenceWrongGroup": "Reference specified the wrong group for the module", - "noOutput": "Output not found for a variable", - "groupNotFound": "The group ID was not found", - "cannotUsePacker": "Packer modules cannot be used by other modules", + "missingSetting": "a required setting is missing from a module", + "settingsLabelType": "labels in module settings are not a map", + "invalidVar": "invalid variable definition in", + "invalidMod": "invalid module reference", + "varNotFound": "Could not find source of variable", + "intergroupOrder": "References to outputs from other groups must be to earlier groups", + "noOutput": "Output not found for a variable", + "cannotUsePacker": "Packer modules cannot be used by other modules", // validator "emptyID": "a module id cannot be empty", "emptySource": "a module source cannot be empty", @@ -69,9 +65,7 @@ var errorMessages = map[string]string{ "duplicateGroup": "group names must be unique", "duplicateID": "module IDs must be unique", "emptyGroupName": "group name must be set for each deployment group", - "illegalChars": 
"invalid character(s) found in group name", "invalidOutput": "requested output was not found in the module", - "varNotDefined": "variable not defined", "valueNotString": "value was not of type string", "valueEmptyString": "value is an empty string", "labelNameReqs": "name must begin with a lowercase letter, can only contain lowercase letters, numeric characters, underscores and dashes, and must be between 1 and 63 characters long", @@ -92,8 +86,10 @@ func (n GroupName) Validate() error { if n == "" { return errors.New(errorMessages["emptyGroupName"]) } - if hasIllegalChars(string(n)) { - return fmt.Errorf("%s %s", errorMessages["illegalChars"], n) + + if !regexp.MustCompile(`^\w(-*\w)*$`).MatchString(string(n)) { + return fmt.Errorf("invalid character(s) found in group name %q.\n"+ + "Allowed : alphanumeric, '_', and '-'; can not start/end with '-'", n) } return nil } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 8b445cf8cb..868e7bc781 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -1041,3 +1041,23 @@ func (s *MySuite) TestCheckModuleSettings(c *C) { bp.Vars.Set("zebra", cty.StringVal("stripes")) c.Check(checkModuleSettings(bp), IsNil) } + +func (s *MySuite) TestGroupNameValidate(c *C) { + // Invalid + c.Check(GroupName("").Validate(), NotNil) + c.Check(GroupName("-").Validate(), NotNil) + c.Check(GroupName("-g").Validate(), NotNil) + c.Check(GroupName("g-").Validate(), NotNil) + c.Check(GroupName("g+").Validate(), NotNil) + c.Check(GroupName("a b").Validate(), NotNil) + + // Valid + c.Check(GroupName("g").Validate(), IsNil) + c.Check(GroupName("gg").Validate(), IsNil) + c.Check(GroupName("_g").Validate(), IsNil) + c.Check(GroupName("g_dd").Validate(), IsNil) + c.Check(GroupName("g_dd-ff").Validate(), IsNil) + c.Check(GroupName("g-dd_ff").Validate(), IsNil) + c.Check(GroupName("1").Validate(), IsNil) + c.Check(GroupName("12g").Validate(), IsNil) +} diff --git a/pkg/config/validate.go b/pkg/config/validate.go 
index 41e209741a..22b6040dc8 100644 --- a/pkg/config/validate.go +++ b/pkg/config/validate.go @@ -205,10 +205,6 @@ func validateModule(c Module) error { return nil } -func hasIllegalChars(name string) bool { - return !regexp.MustCompile(`^[\w\+]+(\s*)[\w-\+\.]+$`).MatchString(name) -} - func validateOutputs(mod Module) error { modInfo := mod.InfoOrDie() // Only get the map if needed From 6a993e99587b800e1e6b8aa740d905f95286e226 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Wed, 14 Jun 2023 21:27:54 -0700 Subject: [PATCH 29/92] Reduce size of expanded blueprint by adding `omitempty` where applicable (#1452) --- pkg/config/config.go | 22 +++++++++---------- pkg/config/config_test.go | 17 +++++--------- pkg/config/dict.go | 6 +++++ .../.ghpc/artifacts/expanded_blueprint.yaml | 22 ------------------- .../.ghpc/artifacts/expanded_blueprint.yaml | 20 ----------------- .../.ghpc/artifacts/expanded_blueprint.yaml | 17 -------------- 6 files changed, 23 insertions(+), 81 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index ccb187b20f..5cbe280324 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -97,7 +97,7 @@ func (n GroupName) Validate() error { // DeploymentGroup defines a group of Modules that are all executed together type DeploymentGroup struct { Name GroupName `yaml:"group"` - TerraformBackend TerraformBackend `yaml:"terraform_backend"` + TerraformBackend TerraformBackend `yaml:"terraform_backend,omitempty"` Modules []Module `yaml:"modules"` Kind ModuleKind } @@ -255,8 +255,8 @@ func (v validatorName) String() string { type validatorConfig struct { Validator string - Inputs Dict - Skip bool + Inputs Dict `yaml:"inputs,omitempty"` + Skip bool `yaml:"skip,omitempty"` } func (v *validatorConfig) check(name validatorName, requiredInputs []string) error { @@ -295,10 +295,10 @@ type Module struct { DeploymentSource string `yaml:"-"` // "-" prevents user from specifying it Kind ModuleKind ID ModuleID - Use []ModuleID - WrapSettingsWith 
map[string][]string + Use []ModuleID `yaml:"use,omitempty"` + WrapSettingsWith map[string][]string `yaml:"wrapsettingswith,omitempty"` Outputs []modulereader.OutputInfo `yaml:"outputs,omitempty"` - Settings Dict + Settings Dict `yaml:"settings,omitempty"` // DEPRECATED fields, keep in the struct for backwards compatibility RequiredApis interface{} `yaml:"required_apis,omitempty"` } @@ -325,13 +325,13 @@ func (m Module) InfoOrDie() modulereader.ModuleInfo { // unless it has been set to a non-default value; the implementation as an // integer is primarily for internal purposes even if it can be set in blueprint type Blueprint struct { - BlueprintName string `yaml:"blueprint_name"` - GhpcVersion string `yaml:"ghpc_version,omitempty"` - Validators []validatorConfig - ValidationLevel int `yaml:"validation_level,omitempty"` + BlueprintName string `yaml:"blueprint_name"` + GhpcVersion string `yaml:"ghpc_version,omitempty"` + Validators []validatorConfig `yaml:"validators,omitempty"` + ValidationLevel int `yaml:"validation_level,omitempty"` Vars Dict DeploymentGroups []DeploymentGroup `yaml:"deployment_groups"` - TerraformBackendDefaults TerraformBackend `yaml:"terraform_backend_defaults"` + TerraformBackendDefaults TerraformBackend `yaml:"terraform_backend_defaults,omitempty"` } // DeploymentConfig is a container for the imported YAML data and supporting data for diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 868e7bc781..59b8d7fe46 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -165,18 +165,14 @@ func setTestModuleInfo(mod Module, info modulereader.ModuleInfo) { func getDeploymentConfigForTest() DeploymentConfig { testModule := Module{ - Source: "testSource", - Kind: TerraformKind, - ID: "testModule", - Use: []ModuleID{}, - WrapSettingsWith: make(map[string][]string), + Source: "testSource", + Kind: TerraformKind, + ID: "testModule", } testModuleWithLabels := Module{ - Source: "./role/source", - ID: 
"testModuleWithLabels", - Kind: TerraformKind, - Use: []ModuleID{}, - WrapSettingsWith: make(map[string][]string), + Source: "./role/source", + ID: "testModuleWithLabels", + Kind: TerraformKind, Settings: NewDict(map[string]cty.Value{ "moduleLabel": cty.StringVal("moduleLabelValue"), }), @@ -187,7 +183,6 @@ func getDeploymentConfigForTest() DeploymentConfig { } testBlueprint := Blueprint{ BlueprintName: "simple", - Validators: nil, Vars: NewDict(map[string]cty.Value{ "deployment_name": cty.StringVal("deployment_name"), "project_id": cty.StringVal("test-project"), diff --git a/pkg/config/dict.go b/pkg/config/dict.go index f9308f17d0..62bbf3b619 100644 --- a/pkg/config/dict.go +++ b/pkg/config/dict.go @@ -85,6 +85,12 @@ func (d *Dict) AsObject() cty.Value { return cty.ObjectVal(d.Items()) } +// IsZero determine whether it should be omitted when YAML marshaling +// with the `omitempty“ flag. +func (d Dict) IsZero() bool { + return len(d.m) == 0 +} + // YamlValue is wrapper around cty.Value to handle YAML unmarshal. 
type YamlValue struct { v cty.Value diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml index 80d461a90b..d8fdfa2906 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml @@ -16,26 +16,17 @@ blueprint_name: igc ghpc_version: golden validators: - validator: test_project_exists - inputs: {} skip: true - validator: test_apis_enabled - inputs: {} skip: true - validator: test_region_exists - inputs: {} skip: true - validator: test_zone_exists - inputs: {} skip: true - validator: test_zone_in_region - inputs: {} skip: true - validator: test_module_not_used - inputs: {} - skip: false - validator: test_deployment_variable_not_used - inputs: {} - skip: false vars: deployment_name: golden_copy_deployment labels: @@ -46,15 +37,10 @@ vars: zone: us-east4-c deployment_groups: - group: zero - terraform_backend: - type: "" - configuration: {} modules: - source: modules/network/vpc kind: terraform id: network0 - use: [] - wrapsettingswith: {} outputs: - name: subnetwork_name description: Automatically-generated output exported for use by later deployment groups @@ -104,7 +90,6 @@ deployment_groups: - source: modules/scripts/startup-script kind: terraform id: script - use: [] wrapsettingswith: labels: - merge( @@ -128,9 +113,6 @@ deployment_groups: type: shell kind: terraform - group: one - terraform_backend: - type: "" - configuration: {} modules: - source: modules/packer/custom-image kind: packer @@ -138,7 +120,6 @@ deployment_groups: use: - network0 - script - wrapsettingswith: {} settings: deployment_name: ((var.deployment_name )) labels: @@ -150,6 +131,3 @@ deployment_groups: subnetwork_name: ((module.network0.subnetwork_name)) zone: ((var.zone )) kind: packer 
-terraform_backend_defaults: - type: "" - configuration: {} diff --git a/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml index 6f82f91e27..d8cee48918 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml @@ -16,26 +16,17 @@ blueprint_name: igc ghpc_version: golden validators: - validator: test_project_exists - inputs: {} skip: true - validator: test_apis_enabled - inputs: {} skip: true - validator: test_region_exists - inputs: {} skip: true - validator: test_zone_exists - inputs: {} skip: true - validator: test_zone_in_region - inputs: {} skip: true - validator: test_module_not_used - inputs: {} - skip: false - validator: test_deployment_variable_not_used - inputs: {} - skip: false vars: deployment_name: golden_copy_deployment labels: @@ -46,15 +37,10 @@ vars: zone: us-east4-c deployment_groups: - group: zero - terraform_backend: - type: "" - configuration: {} modules: - source: modules/network/vpc kind: terraform id: network0 - use: [] - wrapsettingswith: {} outputs: - name: nat_ips - name: subnetwork_name @@ -67,9 +53,6 @@ deployment_groups: region: ((var.region )) kind: terraform - group: one - terraform_backend: - type: "" - configuration: {} modules: - source: modules/file-system/filestore kind: terraform @@ -92,6 +75,3 @@ deployment_groups: region: ((var.region )) zone: ((var.zone )) kind: terraform -terraform_backend_defaults: - type: "" - configuration: {} diff --git a/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml index 90dbabb5ae..969ebc56fe 100644 --- 
a/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml @@ -16,26 +16,17 @@ blueprint_name: text_escape ghpc_version: golden validators: - validator: test_project_exists - inputs: {} skip: true - validator: test_apis_enabled - inputs: {} skip: true - validator: test_region_exists - inputs: {} skip: true - validator: test_zone_exists - inputs: {} skip: true - validator: test_zone_in_region - inputs: {} skip: true - validator: test_module_not_used - inputs: {} - skip: false - validator: test_deployment_variable_not_used - inputs: {} - skip: false vars: deployment_name: golden_copy_deployment labels: @@ -46,15 +37,10 @@ vars: zone: us-east4-c deployment_groups: - group: zero - terraform_backend: - type: "" - configuration: {} modules: - source: modules/packer/custom-image kind: packer id: lime - use: [] - wrapsettingswith: {} settings: deployment_name: ((var.deployment_name)) image_family: \$(zebra/to(ad @@ -69,6 +55,3 @@ deployment_groups: subnetwork_name: \$(purple zone: ((var.zone)) kind: packer -terraform_backend_defaults: - type: "" - configuration: {} From 4c76bc4ebc6c57becd3a9ec4de6205348dbea68d Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Thu, 15 Jun 2023 09:11:24 -0700 Subject: [PATCH 30/92] Remove `Module.DeploymentSource`, compute it on demand (#1453) Motivation: * Reduce state and state mutations; * Currently it only works because of the bug: shallow copy points to the same underlying `Module`s; --- pkg/config/config.go | 4 +--- pkg/modulewriter/modulewriter.go | 12 ++++++------ pkg/modulewriter/modulewriter_test.go | 11 +++++++---- pkg/modulewriter/packerwriter.go | 6 +++++- pkg/modulewriter/tfwriter.go | 6 +++++- 5 files changed, 24 insertions(+), 15 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index 5cbe280324..7eb3590eb0 100644 --- a/pkg/config/config.go +++ 
b/pkg/config/config.go @@ -290,9 +290,7 @@ type ModuleID string // Module stores YAML definition of an HPC cluster component defined in a blueprint type Module struct { - Source string - // DeploymentSource - is source to be used for this module in written deployment. - DeploymentSource string `yaml:"-"` // "-" prevents user from specifying it + Source string Kind ModuleKind ID ModuleID Use []ModuleID `yaml:"use,omitempty"` diff --git a/pkg/modulewriter/modulewriter.go b/pkg/modulewriter/modulewriter.go index 4c09751a1d..45dc95cdd2 100644 --- a/pkg/modulewriter/modulewriter.go +++ b/pkg/modulewriter/modulewriter.go @@ -225,11 +225,6 @@ func copySource(deploymentPath string, deploymentGroups *[]config.DeploymentGrou var copyEmbedded = false for iMod := range grp.Modules { mod := &grp.Modules[iMod] - ds, err := deploymentSource(*mod) - if err != nil { - return err - } - mod.DeploymentSource = ds if sourcereader.IsGitPath(mod.Source) && mod.Kind == config.TerraformKind { continue // do not download @@ -239,8 +234,13 @@ func copySource(deploymentPath string, deploymentGroups *[]config.DeploymentGrou copyEmbedded = true continue // all embedded terraform modules fill be copied at once } + /* Copy source files */ - dst := filepath.Join(basePath, mod.DeploymentSource) + ds, err := deploymentSource(*mod) + if err != nil { + return err + } + dst := filepath.Join(basePath, ds) if _, err := os.Stat(dst); err == nil { continue } diff --git a/pkg/modulewriter/modulewriter_test.go b/pkg/modulewriter/modulewriter_test.go index 36e04b710d..8e75d4eb69 100644 --- a/pkg/modulewriter/modulewriter_test.go +++ b/pkg/modulewriter/modulewriter_test.go @@ -491,7 +491,9 @@ func (s *MySuite) TestWriteMain(c *C) { // Test with modules testModule := config.Module{ - ID: "test_module", + ID: "test_module", + Kind: config.TerraformKind, + Source: "modules/network/vpc", Settings: config.NewDict(map[string]cty.Value{ "testSetting": cty.StringVal("testValue"), "passthrough": 
config.MustParseExpression(`"${var.deployment_name}-allow"`).AsValue(), @@ -528,6 +530,8 @@ func (s *MySuite) TestWriteMain(c *C) { WrapSettingsWith: map[string][]string{ "wrappedSetting": {"list(flatten(", "))"}, }, + Kind: config.TerraformKind, + Source: "modules/network/vpc", Settings: config.NewDict(map[string]cty.Value{ "wrappedSetting": cty.TupleVal([]cty.Value{ cty.StringVal("val1"), @@ -687,9 +691,8 @@ func (s *MySuite) TestWriteDeploymentGroup_PackerWriter(c *C) { } testPackerModule := config.Module{ - Kind: config.PackerKind, - ID: "testPackerModule", - DeploymentSource: "testPackerModule", + Kind: config.PackerKind, + ID: "testPackerModule", } testDeploymentGroup := config.DeploymentGroup{ Name: "packerGroup", diff --git a/pkg/modulewriter/packerwriter.go b/pkg/modulewriter/packerwriter.go index b5bb6ec18d..1bb90c3fbf 100644 --- a/pkg/modulewriter/packerwriter.go +++ b/pkg/modulewriter/packerwriter.go @@ -93,7 +93,11 @@ func (w PackerWriter) writeDeploymentGroup( return err } - modPath := filepath.Join(groupPath, mod.DeploymentSource) + ds, err := deploymentSource(mod) + if err != nil { + return err + } + modPath := filepath.Join(groupPath, ds) if err = writePackerAutovars(av.Items(), modPath); err != nil { return err } diff --git a/pkg/modulewriter/tfwriter.go b/pkg/modulewriter/tfwriter.go index 4280791b99..c9e87ca7c3 100644 --- a/pkg/modulewriter/tfwriter.go +++ b/pkg/modulewriter/tfwriter.go @@ -222,7 +222,11 @@ func writeMain( moduleBody := moduleBlock.Body() // Add source attribute - moduleBody.SetAttributeValue("source", cty.StringVal(mod.DeploymentSource)) + ds, err := deploymentSource(mod) + if err != nil { + return err + } + moduleBody.SetAttributeValue("source", cty.StringVal(ds)) // For each Setting for _, setting := range orderKeys(mod.Settings.Items()) { From 22a54f136f13e72560710ece8a4d41dff8501d46 Mon Sep 17 00:00:00 2001 From: Skyler Malinowski Date: Wed, 14 Jun 2023 16:16:52 -0400 Subject: [PATCH 31/92] Bump slurm-gcp modules to 5.7.4 
--- .../compute/schedmd-slurm-gcp-v5-node-group/variables.tf | 2 +- .../compute/schedmd-slurm-gcp-v5-partition-dynamic/README.md | 2 +- .../compute/schedmd-slurm-gcp-v5-partition-dynamic/main.tf | 2 +- .../schedmd-slurm-gcp-v5-partition-dynamic/variables.tf | 2 +- .../modules/compute/schedmd-slurm-gcp-v5-partition/README.md | 2 +- .../modules/compute/schedmd-slurm-gcp-v5-partition/main.tf | 2 +- .../compute/schedmd-slurm-gcp-v5-partition/variables.tf | 2 +- .../scheduler/schedmd-slurm-gcp-v5-controller/README.md | 4 ++-- .../modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf | 4 ++-- .../scheduler/schedmd-slurm-gcp-v5-controller/variables.tf | 2 +- .../modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md | 2 +- .../modules/scheduler/schedmd-slurm-gcp-v5-hybrid/main.tf | 2 +- .../modules/scheduler/schedmd-slurm-gcp-v5-login/README.md | 4 ++-- .../modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf | 4 ++-- .../modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf | 2 +- tools/cloud-build/Dockerfile | 2 +- 16 files changed, 20 insertions(+), 20 deletions(-) diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf index fd8bd310bf..78146b86e3 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf @@ -15,7 +15,7 @@ */ # Most variables have been sourced and modified from the SchedMD/slurm-gcp -# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 variable "project_id" { description = "Project in which the HPC deployment will be created." 
diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/README.md b/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/README.md index 4476baf44e..cad8fc0697 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/README.md @@ -69,7 +69,7 @@ No providers. | Name | Source | Version | |------|--------|---------| -| [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | 5.7.3 | +| [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | 5.7.4 | ## Resources diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/main.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/main.tf index 9c5c707f50..42c16f4957 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/main.tf @@ -26,7 +26,7 @@ locals { } module "slurm_partition" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=5.7.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=5.7.4" slurm_cluster_name = local.slurm_cluster_name enable_job_exclusive = var.exclusive diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/variables.tf index f4fbe5116b..cfe375d34f 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/variables.tf @@ -15,7 +15,7 @@ */ # Most variables have been sourced and modified from the SchedMD/slurm-gcp -# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +# 
github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 variable "deployment_name" { description = "Name of the deployment." diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md b/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md index 866c63ab9a..7c91f2d941 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/README.md @@ -146,7 +146,7 @@ limitations under the License. | Name | Source | Version | |------|--------|---------| -| [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | 5.7.3 | +| [slurm\_partition](#module\_slurm\_partition) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition | 5.7.4 | ## Resources diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf index b34ee29a9c..4e60ba24d2 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/main.tf @@ -38,7 +38,7 @@ data "google_compute_zones" "available" { } module "slurm_partition" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=5.7.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_partition?ref=5.7.4" slurm_cluster_name = local.slurm_cluster_name partition_nodes = var.node_groups diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf index 5ba1e8a7dd..8c8fadbe92 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/variables.tf @@ -15,7 +15,7 @@ */ # Most variables have been sourced and modified from the 
SchedMD/slurm-gcp -# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 variable "deployment_name" { description = "Name of the deployment." diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md index f94bfd4135..d3944bdcd4 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md @@ -163,8 +163,8 @@ limitations under the License. | Name | Source | Version | |------|--------|---------| -| [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance | 5.7.3 | -| [slurm\_controller\_template](#module\_slurm\_controller\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 5.7.3 | +| [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance | 5.7.4 | +| [slurm\_controller\_template](#module\_slurm\_controller\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 5.7.4 | ## Resources diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf index b907ef54b2..c535e18740 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf @@ -61,7 +61,7 @@ data "google_compute_default_service_account" "default" { } module "slurm_controller_instance" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance?ref=5.7.3" + source = 
"github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_instance?ref=5.7.4" access_config = local.access_config slurm_cluster_name = local.slurm_cluster_name @@ -97,7 +97,7 @@ module "slurm_controller_instance" { } module "slurm_controller_template" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=5.7.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=5.7.4" additional_disks = local.additional_disks can_ip_forward = var.can_ip_forward diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf index 064359b0e3..25e473327c 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf @@ -15,7 +15,7 @@ */ # Most variables have been sourced and modified from the SchedMD/slurm-gcp -# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 variable "access_config" { description = "Access configurations, i.e. IPs via which the VM instance can be accessed via the Internet." diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md index c40a761c75..802762018f 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md @@ -181,7 +181,7 @@ No providers. 
| Name | Source | Version | |------|--------|---------| -| [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_hybrid | 5.7.3 | +| [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_hybrid | 5.7.4 | ## Resources diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/main.tf index b4f1c6d781..787e558726 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/main.tf @@ -28,7 +28,7 @@ locals { } module "slurm_controller_instance" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_hybrid?ref=5.7.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_controller_hybrid?ref=5.7.4" project_id = var.project_id slurm_cluster_name = local.slurm_cluster_name diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md index e2431a0f2e..a746bf7f7c 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md @@ -85,8 +85,8 @@ limitations under the License. 
| Name | Source | Version | |------|--------|---------| -| [slurm\_login\_instance](#module\_slurm\_login\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance | 5.7.3 | -| [slurm\_login\_template](#module\_slurm\_login\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 5.7.3 | +| [slurm\_login\_instance](#module\_slurm\_login\_instance) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance | 5.7.4 | +| [slurm\_login\_template](#module\_slurm\_login\_template) | github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 5.7.4 | ## Resources diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf index 3e5bd0fd85..a807147875 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf @@ -57,7 +57,7 @@ data "google_compute_default_service_account" "default" { } module "slurm_login_template" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=5.7.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=5.7.4" additional_disks = local.additional_disks can_ip_forward = var.can_ip_forward @@ -95,7 +95,7 @@ module "slurm_login_template" { } module "slurm_login_instance" { - source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance?ref=5.7.3" + source = "github.com/SchedMD/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_login_instance?ref=5.7.4" access_config = local.access_config slurm_cluster_name = local.slurm_cluster_name diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf index 
dcafbcaada..c571599f8b 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf @@ -15,7 +15,7 @@ */ # Most variables have been sourced and modified from the SchedMD/slurm-gcp -# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +# github repository: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 variable "project_id" { type = string diff --git a/tools/cloud-build/Dockerfile b/tools/cloud-build/Dockerfile index 8da6bbff91..730ca2d214 100644 --- a/tools/cloud-build/Dockerfile +++ b/tools/cloud-build/Dockerfile @@ -50,7 +50,7 @@ WORKDIR /ghpc-tmp COPY ./ ./ RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.3/scripts/requirements.txt && \ + pip install --no-cache-dir -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.4/scripts/requirements.txt && \ pip install --no-cache-dir -r tools/cloud-build/requirements.txt && \ rm -rf ~/.cache/pip/* From 76938cc2f38a46a947a7b3efaa49d2721dfcd6c5 Mon Sep 17 00:00:00 2001 From: Skyler Malinowski Date: Wed, 14 Jun 2023 16:54:56 -0400 Subject: [PATCH 32/92] Bump slurm-gcp docs to 5.7.4 --- .../schedmd-slurm-gcp-v5-controller/README.md | 14 +++++++------- .../schedmd-slurm-gcp-v5-hybrid/README.md | 12 ++++++------ .../schedmd-slurm-gcp-v5-login/README.md | 10 +++++----- .../demo-with-cloud-controller-instructions.md | 2 +- docs/hybrid-slurm-cluster/deploy-instructions.md | 4 ++-- .../hybrid-slurm-cluster/on-prem-instructions.md | 16 ++++++++-------- examples/README.md | 4 ++-- 7 files changed, 31 insertions(+), 31 deletions(-) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md index d3944bdcd4..05806bafb2 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md +++ 
b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md @@ -17,14 +17,14 @@ controller for optimal performance at different scales. > > ```shell > # Install Python3 and run -> pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.3/scripts/requirements.txt +> pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.4/scripts/requirements.txt > ``` -[SchedMD/slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 -[slurm\_controller\_instance]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/terraform/slurm_cluster/modules/slurm_controller_instance -[slurm\_instance\_template]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/terraform/slurm_cluster/modules/slurm_instance_template +[SchedMD/slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 +[slurm\_controller\_instance]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/terraform/slurm_cluster/modules/slurm_controller_instance +[slurm\_instance\_template]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/terraform/slurm_cluster/modules/slurm_instance_template [slurm-ug]: https://goo.gle/slurm-gcp-user-guide. -[requirements.txt]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/scripts/requirements.txt +[requirements.txt]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/scripts/requirements.txt [enable\_cleanup\_compute]: #input\_enable\_cleanup\_compute [enable\_cleanup\_subscriptions]: #input\_enable\_cleanup\_subscriptions [enable\_reconfigure]: #input\_enable\_reconfigure @@ -94,12 +94,12 @@ This option has some additional requirements: development environment deploying the cluster. One can use following commands: ```bash - pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.3/scripts/requirements.txt + pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.4/scripts/requirements.txt ``` For more information, see the [description][optdeps] of this module. 
-[optdeps]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/terraform/slurm_cluster#optional +[optdeps]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/terraform/slurm_cluster#optional ## Custom Images diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md index 802762018f..d79fa9cc32 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md @@ -38,7 +38,7 @@ manually. This will require addition configuration and verification of permissions. For more information see the [hybrid.md] documentation on [slurm-gcp]. -[slurm-controller-hybrid]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/terraform/slurm_cluster/modules/slurm_controller_hybrid +[slurm-controller-hybrid]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/terraform/slurm_cluster/modules/slurm_controller_hybrid > **_NOTE:_** The hybrid module requires the following dependencies to be > installed on the system deploying the module: @@ -58,15 +58,15 @@ permissions. For more information see the [hybrid.md] documentation on [pyyaml]: https://pypi.org/project/PyYAML/ [google-api-python-client]: https://pypi.org/project/google-api-python-client/ [google-cloud-pubsub]: https://pypi.org/project/google-cloud-pubsub/ -[requirements.txt]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/scripts/requirements.txt +[requirements.txt]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/scripts/requirements.txt ### Manual Configuration This module *does not* complete the installation of hybrid partitions on your slurm cluster. After deploying, you must follow the steps listed out in the [hybrid.md] documentation under [manual steps]. 
-[hybrid.md]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/docs/hybrid.md -[manual steps]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/docs/hybrid.md#manual-configurations +[hybrid.md]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/docs/hybrid.md +[manual steps]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/docs/hybrid.md#manual-configurations ### Example Usage The hybrid module can be added to a blueprint as follows: @@ -146,10 +146,10 @@ strongly advise only using versions 21 or 22 when using this module. Attempting to use this module with any version older than 21 may lead to unexpected results. -[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 [pre-existing-network-storage]: ../../../../modules/file-system/pre-existing-network-storage/ [schedmd-slurm-gcp-v5-partition]: ../../compute/schedmd-slurm-gcp-v5-partition/ -[packer templates]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/packer +[packer templates]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/packer ## License diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md index a746bf7f7c..251b39609b 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md @@ -5,9 +5,9 @@ This module creates a login node for a Slurm cluster based on the terraform modules. The login node is used in conjunction with the [Slurm controller](../schedmd-slurm-gcp-v5-controller/README.md). 
-[SchedMD/slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 -[slurm\_login\_instance]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/terraform/slurm_cluster/modules/slurm_login_instance -[slurm\_instance\_template]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/terraform/slurm_cluster/modules/slurm_instance_template +[SchedMD/slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 +[slurm\_login\_instance]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/terraform/slurm_cluster/modules/slurm_login_instance +[slurm\_instance\_template]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/terraform/slurm_cluster/modules/slurm_instance_template ### Example @@ -49,8 +49,8 @@ The HPC Toolkit team maintains the wrapper around the [slurm-on-gcp] terraform modules. For support with the underlying modules, see the instructions in the [slurm-gcp README][slurm-gcp-readme]. -[slurm-on-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 -[slurm-gcp-readme]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3#slurm-on-google-cloud-platform +[slurm-on-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 +[slurm-gcp-readme]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4#slurm-on-google-cloud-platform ## License diff --git a/docs/hybrid-slurm-cluster/demo-with-cloud-controller-instructions.md b/docs/hybrid-slurm-cluster/demo-with-cloud-controller-instructions.md index 16fc7b7a61..67178fa5db 100644 --- a/docs/hybrid-slurm-cluster/demo-with-cloud-controller-instructions.md +++ b/docs/hybrid-slurm-cluster/demo-with-cloud-controller-instructions.md @@ -22,7 +22,7 @@ for use with an on-premise slurm-cluster. > further testing is done, documentation on applying the hybrid module to > on-premise slurm clusters will be added and expanded. 
-[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 ## Definitions diff --git a/docs/hybrid-slurm-cluster/deploy-instructions.md b/docs/hybrid-slurm-cluster/deploy-instructions.md index cc7be13fe1..781d4588ed 100644 --- a/docs/hybrid-slurm-cluster/deploy-instructions.md +++ b/docs/hybrid-slurm-cluster/deploy-instructions.md @@ -260,8 +260,8 @@ sudo systemctl restart slurmctld If the restart did not succeed, the logs at `/var/log/slurm/slurmctld.log` should point you in the right direction. -[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 -[slurm-gcp-hybrid]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/docs/hybrid.md +[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 +[slurm-gcp-hybrid]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/docs/hybrid.md [demo-with-cloud-controller-instructions.md]: ./demo-with-cloud-controller-instructions.md ## Validate the Hybrid Cluster diff --git a/docs/hybrid-slurm-cluster/on-prem-instructions.md b/docs/hybrid-slurm-cluster/on-prem-instructions.md index d982c2be2a..1c35bc8246 100644 --- a/docs/hybrid-slurm-cluster/on-prem-instructions.md +++ b/docs/hybrid-slurm-cluster/on-prem-instructions.md @@ -39,9 +39,9 @@ detail, as well as how to customize many of these assumptions to fit your needs. deployments in their [hybrid.md] documentation. 
[hybridmodule]: ../../community/modules/scheduler/schedmd-slurm-gcp-v5-hybrid/README.md -[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3 +[slurm-gcp]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4 [slurm\_controller\_hybrid]: https://github.com/SchedMD/slurm-gcp/tree/master/terraform/slurm_cluster/modules/slurm_controller_hybrid -[hybrid.md]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/docs/hybrid.md +[hybrid.md]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/docs/hybrid.md ### NFS Mounts @@ -235,12 +235,12 @@ image created with slurm 21.08.8: partition_name: compute ``` -[slurmgcppacker]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/packer -[example.pkrvars.hcl]: https://github.com/SchedMD/slurm-gcp/tree/5.7.3/packer/example.pkrvars.hcl -[slurmversion]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/packer/variables.pkr.hcl#L97 -[`service_account_scopes`]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/packer/variables.pkr.hcl#L166 -[`munge_user`]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/ansible/roles/munge/defaults/main.yml#L17 -[`slurm_user`]: https://github.com/SchedMD/slurm-gcp/blob/5.7.3/ansible/roles/slurm/defaults/main.yml#L31 +[slurmgcppacker]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/packer +[example.pkrvars.hcl]: https://github.com/SchedMD/slurm-gcp/tree/5.7.4/packer/example.pkrvars.hcl +[slurmversion]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/packer/variables.pkr.hcl#L97 +[`service_account_scopes`]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/packer/variables.pkr.hcl#L166 +[`munge_user`]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/ansible/roles/munge/defaults/main.yml#L17 +[`slurm_user`]: https://github.com/SchedMD/slurm-gcp/blob/5.7.4/ansible/roles/slurm/defaults/main.yml#L31 ## On Premise Setup diff --git a/examples/README.md b/examples/README.md index c454437329..e91257536a 100644 --- a/examples/README.md +++ b/examples/README.md @@ -119,7 +119,7 @@ the experimental badge 
(![experimental-badge]). > > ```shell > # Install Python3 and run -> pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.3/scripts/requirements.txt +> pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.4/scripts/requirements.txt > ``` Creates a basic auto-scaling Slurm cluster with mostly default settings. The @@ -524,7 +524,7 @@ For this example the following is needed in the selected region: > > ```shell > # Install Python3 and run -> pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.3/scripts/requirements.txt +> pip3 install -r https://raw.githubusercontent.com/SchedMD/slurm-gcp/5.7.4/scripts/requirements.txt > ``` Similar to the [hpc-slurm.yaml] example, but using Ubuntu 20.04 instead of CentOS 7. From f024167d9cfaea065de2d3c33e64461801632286 Mon Sep 17 00:00:00 2001 From: Skyler Malinowski Date: Wed, 14 Jun 2023 16:41:45 -0400 Subject: [PATCH 33/92] Exclude partition-dynamic from suspend --- .../compute/schedmd-slurm-gcp-v5-partition-dynamic/main.tf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/main.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/main.tf index 42c16f4957..554a4da771 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition-dynamic/main.tf @@ -16,8 +16,11 @@ */ locals { - # Default to value in partition_conf if both set "Default" - partition_conf = merge(var.is_default == true ? { "Default" : "YES" } : {}, var.partition_conf) + # Default to value in partition_conf if both set the same key + partition_conf = merge({ + "Default" = var.is_default ? 
"YES" : null, + "SuspendTime" = "INFINITE" + }, var.partition_conf) # Since deployment name may be used to create a cluster name, we remove any invalid character from the beginning # Also, slurm imposed a lot of restrictions to this name, so we format it to an acceptable string From 7b9c670fb94fbd7dba16e06dfa69d8935d86150d Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Fri, 16 Jun 2023 08:09:45 -0700 Subject: [PATCH 34/92] Fix static_check warnings in `cmd/root*.go` (#1460) --- cmd/root.go | 8 ++++---- cmd/root_test.go | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index a5ae631ad8..c105f76e74 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -128,6 +128,9 @@ func hpcToolkitRepo() (repo *git.Repository, dir string, err error) { // found. If it's the hpc-toolkit repo, return it. // repo := new(git.Repository) dir, err = os.Getwd() + if err != nil { + return nil, "", err + } subdir := filepath.Dir(dir) o := git.PlainOpenOptions{DetectDotGit: true} repo, err = git.PlainOpenWithOptions(dir, &o) @@ -171,8 +174,5 @@ func hpcToolkitRepo() (repo *git.Repository, dir string, err error) { func isHpcToolkitRepo(r git.Repository) bool { h := plumbing.NewHash(GitInitialHash) _, err := r.CommitObject(h) - if err == nil { - return true - } - return false + return err == nil } diff --git a/cmd/root_test.go b/cmd/root_test.go index 08177873e1..3101b5dbb3 100644 --- a/cmd/root_test.go +++ b/cmd/root_test.go @@ -199,7 +199,7 @@ func checkPathsEqual(c *C, a, b string) { if err != nil { c.Fatal(err) } - b, err = filepath.EvalSymlinks(a) + b, err = filepath.EvalSymlinks(b) if err != nil { c.Fatal(err) } @@ -241,6 +241,9 @@ func initTestRepo(path string) (repo *git.Repository, initHash plumbing.Hash, er } initHash, err = commit("Init") + if err != nil { + return + } _, err = commit("Last") return } From f34b00d54c873df202bd8bc971b7d9b6c94ace65 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Fri, 16 Jun 2023 16:43:25 -0700 Subject: [PATCH 
35/92] Fix panic while attempting to tokenize Null-value (#1468) --- pkg/modulewriter/hcl_utils.go | 4 ++++ pkg/modulewriter/hcl_utils_test.go | 1 + 2 files changed, 5 insertions(+) diff --git a/pkg/modulewriter/hcl_utils.go b/pkg/modulewriter/hcl_utils.go index c4d74e6598..f62fb173d1 100644 --- a/pkg/modulewriter/hcl_utils.go +++ b/pkg/modulewriter/hcl_utils.go @@ -63,6 +63,10 @@ func WriteHclAttributes(vars map[string]cty.Value, dst string) error { // TokensForValue is a modification of hclwrite.TokensForValue. // The only difference in behavior is handling "HCL literal" strings. func TokensForValue(val cty.Value) hclwrite.Tokens { + if val.IsNull() { // terminate early as Null value can has any type (e.g. String) + return hclwrite.TokensForValue(val) + } + // We need to handle both cases, until all "expression" users are moved to Expression if e, is := config.IsExpressionValue(val); is { return e.Tokenize() diff --git a/pkg/modulewriter/hcl_utils_test.go b/pkg/modulewriter/hcl_utils_test.go index d0b84e11ca..6833f8b75a 100644 --- a/pkg/modulewriter/hcl_utils_test.go +++ b/pkg/modulewriter/hcl_utils_test.go @@ -30,6 +30,7 @@ func TestTokensForValueNoLiteral(t *testing.T) { val := cty.ObjectVal(map[string]cty.Value{ "tan": cty.TupleVal([]cty.Value{ cty.StringVal("biege"), + cty.NullVal(cty.String), cty.MapVal(map[string]cty.Value{ "cu": cty.NumberIntVal(29), "ba": cty.NumberIntVal(56), From 6976c7c7eabf935d59e4f247262a152ce2245152 Mon Sep 17 00:00:00 2001 From: Issac Goldstand Date: Sun, 18 Jun 2023 11:36:17 +0000 Subject: [PATCH 36/92] Apply lifecycle config to not swap boot disk (and VM) each time a new disk image is available --- modules/compute/vm-instance/main.tf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/compute/vm-instance/main.tf b/modules/compute/vm-instance/main.tf index 73a80351cb..da72a6f604 100644 --- a/modules/compute/vm-instance/main.tf +++ b/modules/compute/vm-instance/main.tf @@ -96,6 +96,12 @@ resource "google_compute_disk" 
"boot_disk" { size = var.disk_size_gb labels = local.labels zone = var.zone + + lifecycle { + ignore_changes = [ + image + ] + } } resource "google_compute_resource_policy" "placement_policy" { From dac80365639e11c9732262b23a783a26379fcc9a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 10:59:31 +0000 Subject: [PATCH 37/92] Bump golang.org/x/sys from 0.8.0 to 0.9.0 Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.8.0 to 0.9.0. - [Commits](https://github.com/golang/sys/compare/v0.8.0...v0.9.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index d313946c92..05e1ea41bb 100644 --- a/go.mod +++ b/go.mod @@ -80,7 +80,7 @@ require ( golang.org/x/crypto v0.9.0 // indirect golang.org/x/net v0.10.0 // indirect golang.org/x/oauth2 v0.8.0 // indirect - golang.org/x/sys v0.8.0 + golang.org/x/sys v0.9.0 golang.org/x/text v0.9.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/appengine v1.6.7 // indirect diff --git a/go.sum b/go.sum index 9a4ebce3eb..b3b69ae835 100644 --- a/go.sum +++ b/go.sum @@ -718,8 +718,8 @@ golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= -golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= +golang.org/x/sys v0.9.0/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= From 9ab74d19e77b431db492c2caae87ad2583bd68fd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 10:59:39 +0000 Subject: [PATCH 38/92] Bump github.com/otiai10/copy from 1.11.0 to 1.12.0 Bumps [github.com/otiai10/copy](https://github.com/otiai10/copy) from 1.11.0 to 1.12.0. - [Release notes](https://github.com/otiai10/copy/releases) - [Commits](https://github.com/otiai10/copy/compare/v1.11.0...v1.12.0) --- updated-dependencies: - dependency-name: github.com/otiai10/copy dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index d313946c92..fa7105492e 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( github.com/hashicorp/hcl v1.0.0 // indirect github.com/hashicorp/hcl/v2 v2.17.0 github.com/hashicorp/terraform-config-inspect v0.0.0-20221020162138-81db043ad408 - github.com/otiai10/copy v1.11.0 + github.com/otiai10/copy v1.12.0 github.com/pkg/errors v0.9.1 github.com/spf13/afero v1.9.5 github.com/spf13/cobra v1.7.0 diff --git a/go.sum b/go.sum index 9a4ebce3eb..8246fda19a 100644 --- a/go.sum +++ b/go.sum @@ -427,8 +427,8 @@ github.com/mitchellh/go-wordwrap v1.0.0 h1:6GlHJ/LTGMrIJbwgdqdl2eEH8o+Exx/0m8ir9 github.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod 
h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/otiai10/copy v1.11.0 h1:OKBD80J/mLBrwnzXqGtFCzprFSGioo30JcmR4APsNwc= -github.com/otiai10/copy v1.11.0/go.mod h1:rSaLseMUsZFFbsFGc7wCJnnkTAvdc5L6VWxPE4308Ww= +github.com/otiai10/copy v1.12.0 h1:cLMgSQnXBs1eehF0Wy/FAGsgDTDmAqFR7rQylBb1nDY= +github.com/otiai10/copy v1.12.0/go.mod h1:rSaLseMUsZFFbsFGc7wCJnnkTAvdc5L6VWxPE4308Ww= github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks= github.com/pjbgf/sha1cd v0.3.0 h1:4D5XXmUUBUl/xQ6IjCkEAbqXskkq/4O7LmGn0AqMDs4= github.com/pjbgf/sha1cd v0.3.0/go.mod h1:nZ1rrWOcGJ5uZgEEVL1VUM9iRQiZvWdbZjkKyFzPPsI= From 4ab4239e6b8038c25f38ca8e9fe99b63fdb6e7b4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Jun 2023 11:00:04 +0000 Subject: [PATCH 39/92] Bump google.golang.org/api from 0.126.0 to 0.128.0 Bumps [google.golang.org/api](https://github.com/googleapis/google-api-go-client) from 0.126.0 to 0.128.0. - [Release notes](https://github.com/googleapis/google-api-go-client/releases) - [Changelog](https://github.com/googleapis/google-api-go-client/blob/main/CHANGES.md) - [Commits](https://github.com/googleapis/google-api-go-client/compare/v0.126.0...v0.128.0) --- updated-dependencies: - dependency-name: google.golang.org/api dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- go.mod | 4 ++-- go.sum | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index d313946c92..587550e5fc 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/google/go-cmp v0.5.9 github.com/hashicorp/terraform-exec v0.18.1 github.com/zclconf/go-cty-debug v0.0.0-20191215020915-b22d67c1ba0b - google.golang.org/api v0.126.0 + google.golang.org/api v0.128.0 ) require ( @@ -54,7 +54,7 @@ require ( github.com/golang/protobuf v1.5.3 // indirect github.com/google/s2a-go v0.1.4 // indirect github.com/google/uuid v1.3.0 // indirect - github.com/googleapis/enterprise-certificate-proxy v0.2.3 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.2.4 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect github.com/hashicorp/go-safetemp v1.0.0 // indirect github.com/hashicorp/go-version v1.6.0 // indirect diff --git a/go.sum b/go.sum index 9a4ebce3eb..2dce24010b 100644 --- a/go.sum +++ b/go.sum @@ -346,8 +346,8 @@ github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/googleapis/enterprise-certificate-proxy v0.0.0-20220520183353-fd19c99a87aa/go.mod h1:17drOmN3MwGY7t0e+Ei9b45FFGA3fBs3x36SsCg1hq8= github.com/googleapis/enterprise-certificate-proxy v0.1.0/go.mod h1:17drOmN3MwGY7t0e+Ei9b45FFGA3fBs3x36SsCg1hq8= github.com/googleapis/enterprise-certificate-proxy v0.2.0/go.mod h1:8C0jb7/mgJe/9KK8Lm7X9ctZC2t60YyIpYEI16jx0Qg= -github.com/googleapis/enterprise-certificate-proxy v0.2.3 h1:yk9/cqRKtT9wXZSsRH9aurXEpJX+U6FLtpYTdC3R06k= -github.com/googleapis/enterprise-certificate-proxy v0.2.3/go.mod h1:AwSRAtLfXpU5Nm3pW+v7rGDHp09LsPtGY9MduiEsR9k= +github.com/googleapis/enterprise-certificate-proxy v0.2.4 h1:uGy6JWR/uMIILU8wbf+OkstIrNiMjGpEIyhx8f6W7s4= +github.com/googleapis/enterprise-certificate-proxy v0.2.4/go.mod h1:AwSRAtLfXpU5Nm3pW+v7rGDHp09LsPtGY9MduiEsR9k= github.com/googleapis/gax-go/v2 v2.0.4/go.mod 
h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gax-go/v2 v2.1.0/go.mod h1:Q3nei7sK6ybPYH7twZdmQpAd1MKb7pfu6SK+H1/DsU0= @@ -855,8 +855,8 @@ google.golang.org/api v0.96.0/go.mod h1:w7wJQLTM+wvQpNf5JyEcBoxK0RH7EDrh/L4qfsuJ google.golang.org/api v0.97.0/go.mod h1:w7wJQLTM+wvQpNf5JyEcBoxK0RH7EDrh/L4qfsuJ13s= google.golang.org/api v0.98.0/go.mod h1:w7wJQLTM+wvQpNf5JyEcBoxK0RH7EDrh/L4qfsuJ13s= google.golang.org/api v0.100.0/go.mod h1:ZE3Z2+ZOr87Rx7dqFsdRQkRBk36kDtp/h+QpHbB7a70= -google.golang.org/api v0.126.0 h1:q4GJq+cAdMAC7XP7njvQ4tvohGLiSlytuL4BQxbIZ+o= -google.golang.org/api v0.126.0/go.mod h1:mBwVAtz+87bEN6CbA1GtZPDOqY2R5ONPqJeIlvyo4Aw= +google.golang.org/api v0.128.0 h1:RjPESny5CnQRn9V6siglged+DZCgfu9l6mO9dkX9VOg= +google.golang.org/api v0.128.0/go.mod h1:Y611qgqaE92On/7g65MQgxYul3c0rEB894kniWLY750= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= From 162dae9ca19f5d345bfe10640d46253fa776d688 Mon Sep 17 00:00:00 2001 From: Rohit Ramu Date: Mon, 19 Jun 2023 14:57:01 -0700 Subject: [PATCH 40/92] Update "google", "google-beta" providers to 4.69.1 (#1464) --- pkg/modulewriter/tfversions.go | 4 ++-- .../golden_copies/expectations/igc_pkr/zero/versions.tf | 4 ++-- .../golden_copies/expectations/igc_tf/one/versions.tf | 4 ++-- .../golden_copies/expectations/igc_tf/zero/versions.tf | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pkg/modulewriter/tfversions.go b/pkg/modulewriter/tfversions.go index 11f10d3bec..b3a1b1b31f 100644 --- a/pkg/modulewriter/tfversions.go +++ b/pkg/modulewriter/tfversions.go @@ -21,11 +21,11 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 4.65.2" + 
version = "~> 4.69.1" } google-beta = { source = "hashicorp/google-beta" - version = "~> 4.65.2" + version = "~> 4.69.1" } } } diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/versions.tf b/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/versions.tf index ea77e9348f..d1bb72b8b4 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/versions.tf +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/versions.tf @@ -20,11 +20,11 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 4.65.2" + version = "~> 4.69.1" } google-beta = { source = "hashicorp/google-beta" - version = "~> 4.65.2" + version = "~> 4.69.1" } } } diff --git a/tools/validate_configs/golden_copies/expectations/igc_tf/one/versions.tf b/tools/validate_configs/golden_copies/expectations/igc_tf/one/versions.tf index ea77e9348f..d1bb72b8b4 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_tf/one/versions.tf +++ b/tools/validate_configs/golden_copies/expectations/igc_tf/one/versions.tf @@ -20,11 +20,11 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 4.65.2" + version = "~> 4.69.1" } google-beta = { source = "hashicorp/google-beta" - version = "~> 4.65.2" + version = "~> 4.69.1" } } } diff --git a/tools/validate_configs/golden_copies/expectations/igc_tf/zero/versions.tf b/tools/validate_configs/golden_copies/expectations/igc_tf/zero/versions.tf index ea77e9348f..d1bb72b8b4 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_tf/zero/versions.tf +++ b/tools/validate_configs/golden_copies/expectations/igc_tf/zero/versions.tf @@ -20,11 +20,11 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 4.65.2" + version = "~> 4.69.1" } google-beta = { source = "hashicorp/google-beta" - version = "~> 4.65.2" + version = "~> 4.69.1" } } } From 49c3cef517d51ff7b610191f6e385bc8085459da Mon Sep 17 
00:00:00 2001 From: Nick Stroud Date: Mon, 12 Jun 2023 18:33:41 -0700 Subject: [PATCH 41/92] Add templating to gke-job-template for persistent volume claims --- .../modules/compute/gke-job-template/README.md | 1 + .../modules/compute/gke-job-template/main.tf | 10 +++++++++- .../templates/gke-job-base.yaml.tftpl | 15 +++++++++++++++ .../modules/compute/gke-job-template/variables.tf | 10 ++++++++++ 4 files changed, 35 insertions(+), 1 deletion(-) diff --git a/community/modules/compute/gke-job-template/README.md b/community/modules/compute/gke-job-template/README.md index 706042f1b6..27d713f6aa 100644 --- a/community/modules/compute/gke-job-template/README.md +++ b/community/modules/compute/gke-job-template/README.md @@ -95,6 +95,7 @@ No modules. | [node\_count](#input\_node\_count) | How many nodes the job should run in parallel. | `number` | `1` | no | | [node\_pool\_name](#input\_node\_pool\_name) | A list of node pool names on which to run the job. Can be populated via `use` feild. | `list(string)` | `[]` | no | | [node\_selectors](#input\_node\_selectors) | A list of node selectors to use to place the job. |
list(object({
key = string
value = string
}))
| `[]` | no | +| [persistent\_volume\_claim](#input\_persistent\_volume\_claim) | A list of objects that describes a k8s PVC that is to be used and mounted on the job. Generally supplied by the gke-persistent-volume module. |
list(object({
name = string
mount_path = string
mount_options = string
}))
| `[]` | no | | [random\_name\_sufix](#input\_random\_name\_sufix) | Appends a random suffix to the job name to avoid clashes. | `bool` | `true` | no | | [requested\_cpu\_per\_pod](#input\_requested\_cpu\_per\_pod) | The requested cpu per pod. If null, allocatable\_cpu\_per\_node will be used to claim whole nodes. If provided will override allocatable\_cpu\_per\_node. | `number` | `-1` | no | | [restart\_policy](#input\_restart\_policy) | Job restart policy. Only a RestartPolicy equal to `Never` or `OnFailure` is allowed. | `string` | `"Never"` | no | diff --git a/community/modules/compute/gke-job-template/main.tf b/community/modules/compute/gke-job-template/main.tf index e9e80b1671..9c0256e8fe 100644 --- a/community/modules/compute/gke-job-template/main.tf +++ b/community/modules/compute/gke-job-template/main.tf @@ -45,8 +45,15 @@ locals { # May come from node pool in future. gpu_limit_string = alltrue(var.has_gpu) ? "1" : null - suffix = var.random_name_sufix ? "-${random_id.resource_name_suffix.hex}" : "" + volumes = [for v in var.persistent_volume_claim : + { + name = "vol-${v.name}" + mount_path = v.mount_path + claim_name = v.name + } + ] + suffix = var.random_name_sufix ? "-${random_id.resource_name_suffix.hex}" : "" machine_family_node_selector = var.machine_family != null ? 
[{ key = "cloud.google.com/machine-family" value = var.machine_family @@ -70,6 +77,7 @@ locals { backoff_limit = var.backoff_limit tolerations = distinct(var.tolerations) labels = local.labels + volumes = local.volumes } ) diff --git a/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl b/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl index 09f609424f..429d6a39c0 100644 --- a/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl +++ b/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl @@ -58,5 +58,20 @@ spec: cpu: ${cpu_request} %{~ endif ~} %{~ endif ~} + %{~ if length(volumes) != 0 ~} + volumeMounts: + %{~ for v in volumes ~} + - name: ${v.name} + mountPath: ${v.mount_path} + %{~ endfor ~} + %{~ endif ~} + %{~ if length(volumes) != 0 ~} + volumes: + %{~ for v in volumes ~} + - name: ${v.name} + persistentVolumeClaim: + claimName: ${v.claim_name} + %{~ endfor ~} + %{~ endif ~} restartPolicy: ${restart_policy} backoffLimit: ${backoff_limit} diff --git a/community/modules/compute/gke-job-template/variables.tf b/community/modules/compute/gke-job-template/variables.tf index 5bd34d2c1d..4135449a3c 100644 --- a/community/modules/compute/gke-job-template/variables.tf +++ b/community/modules/compute/gke-job-template/variables.tf @@ -113,6 +113,16 @@ variable "random_name_sufix" { default = true } +variable "persistent_volume_claim" { + description = "A list of objects that describes a k8s PVC that is to be used and mounted on the job. Generally supplied by the gke-persistent-volume module." + type = list(object({ + name = string + mount_path = string + mount_options = string + })) + default = [] +} + variable "labels" { description = "Labels to add to the GKE job template. Key-value pairs." 
type = map(string) From d5d0124751fbac39af4dc981afbd172f14e0ff49 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Sun, 11 Jun 2023 15:18:11 -0700 Subject: [PATCH 42/92] Add boilerplate for gke-persistent-volume --- .../file-system/gke-persistent-volume/main.tf | 20 +++++++++++++++++++ .../gke-persistent-volume/outputs.tf | 15 ++++++++++++++ .../gke-persistent-volume/variables.tf | 20 +++++++++++++++++++ .../gke-persistent-volume/versions.tf | 17 ++++++++++++++++ 4 files changed, 72 insertions(+) create mode 100644 community/modules/file-system/gke-persistent-volume/main.tf create mode 100644 community/modules/file-system/gke-persistent-volume/outputs.tf create mode 100644 community/modules/file-system/gke-persistent-volume/variables.tf create mode 100644 community/modules/file-system/gke-persistent-volume/versions.tf diff --git a/community/modules/file-system/gke-persistent-volume/main.tf b/community/modules/file-system/gke-persistent-volume/main.tf new file mode 100644 index 0000000000..a04f5f44b0 --- /dev/null +++ b/community/modules/file-system/gke-persistent-volume/main.tf @@ -0,0 +1,20 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +locals { + # This label allows for billing report tracking based on module. 
+ labels = merge(var.labels, { ghpc_module = "gke-persistent-volume" }) +} diff --git a/community/modules/file-system/gke-persistent-volume/outputs.tf b/community/modules/file-system/gke-persistent-volume/outputs.tf new file mode 100644 index 0000000000..3a497ec474 --- /dev/null +++ b/community/modules/file-system/gke-persistent-volume/outputs.tf @@ -0,0 +1,15 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ diff --git a/community/modules/file-system/gke-persistent-volume/variables.tf b/community/modules/file-system/gke-persistent-volume/variables.tf new file mode 100644 index 0000000000..cd1a89f639 --- /dev/null +++ b/community/modules/file-system/gke-persistent-volume/variables.tf @@ -0,0 +1,20 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +variable "labels" { + description = "GCE resource labels to be applied to resources. Key-value pairs." 
+ type = map(string) +} diff --git a/community/modules/file-system/gke-persistent-volume/versions.tf b/community/modules/file-system/gke-persistent-volume/versions.tf new file mode 100644 index 0000000000..a6d43c4c8f --- /dev/null +++ b/community/modules/file-system/gke-persistent-volume/versions.tf @@ -0,0 +1,17 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +terraform { + required_version = ">= 1.0" +} From c6f6cd0720dab6026757bdf3c1b95cca0f98bf6b Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Mon, 12 Jun 2023 22:45:49 -0700 Subject: [PATCH 43/92] Add outputs to filestore module needed by gke-persistent-volume --- modules/file-system/filestore/README.md | 2 ++ modules/file-system/filestore/outputs.tf | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/modules/file-system/filestore/README.md b/modules/file-system/filestore/README.md index 333122b8b5..74383a5252 100644 --- a/modules/file-system/filestore/README.md +++ b/modules/file-system/filestore/README.md @@ -172,6 +172,8 @@ No modules. | Name | Description | |------|-------------| +| [capacity\_gb](#output\_capacity\_gb) | File share capacity in GiB. 
| +| [filestore\_id](#output\_filestore\_id) | An identifier for the resource with format `projects/{{project}}/locations/{{location}}/instances/{{name}}` | | [install\_nfs\_client](#output\_install\_nfs\_client) | Script for installing NFS client | | [install\_nfs\_client\_runner](#output\_install\_nfs\_client\_runner) | Runner to install NFS client using the startup-script module | | [mount\_runner](#output\_mount\_runner) | Runner to mount the file-system using an ansible playbook. The startup-script
module will automatically handle installation of ansible.
- id: example-startup-script
source: modules/scripts/startup-script
settings:
runners:
- $(your-fs-id.mount\_runner)
... | diff --git a/modules/file-system/filestore/outputs.tf b/modules/file-system/filestore/outputs.tf index e9a546c316..bd9126798c 100644 --- a/modules/file-system/filestore/outputs.tf +++ b/modules/file-system/filestore/outputs.tf @@ -50,3 +50,13 @@ output "mount_runner" { EOT value = local.mount_runner } + +output "filestore_id" { + description = "An identifier for the resource with format `projects/{{project}}/locations/{{location}}/instances/{{name}}`" + value = google_filestore_instance.filestore_instance.id +} + +output "capacity_gb" { + description = "File share capacity in GiB." + value = google_filestore_instance.filestore_instance.file_shares[0].capacity_gb +} From deaeff69c3d5fff8f66854fc8d1ee1b1bf324b40 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Mon, 12 Jun 2023 23:15:38 -0700 Subject: [PATCH 44/92] Add support for filestore in gke-persistent-volume --- .../gke-persistent-volume/README.md | 62 +++++++++++++++++ .../file-system/gke-persistent-volume/main.tf | 68 +++++++++++++++++++ .../gke-persistent-volume/outputs.tf | 10 +++ .../templates/filestore-pv.yaml.tftpl | 23 +++++++ .../templates/filestore-pvc.yaml.tftpl | 17 +++++ .../gke-persistent-volume/variables.tf | 32 +++++++++ .../gke-persistent-volume/versions.tf | 17 +++++ 7 files changed, 229 insertions(+) create mode 100644 community/modules/file-system/gke-persistent-volume/README.md create mode 100644 community/modules/file-system/gke-persistent-volume/templates/filestore-pv.yaml.tftpl create mode 100644 community/modules/file-system/gke-persistent-volume/templates/filestore-pvc.yaml.tftpl diff --git a/community/modules/file-system/gke-persistent-volume/README.md b/community/modules/file-system/gke-persistent-volume/README.md new file mode 100644 index 0000000000..b95628d1af --- /dev/null +++ b/community/modules/file-system/gke-persistent-volume/README.md @@ -0,0 +1,62 @@ + +Copyright 2023 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this 
file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.0 | +| [google](#requirement\_google) | >= 4.42 | +| [kubectl](#requirement\_kubectl) | >= 1.7.0 | +| [local](#requirement\_local) | >= 2.0.0 | + +## Providers + +| Name | Version | +|------|---------| +| [google](#provider\_google) | >= 4.42 | +| [kubectl](#provider\_kubectl) | >= 1.7.0 | +| [local](#provider\_local) | >= 2.0.0 | + +## Modules + +No modules. + +## Resources + +| Name | Type | +|------|------| +| [kubectl_manifest.filestore_pv](https://registry.terraform.io/providers/gavinbunney/kubectl/latest/docs/resources/manifest) | resource | +| [kubectl_manifest.filestore_pvc](https://registry.terraform.io/providers/gavinbunney/kubectl/latest/docs/resources/manifest) | resource | +| [local_file.debug_file](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | +| [google_client_config.default](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/client_config) | data source | +| [google_container_cluster.gke_cluster](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/container_cluster) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [capacity\_gb](#input\_capacity\_gb) | The storage capacity with which to create the persistent volume. 
| `number` | n/a | yes | +| [cluster\_id](#input\_cluster\_id) | An identifier for the GKE cluster in the format `projects/{{project}}/locations/{{location}}/clusters/{{cluster}}` | `string` | n/a | yes | +| [filestore\_id](#input\_filestore\_id) | An identifier for a filestore with the format `projects/{{project}}/locations/{{location}}/instances/{{name}}`. | `string` | n/a | yes | +| [labels](#input\_labels) | GCE resource labels to be applied to resources. Key-value pairs. | `map(string)` | n/a | yes | +| [network\_storage](#input\_network\_storage) | Network attached storage mount to be configured. |
object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
})
| n/a | yes | + +## Outputs + +| Name | Description | +|------|-------------| +| [persistent\_volume\_claim](#output\_persistent\_volume\_claim) | An object that describes a k8s PVC created by this module. | + diff --git a/community/modules/file-system/gke-persistent-volume/main.tf b/community/modules/file-system/gke-persistent-volume/main.tf index a04f5f44b0..4cdeeae686 100644 --- a/community/modules/file-system/gke-persistent-volume/main.tf +++ b/community/modules/file-system/gke-persistent-volume/main.tf @@ -18,3 +18,71 @@ locals { # This label allows for billing report tracking based on module. labels = merge(var.labels, { ghpc_module = "gke-persistent-volume" }) } + +locals { + split_filestore_id = var.filestore_id != null ? split("/", var.filestore_id) : [null, null, null, null, null, null] + location = local.split_filestore_id[3] + filestore_name = local.split_filestore_id[5] + filestore_share_name = trimprefix(var.network_storage.remote_mount, "/") + + pv_name = "${local.filestore_name}-pv" + pvc_name = "${local.filestore_name}-pvc" + + filestore_pv_contents = templatefile( + "${path.module}/templates/filestore-pv.yaml.tftpl", + { + pv_name = local.pv_name + capacity = "${var.capacity_gb}Gi" + location = local.location + filestore_name = local.filestore_name + share_name = local.filestore_share_name + ip_address = var.network_storage.server_ip + labels = local.labels + } + ) + + filestore_pvc_contents = templatefile( + "${path.module}/templates/filestore-pvc.yaml.tftpl", + { + pv_name = local.pv_name + capacity = "${var.capacity_gb}Gi" + pvc_name = local.pvc_name + labels = local.labels + } + ) + + split_cluster_id = split("/", var.cluster_id) + cluster_name = local.split_cluster_id[5] + cluster_location = local.split_cluster_id[3] +} + +resource "local_file" "debug_file" { + content = <<-EOF + ${local.filestore_pv_contents} + ${local.filestore_pvc_contents} + EOF + filename = "${path.root}/pv-pvc-debug-file-${local.filestore_name}.yaml" +} + +data 
"google_container_cluster" "gke_cluster" { + name = local.cluster_name + location = local.cluster_location +} + +data "google_client_config" "default" {} + +provider "kubectl" { + host = "https://${data.google_container_cluster.gke_cluster.endpoint}" + cluster_ca_certificate = base64decode(data.google_container_cluster.gke_cluster.master_auth[0].cluster_ca_certificate) + token = data.google_client_config.default.access_token + load_config_file = false +} + +resource "kubectl_manifest" "filestore_pv" { + yaml_body = local.filestore_pv_contents +} + +resource "kubectl_manifest" "filestore_pvc" { + yaml_body = local.filestore_pvc_contents + depends_on = [kubectl_manifest.filestore_pv] +} diff --git a/community/modules/file-system/gke-persistent-volume/outputs.tf b/community/modules/file-system/gke-persistent-volume/outputs.tf index 3a497ec474..8ba4491695 100644 --- a/community/modules/file-system/gke-persistent-volume/outputs.tf +++ b/community/modules/file-system/gke-persistent-volume/outputs.tf @@ -13,3 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +output "persistent_volume_claim" { + description = "An object that describes a k8s PVC created by this module." 
+ value = { + name = local.pvc_name + mount_path = var.network_storage.local_mount + mount_options = var.network_storage.mount_options + } + depends_on = [kubectl_manifest.filestore_pvc] +} diff --git a/community/modules/file-system/gke-persistent-volume/templates/filestore-pv.yaml.tftpl b/community/modules/file-system/gke-persistent-volume/templates/filestore-pv.yaml.tftpl new file mode 100644 index 0000000000..cfda33978c --- /dev/null +++ b/community/modules/file-system/gke-persistent-volume/templates/filestore-pv.yaml.tftpl @@ -0,0 +1,23 @@ +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: ${pv_name} + labels: + %{~ for key, val in labels ~} + ${key}: ${val} + %{~ endfor ~} +spec: + storageClassName: "" + capacity: + storage: ${capacity} + accessModes: + - ReadWriteMany + persistentVolumeReclaimPolicy: Retain + volumeMode: Filesystem + csi: + driver: filestore.csi.storage.gke.io + volumeHandle: "modeInstance/${location}/${filestore_name}/${share_name}" + volumeAttributes: + ip: ${ip_address} + volume: ${share_name} diff --git a/community/modules/file-system/gke-persistent-volume/templates/filestore-pvc.yaml.tftpl b/community/modules/file-system/gke-persistent-volume/templates/filestore-pvc.yaml.tftpl new file mode 100644 index 0000000000..5bcd735807 --- /dev/null +++ b/community/modules/file-system/gke-persistent-volume/templates/filestore-pvc.yaml.tftpl @@ -0,0 +1,17 @@ +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: ${pvc_name} + labels: + %{~ for key, val in labels ~} + ${key}: ${val} + %{~ endfor ~} +spec: + accessModes: + - ReadWriteMany + storageClassName: "" + volumeName: ${pv_name} + resources: + requests: + storage: ${capacity} diff --git a/community/modules/file-system/gke-persistent-volume/variables.tf b/community/modules/file-system/gke-persistent-volume/variables.tf index cd1a89f639..a7ef4d1d83 100644 --- a/community/modules/file-system/gke-persistent-volume/variables.tf +++ 
b/community/modules/file-system/gke-persistent-volume/variables.tf @@ -14,6 +14,38 @@ * limitations under the License. */ +variable "cluster_id" { + description = "An identifier for the GKE cluster in the format `projects/{{project}}/locations/{{location}}/clusters/{{cluster}}`" + type = string +} + +variable "network_storage" { + description = "Network attached storage mount to be configured." + type = object({ + server_ip = string, + remote_mount = string, + local_mount = string, + fs_type = string, + mount_options = string, + client_install_runner = map(string) + mount_runner = map(string) + }) +} + +variable "filestore_id" { + description = "An identifier for a filestore with the format `projects/{{project}}/locations/{{location}}/instances/{{name}}`." + type = string + validation { + condition = var.filestore_id == null || length(split("/", var.filestore_id)) == 6 + error_message = "filestore_id must be in the format of 'projects/{{project}}/locations/{{location}}/instances/{{name}}'." + } +} + +variable "capacity_gb" { + description = "The storage capacity with which to create the persistent volume." + type = number +} + variable "labels" { description = "GCE resource labels to be applied to resources. Key-value pairs." 
type = map(string) diff --git a/community/modules/file-system/gke-persistent-volume/versions.tf b/community/modules/file-system/gke-persistent-volume/versions.tf index a6d43c4c8f..56c950669f 100644 --- a/community/modules/file-system/gke-persistent-volume/versions.tf +++ b/community/modules/file-system/gke-persistent-volume/versions.tf @@ -14,4 +14,21 @@ terraform { required_version = ">= 1.0" + required_providers { + google = { + source = "hashicorp/google" + version = ">= 4.42" + } + kubectl = { + source = "gavinbunney/kubectl" + version = ">= 1.7.0" + } + local = { + source = "hashicorp/local" + version = ">= 2.0.0" + } + } + provider_meta "google" { + module_name = "blueprints/terraform/hpc-toolkit:gke-persistent-volume/v1.18.0" + } } From c55b8632a303e525594305bf31838dbd27845132 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Wed, 14 Jun 2023 14:14:52 -0700 Subject: [PATCH 45/92] Address feedback from #1442 --- community/modules/compute/gke-job-template/README.md | 2 +- community/modules/compute/gke-job-template/main.tf | 2 +- .../gke-job-template/templates/gke-job-base.yaml.tftpl | 4 ++-- .../modules/compute/gke-job-template/variables.tf | 2 +- .../file-system/gke-persistent-volume/README.md | 2 +- .../modules/file-system/gke-persistent-volume/main.tf | 10 ++++------ .../file-system/gke-persistent-volume/outputs.tf | 2 +- .../file-system/gke-persistent-volume/variables.tf | 2 +- 8 files changed, 12 insertions(+), 14 deletions(-) diff --git a/community/modules/compute/gke-job-template/README.md b/community/modules/compute/gke-job-template/README.md index 27d713f6aa..f3a0cfc8ca 100644 --- a/community/modules/compute/gke-job-template/README.md +++ b/community/modules/compute/gke-job-template/README.md @@ -95,7 +95,7 @@ No modules. | [node\_count](#input\_node\_count) | How many nodes the job should run in parallel. | `number` | `1` | no | | [node\_pool\_name](#input\_node\_pool\_name) | A list of node pool names on which to run the job. 
Can be populated via `use` feild. | `list(string)` | `[]` | no | | [node\_selectors](#input\_node\_selectors) | A list of node selectors to use to place the job. |
list(object({
key = string
value = string
}))
| `[]` | no | -| [persistent\_volume\_claim](#input\_persistent\_volume\_claim) | A list of objects that describes a k8s PVC that is to be used and mounted on the job. Generally supplied by the gke-persistent-volume module. |
list(object({
name = string
mount_path = string
mount_options = string
}))
| `[]` | no | +| [persistent\_volume\_claims](#input\_persistent\_volume\_claims) | A list of objects that describes a k8s PVC that is to be used and mounted on the job. Generally supplied by the gke-persistent-volume module. |
list(object({
name = string
mount_path = string
mount_options = string
}))
| `[]` | no | | [random\_name\_sufix](#input\_random\_name\_sufix) | Appends a random suffix to the job name to avoid clashes. | `bool` | `true` | no | | [requested\_cpu\_per\_pod](#input\_requested\_cpu\_per\_pod) | The requested cpu per pod. If null, allocatable\_cpu\_per\_node will be used to claim whole nodes. If provided will override allocatable\_cpu\_per\_node. | `number` | `-1` | no | | [restart\_policy](#input\_restart\_policy) | Job restart policy. Only a RestartPolicy equal to `Never` or `OnFailure` is allowed. | `string` | `"Never"` | no | diff --git a/community/modules/compute/gke-job-template/main.tf b/community/modules/compute/gke-job-template/main.tf index 9c0256e8fe..b7b719915e 100644 --- a/community/modules/compute/gke-job-template/main.tf +++ b/community/modules/compute/gke-job-template/main.tf @@ -45,7 +45,7 @@ locals { # May come from node pool in future. gpu_limit_string = alltrue(var.has_gpu) ? "1" : null - volumes = [for v in var.persistent_volume_claim : + volumes = [for v in var.persistent_volume_claims : { name = "vol-${v.name}" mount_path = v.mount_path diff --git a/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl b/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl index 429d6a39c0..8df480e7f2 100644 --- a/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl +++ b/community/modules/compute/gke-job-template/templates/gke-job-base.yaml.tftpl @@ -58,14 +58,14 @@ spec: cpu: ${cpu_request} %{~ endif ~} %{~ endif ~} - %{~ if length(volumes) != 0 ~} + %{~ if length(volumes) > 0 ~} volumeMounts: %{~ for v in volumes ~} - name: ${v.name} mountPath: ${v.mount_path} %{~ endfor ~} %{~ endif ~} - %{~ if length(volumes) != 0 ~} + %{~ if length(volumes) > 0 ~} volumes: %{~ for v in volumes ~} - name: ${v.name} diff --git a/community/modules/compute/gke-job-template/variables.tf b/community/modules/compute/gke-job-template/variables.tf index 4135449a3c..55fac4fee7 100644 --- 
a/community/modules/compute/gke-job-template/variables.tf +++ b/community/modules/compute/gke-job-template/variables.tf @@ -113,7 +113,7 @@ variable "random_name_sufix" { default = true } -variable "persistent_volume_claim" { +variable "persistent_volume_claims" { description = "A list of objects that describes a k8s PVC that is to be used and mounted on the job. Generally supplied by the gke-persistent-volume module." type = list(object({ name = string diff --git a/community/modules/file-system/gke-persistent-volume/README.md b/community/modules/file-system/gke-persistent-volume/README.md index b95628d1af..c5c6d820e4 100644 --- a/community/modules/file-system/gke-persistent-volume/README.md +++ b/community/modules/file-system/gke-persistent-volume/README.md @@ -58,5 +58,5 @@ No modules. | Name | Description | |------|-------------| -| [persistent\_volume\_claim](#output\_persistent\_volume\_claim) | An object that describes a k8s PVC created by this module. | +| [persistent\_volume\_claims](#output\_persistent\_volume\_claims) | An object that describes a k8s PVC created by this module. | diff --git a/community/modules/file-system/gke-persistent-volume/main.tf b/community/modules/file-system/gke-persistent-volume/main.tf index 4cdeeae686..9e09d691f7 100644 --- a/community/modules/file-system/gke-persistent-volume/main.tf +++ b/community/modules/file-system/gke-persistent-volume/main.tf @@ -20,9 +20,8 @@ locals { } locals { - split_filestore_id = var.filestore_id != null ? 
split("/", var.filestore_id) : [null, null, null, null, null, null] - location = local.split_filestore_id[3] - filestore_name = local.split_filestore_id[5] + location = split("/", var.filestore_id)[3] + filestore_name = split("/", var.filestore_id)[5] filestore_share_name = trimprefix(var.network_storage.remote_mount, "/") pv_name = "${local.filestore_name}-pv" @@ -51,9 +50,8 @@ locals { } ) - split_cluster_id = split("/", var.cluster_id) - cluster_name = local.split_cluster_id[5] - cluster_location = local.split_cluster_id[3] + cluster_name = split("/", var.cluster_id)[5] + cluster_location = split("/", var.cluster_id)[3] } resource "local_file" "debug_file" { diff --git a/community/modules/file-system/gke-persistent-volume/outputs.tf b/community/modules/file-system/gke-persistent-volume/outputs.tf index 8ba4491695..6cdbf388a3 100644 --- a/community/modules/file-system/gke-persistent-volume/outputs.tf +++ b/community/modules/file-system/gke-persistent-volume/outputs.tf @@ -14,7 +14,7 @@ * limitations under the License. */ -output "persistent_volume_claim" { +output "persistent_volume_claims" { description = "An object that describes a k8s PVC created by this module." value = { name = local.pvc_name diff --git a/community/modules/file-system/gke-persistent-volume/variables.tf b/community/modules/file-system/gke-persistent-volume/variables.tf index a7ef4d1d83..235affff0d 100644 --- a/community/modules/file-system/gke-persistent-volume/variables.tf +++ b/community/modules/file-system/gke-persistent-volume/variables.tf @@ -36,7 +36,7 @@ variable "filestore_id" { description = "An identifier for a filestore with the format `projects/{{project}}/locations/{{location}}/instances/{{name}}`." 
type = string validation { - condition = var.filestore_id == null || length(split("/", var.filestore_id)) == 6 + condition = length(split("/", var.filestore_id)) == 6 error_message = "filestore_id must be in the format of 'projects/{{project}}/locations/{{location}}/instances/{{name}}'." } } From 5d59fa789820a4a16e23c4db6ddace46cd178f62 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Mon, 19 Jun 2023 22:47:33 -0700 Subject: [PATCH 46/92] Add documentation for gke-persistent-volume-module --- .../gke-persistent-volume/README.md | 70 +++++++++++++++++++ modules/README.md | 2 + 2 files changed, 72 insertions(+) diff --git a/community/modules/file-system/gke-persistent-volume/README.md b/community/modules/file-system/gke-persistent-volume/README.md index c5c6d820e4..d4810a5940 100644 --- a/community/modules/file-system/gke-persistent-volume/README.md +++ b/community/modules/file-system/gke-persistent-volume/README.md @@ -1,3 +1,73 @@ +## Description + +This module creates Kubernetes Persistent Volumes (PV) and Persistent Volume +Claims (PVC) that can be used by a [gke-job-template]. + +Currently, the `gke-persistent-volume` module only works with Filestore. Each +`gke-persistent-volume` can only use a single Filestore. If multiple Filestores +are used then multiple `gke-persistent-volume` modules are needed. + +> **_NOTE:_** This is an experimental module and the functionality and +> documentation will likely be updated in the near future. This module has only +> been tested in limited capacity. + +### Example + +The following example creates a Filestore and then uses the +`gke-persistent-volume` module to use the Filestore as shared storage in a +`gke-job-template`. 
+ +```yaml + - id: gke_cluster + source: community/modules/scheduler/gke-cluster + use: [network1] + settings: + master_authorized_networks: + - display_name: deployment-machine + cidr_block: /32 + + - id: datafs + source: modules/file-system/filestore + use: [network1] + settings: { local_mount: /data } + + - id: datafs-pv + source: community/modules/file-system/gke-persistent-volume + use: [datafs, gke_cluster] + + - id: job-template + source: community/modules/compute/gke-job-template + use: [datafs-pv, compute_pool] +``` + +### Authorized Network + +Since the `gke-persistent-volume` module is making calls to the Kubernetes API +to create Kubernetes entities, the machine performing the deployment must be +authorized to connect to the Kubernetes API. You can add the +`master_authorized_networks` settings block, as shown in the example above, with +the IP address of the machine performing the deployment. This will ensure that +the deploying machine can connect to the cluster. + +### Connecting Via Use + +The diagram below shows the valid `use` relationships for the GKE HPC Toolkit +modules. For example the `gke-persistent-volume` module can `use` a +`gke-cluster` module and a `filestore` module, as shown in the example above. + +```mermaid +graph TD; + vpc-->|OneToMany|gke-cluster; + gke-cluster-->|OneToMany|gke-node-pool; + gke-node-pool-->|ManyToMany|gke-job-template; + gke-cluster-->|OneToMany|gke-persistent-volume; + gke-persistent-volume-->|ManyToMany|gke-job-template; + vpc-->|OneToMany|filestore; + filestore-->|OneToOne|gke-persistent-volume; +``` + +## License + Copyright 2023 Google LLC diff --git a/modules/README.md b/modules/README.md index 931a210756..7fe1aad833 100644 --- a/modules/README.md +++ b/modules/README.md @@ -80,6 +80,7 @@ Modules that are still in development and less stable are labeled with the * **[Intel-DAOS]** ![community-badge] : Creates a [DAOS](https://docs.daos.io/) file system. 
* **[cloud-storage-bucket]** ![community-badge] ![experimental-badge] : Creates a Google Cloud Storage (GCS) bucket. +* **[gke-persistent-volume]** ![community-badge] ![experimental-badge] : Creates persistent volumes and persistent volume claims for shared storage. * **[nfs-server]** ![community-badge] ![experimental-badge] : Creates a VM and configures an NFS server that can be mounted by other VM. @@ -89,6 +90,7 @@ Modules that are still in development and less stable are labeled with the [intel-daos]: ../community/modules/file-system/Intel-DAOS/README.md [nfs-server]: ../community/modules/file-system/nfs-server/README.md [cloud-storage-bucket]: ../community/modules/file-system/cloud-storage-bucket/README.md +[gke-persistent-volume]: ../community/modules/file-system/gke-persistent-volume/README.md ### Monitoring From 1df86bf941c8309eb1a072331c2ee478bd4fb1fc Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Fri, 16 Jun 2023 19:25:42 -0500 Subject: [PATCH 47/92] Fix capitalization of error messages --- pkg/modulereader/resreader.go | 2 +- pkg/modulereader/resreader_test.go | 6 +++--- pkg/sourcereader/embedded.go | 2 +- pkg/sourcereader/embedded_test.go | 2 +- pkg/sourcereader/git.go | 2 +- pkg/sourcereader/git_test.go | 2 +- pkg/sourcereader/local.go | 2 +- pkg/sourcereader/local_test.go | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pkg/modulereader/resreader.go b/pkg/modulereader/resreader.go index 6b11fc7e3c..80fb4b238e 100644 --- a/pkg/modulereader/resreader.go +++ b/pkg/modulereader/resreader.go @@ -145,7 +145,7 @@ func GetModuleInfo(source string, kind string) (ModuleInfo, error) { modPath = source default: - return ModuleInfo{}, fmt.Errorf("Source is not valid: %s", source) + return ModuleInfo{}, fmt.Errorf("source is not valid: %s", source) } reader := Factory(kind) diff --git a/pkg/modulereader/resreader_test.go b/pkg/modulereader/resreader_test.go index 6aeae64127..16dcaeecb8 100644 --- a/pkg/modulereader/resreader_test.go +++ 
b/pkg/modulereader/resreader_test.go @@ -112,7 +112,7 @@ func (s *MySuite) TestGetModuleInfo_Embedded(c *C) { // Invalid: Unsupported Module Source badSource := "gcs::https://www.googleapis.com/storage/v1/GoogleCloudPlatform/hpc-toolkit/modules" moduleInfo, err = GetModuleInfo(badSource, tfKindString) - expectedErr = "Source is not valid: .*" + expectedErr = "source is not valid: .*" c.Assert(err, ErrorMatches, expectedErr) } @@ -127,7 +127,7 @@ func (s *MySuite) TestGetModuleInfo_Git(c *C) { // Invalid: Unsupported Module Source badSource := "gcs::https://www.googleapis.com/storage/v1/GoogleCloudPlatform/hpc-toolkit/modules" _, err = GetModuleInfo(badSource, tfKindString) - expectedErr = "Source is not valid: .*" + expectedErr = "source is not valid: .*" c.Assert(err, ErrorMatches, expectedErr) } @@ -148,7 +148,7 @@ func (s *MySuite) TestGetModuleInfo_Local(c *C) { // Invalid: Unsupported Module Source badSource := "gcs::https://www.googleapis.com/storage/v1/GoogleCloudPlatform/hpc-toolkit/modules" moduleInfo, err = GetModuleInfo(badSource, tfKindString) - expectedErr = "Source is not valid: .*" + expectedErr = "source is not valid: .*" c.Assert(err, ErrorMatches, expectedErr) } diff --git a/pkg/sourcereader/embedded.go b/pkg/sourcereader/embedded.go index 920f198911..e681755950 100644 --- a/pkg/sourcereader/embedded.go +++ b/pkg/sourcereader/embedded.go @@ -103,7 +103,7 @@ func (r EmbeddedSourceReader) GetModule(modPath string, copyPath string) error { return fmt.Errorf("embedded file system is not initialized") } if !IsEmbeddedPath(modPath) { - return fmt.Errorf("Source is not valid: %s", modPath) + return fmt.Errorf("source is not valid: %s", modPath) } modDir, err := copyFSToTempDir(ModuleFS, modPath) diff --git a/pkg/sourcereader/embedded_test.go b/pkg/sourcereader/embedded_test.go index 003384d24e..3267e3921d 100644 --- a/pkg/sourcereader/embedded_test.go +++ b/pkg/sourcereader/embedded_test.go @@ -153,7 +153,7 @@ func (s *MySuite) TestGetModule_Embedded(c 
*C) { // Invalid: Unsupported Module Source by EmbeddedSourceReader badSource := "gcs::https://www.googleapis.com/storage/v1/GoogleCloudPlatform/hpc-toolkit/modules" err = reader.GetModule(badSource, dest) - expectedErr = "Source is not valid: .*" + expectedErr = "source is not valid: .*" c.Assert(err, ErrorMatches, expectedErr) } diff --git a/pkg/sourcereader/git.go b/pkg/sourcereader/git.go index 2f3643c188..dcc1ef06a9 100644 --- a/pkg/sourcereader/git.go +++ b/pkg/sourcereader/git.go @@ -59,7 +59,7 @@ func copyGitModules(srcPath string, destPath string) error { // GetModule copies the git source to a provided destination (the deployment directory) func (r GitSourceReader) GetModule(modPath string, copyPath string) error { if !IsGitPath(modPath) { - return fmt.Errorf("Source is not valid: %s", modPath) + return fmt.Errorf("source is not valid: %s", modPath) } modDir, err := ioutil.TempDir("", "git-module-*") diff --git a/pkg/sourcereader/git_test.go b/pkg/sourcereader/git_test.go index 0dd321e0f8..4160c6fa72 100644 --- a/pkg/sourcereader/git_test.go +++ b/pkg/sourcereader/git_test.go @@ -62,6 +62,6 @@ func (s *MySuite) TestGetModule_Git(c *C) { // Invalid: Unsupported Module Source badSource := "gcs::https://www.googleapis.com/storage/v1/GoogleCloudPlatform/hpc-toolkit/modules" err = reader.GetModule(badSource, tfKindString) - expectedErr = "Source is not valid: .*" + expectedErr = "source is not valid: .*" c.Assert(err, ErrorMatches, expectedErr) } diff --git a/pkg/sourcereader/local.go b/pkg/sourcereader/local.go index ee68b369da..a8744cb6f5 100644 --- a/pkg/sourcereader/local.go +++ b/pkg/sourcereader/local.go @@ -25,7 +25,7 @@ type LocalSourceReader struct{} // GetModule copies the local source to a provided destination (the deployment directory) func (r LocalSourceReader) GetModule(modPath string, copyPath string) error { if !IsLocalPath(modPath) { - return fmt.Errorf("Source is not valid: %s", modPath) + return fmt.Errorf("source is not valid: %s", modPath) 
} if _, err := os.Stat(modPath); os.IsNotExist(err) { diff --git a/pkg/sourcereader/local_test.go b/pkg/sourcereader/local_test.go index 4e6f9d0b38..3bc1330913 100644 --- a/pkg/sourcereader/local_test.go +++ b/pkg/sourcereader/local_test.go @@ -122,6 +122,6 @@ func (s *MySuite) TestGetModule_Local(c *C) { // Invalid: Unsupported Module Source by LocalSourceReader badSource := "gcs::https://www.googleapis.com/storage/v1/GoogleCloudPlatform/hpc-toolkit/modules" err = reader.GetModule(badSource, dest) - expectedErr = "Source is not valid: .*" + expectedErr = "source is not valid: .*" c.Assert(err, ErrorMatches, expectedErr) } From c903f306c56c3630268fb3792861602231e1610f Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Fri, 16 Jun 2023 19:25:43 -0500 Subject: [PATCH 48/92] Remove unused test func --- pkg/modulereader/resreader_test.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pkg/modulereader/resreader_test.go b/pkg/modulereader/resreader_test.go index 16dcaeecb8..e9b17d2240 100644 --- a/pkg/modulereader/resreader_test.go +++ b/pkg/modulereader/resreader_test.go @@ -24,7 +24,6 @@ import ( "reflect" "testing" - "github.com/spf13/afero" . 
"gopkg.in/check.v1" "gopkg.in/yaml.v3" ) @@ -152,15 +151,6 @@ func (s *MySuite) TestGetModuleInfo_Local(c *C) { c.Assert(err, ErrorMatches, expectedErr) } -// hcl_utils.go -func getTestFS() afero.IOFS { - aferoFS := afero.NewMemMapFs() - aferoFS.MkdirAll("modules/network/vpc", 0755) - afero.WriteFile( - aferoFS, "modules/network/vpc/main.tf", []byte(testMainTf), 0644) - return afero.NewIOFS(aferoFS) -} - func (s *MySuite) TestGetHCLInfo(c *C) { // Invalid source path - path does not exists fakePath := "./not/a/real/path" From 3366a2d253620e9739aba86ab63f19c349045ed2 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 20 Jun 2023 12:10:41 -0700 Subject: [PATCH 49/92] Relax `TestNetworkStorage` to accomodate `gke-persistent-volume` (#1483) --- pkg/inspect/modules_test.go | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/pkg/inspect/modules_test.go b/pkg/inspect/modules_test.go index 6905f91be3..758d6abea6 100644 --- a/pkg/inspect/modules_test.go +++ b/pkg/inspect/modules_test.go @@ -15,6 +15,7 @@ package inspect_test import ( + "fmt" "hpc-toolkit/pkg/inspect" "hpc-toolkit/pkg/modulereader" "log" @@ -147,7 +148,7 @@ func TestLabelsType(t *testing.T) { } func TestNetworkStorage(t *testing.T) { - expected := `list(object({ + obj := modulereader.NormalizeType(`object({ server_ip = string remote_mount = string local_mount = string @@ -155,12 +156,15 @@ func TestNetworkStorage(t *testing.T) { mount_options = string client_install_runner = map(string) mount_runner = map(string) - }))` - for _, mod := range notEmpty(query(hasInput("network_storage")), t) { - checkInputType(t, mod, "network_storage", expected) - } + })`) + lst := modulereader.NormalizeType(fmt.Sprintf("list(%s)", obj)) - for _, mod := range query(all(ofRole("file-system"), not(hasOutput("network_storage")))) { - t.Errorf("%q does not output 'network_storage'", mod.Source) + for _, mod := range notEmpty(query(hasInput("network_storage")), t) { + i, _ := 
mod.Input("network_storage") + got := modulereader.NormalizeType(i.Type) + if got != obj && got != lst { + t.Errorf("%s `network_storage` has unexpected type expected:\n%#v\nor\n%#v\ngot:\n%#v", + mod.Source, obj, lst, got) + } } } From 1761f4e354fc47aba94347d04419a1feb8232870 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 20 Jun 2023 13:08:58 -0700 Subject: [PATCH 50/92] Deprecated `WrapSettingsWith` (#1466) * Don't use `WrapSettingsWith` for `labels` * Construct `merge` expression instead of using `WrapSettingsWith`; * Move `TokensForValue` into `pkg/config`; * Add `modulewriter_test` to compensate for missed coverage. * Mark `WrapSettingsWith` as deprecated, remove any usage; * Change `addListValue` to construct an expression; * Add support for multi-module `ProductOfModuleUse`; * Add "golden copy" test for `flatten` and `merge` usage. --- pkg/config/config.go | 60 ++++++--- pkg/config/config_test.go | 11 +- pkg/config/expand.go | 126 +++++++++--------- pkg/config/expand_test.go | 65 +++++---- pkg/config/expression.go | 73 ++++++++++ pkg/config/expression_test.go | 43 ++++++ pkg/modulewriter/hcl_utils.go | 64 +-------- pkg/modulewriter/hcl_utils_test.go | 44 ------ pkg/modulewriter/modulewriter_test.go | 43 +++--- pkg/modulewriter/tfwriter.go | 40 +----- .../golden_copies/configs/merge_flatten.yaml | 49 +++++++ .../.ghpc/artifacts/expanded_blueprint.yaml | 33 ++--- .../.ghpc/artifacts/expanded_blueprint.yaml | 11 +- .../artifacts/DO_NOT_MODIFY_THIS_DIRECTORY | 1 + .../.ghpc/artifacts/expanded_blueprint.yaml | 113 ++++++++++++++++ .../merge_flatten/instructions.txt | 23 ++++ .../expectations/merge_flatten/zero/main.tf | 74 ++++++++++ .../merge_flatten/zero/providers.tf | 27 ++++ .../merge_flatten/zero/terraform.tfvars | 28 ++++ .../merge_flatten/zero/variables.tf | 40 ++++++ .../merge_flatten/zero/versions.tf | 30 +++++ 21 files changed, 677 insertions(+), 321 deletions(-) create mode 100644 tools/validate_configs/golden_copies/configs/merge_flatten.yaml 
create mode 100644 tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/DO_NOT_MODIFY_THIS_DIRECTORY create mode 100644 tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/expanded_blueprint.yaml create mode 100644 tools/validate_configs/golden_copies/expectations/merge_flatten/instructions.txt create mode 100644 tools/validate_configs/golden_copies/expectations/merge_flatten/zero/main.tf create mode 100644 tools/validate_configs/golden_copies/expectations/merge_flatten/zero/providers.tf create mode 100644 tools/validate_configs/golden_copies/expectations/merge_flatten/zero/terraform.tfvars create mode 100644 tools/validate_configs/golden_copies/expectations/merge_flatten/zero/variables.tf create mode 100644 tools/validate_configs/golden_copies/expectations/merge_flatten/zero/versions.tf diff --git a/pkg/config/config.go b/pkg/config/config.go index 7eb3590eb0..7d5adbd89b 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -290,23 +290,15 @@ type ModuleID string // Module stores YAML definition of an HPC cluster component defined in a blueprint type Module struct { - Source string - Kind ModuleKind - ID ModuleID - Use []ModuleID `yaml:"use,omitempty"` - WrapSettingsWith map[string][]string `yaml:"wrapsettingswith,omitempty"` - Outputs []modulereader.OutputInfo `yaml:"outputs,omitempty"` - Settings Dict `yaml:"settings,omitempty"` + Source string + Kind ModuleKind + ID ModuleID + Use []ModuleID `yaml:"use,omitempty"` + Outputs []modulereader.OutputInfo `yaml:"outputs,omitempty"` + Settings Dict `yaml:"settings,omitempty"` // DEPRECATED fields, keep in the struct for backwards compatibility - RequiredApis interface{} `yaml:"required_apis,omitempty"` -} - -// createWrapSettingsWith ensures WrapSettingsWith field is not nil, if it is -// a new map is created. 
-func (m *Module) createWrapSettingsWith() { - if m.WrapSettingsWith == nil { - m.WrapSettingsWith = make(map[string][]string) - } + RequiredApis interface{} `yaml:"required_apis,omitempty"` + WrapSettingsWith interface{} `yaml:"wrapsettingswith,omitempty"` } // InfoOrDie returns the ModuleInfo for the module or panics @@ -369,8 +361,8 @@ func (m Module) listUnusedModules() []ModuleID { used := map[ModuleID]bool{} // Recurse through objects/maps/lists checking each element for having `ProductOfModuleUse` mark. cty.Walk(m.Settings.AsObject(), func(p cty.Path, v cty.Value) (bool, error) { - if mark, has := HasMark[ProductOfModuleUse](v); has { - used[mark.Module] = true + for _, mod := range IsProductOfModuleUse(v) { + used[mod] = true } return true, nil }) @@ -748,10 +740,34 @@ func (bp *Blueprint) checkBlueprintName() error { return nil } -// ProductOfModuleUse is a "mark" applied to values in Module.Settings if -// this value was modified as a result of applying `use`. -type ProductOfModuleUse struct { - Module ModuleID +// productOfModuleUseMark is a "mark" applied to values that are result of `use`. +// Should not be used directly, use AsProductOfModuleUse and IsProductOfModuleUse instead. +type productOfModuleUseMark struct { + mods string +} + +// AsProductOfModuleUse marks a value as a result of `use` of given modules. +func AsProductOfModuleUse(v cty.Value, mods ...ModuleID) cty.Value { + s := make([]string, len(mods)) + for i, m := range mods { + s[i] = string(m) + } + return v.Mark(productOfModuleUseMark{strings.Join(s, ",")}) +} + +// IsProductOfModuleUse returns list of modules that contributed (by `use`) to this value. 
+func IsProductOfModuleUse(v cty.Value) []ModuleID { + mark, marked := HasMark[productOfModuleUseMark](v) + if !marked { + return []ModuleID{} + } + + s := strings.Split(mark.mods, ",") + mods := make([]ModuleID, len(s)) + for i, m := range s { + mods[i] = ModuleID(m) + } + return mods } // WalkModules walks all modules in the blueprint and calls the walker function diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 59b8d7fe46..b960c16b29 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -61,10 +61,9 @@ deployment_groups: `) testModules = []Module{ { - Source: "./modules/network/vpc", - Kind: TerraformKind, - ID: "vpc", - WrapSettingsWith: make(map[string][]string), + Source: "./modules/network/vpc", + Kind: TerraformKind, + ID: "vpc", Settings: NewDict(map[string]cty.Value{ "network_name": cty.StringVal("$\"${var.deployment_name}_net\""), "project_id": cty.StringVal("project_name"), @@ -428,7 +427,7 @@ func (s *MySuite) TestListUnusedModules(c *C) { ID: "m", Use: []ModuleID{"w"}, Settings: NewDict(map[string]cty.Value{ - "x": cty.True.Mark(ProductOfModuleUse{"w"})})} + "x": AsProductOfModuleUse(cty.True, "w")})} c.Check(m.listUnusedModules(), DeepEquals, []ModuleID{}) } @@ -437,7 +436,7 @@ func (s *MySuite) TestListUnusedModules(c *C) { ID: "m", Use: []ModuleID{"w", "u"}, Settings: NewDict(map[string]cty.Value{ - "x": cty.True.Mark(ProductOfModuleUse{"w"})})} + "x": AsProductOfModuleUse(cty.True, "w")})} c.Check(m.listUnusedModules(), DeepEquals, []ModuleID{"u"}) } } diff --git a/pkg/config/expand.go b/pkg/config/expand.go index fe8344bf46..aa43488a88 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -112,22 +112,24 @@ func getModuleInputMap(inputs []modulereader.VarInfo) map[string]string { // initialize a Toolkit setting that corresponds to a module input of type list // create new list if unset, append if already set, error if value not a list -func (mod *Module) addListValue(settingName string, value 
cty.Value) error { - var cur []cty.Value - if !mod.Settings.Has(settingName) { - mod.createWrapSettingsWith() - mod.WrapSettingsWith[settingName] = []string{"flatten([", "])"} - cur = []cty.Value{} - } else { - v := mod.Settings.Get(settingName) - ty := v.Type() - if !ty.IsTupleType() && !ty.IsSetType() && !ty.IsSetType() { - return fmt.Errorf("%s: module %s, setting %s", errorMessages["appendToNonList"], mod.ID, settingName) +func (mod *Module) addListValue(settingName string, value cty.Value) { + args := []cty.Value{value} + mods := map[ModuleID]bool{} + for _, mod := range IsProductOfModuleUse(value) { + mods[mod] = true + } + + if mod.Settings.Has(settingName) { + cur := mod.Settings.Get(settingName) + for _, mod := range IsProductOfModuleUse(cur) { + mods[mod] = true } - cur = mod.Settings.Get(settingName).AsValueSlice() + args = append(args, cur) } - mod.Settings.Set(settingName, cty.TupleVal(append(cur, value))) - return nil + + exp := FunctionCallExpression("flatten", cty.TupleVal(args)) + val := AsProductOfModuleUse(exp.AsValue(), maps.Keys(mods)...) + mod.Settings.Set(settingName, val) } // useModule matches input variables in a "using" module to output values @@ -169,17 +171,14 @@ func useModule( continue } - v := ModuleRef(useMod.ID, settingName). - AsExpression(). - AsValue(). 
- Mark(ProductOfModuleUse{Module: useMod.ID}) + v := AsProductOfModuleUse( + ModuleRef(useMod.ID, settingName).AsExpression().AsValue(), + useMod.ID) if !isList { mod.Settings.Set(settingName, v) } else { - if err := mod.addListValue(settingName, v); err != nil { - return err - } + mod.addListValue(settingName, v) } } return nil @@ -238,7 +237,7 @@ func (dc *DeploymentConfig) combineLabels() error { if !vars.Has(labels) { // Shouldn't happen if blueprint was properly constructed vars.Set(labels, cty.EmptyObjectVal) } - gl := mergeLabels(vars.Get(labels).AsValueMap(), defaults) + gl := mergeMaps(defaults, vars.Get(labels).AsValueMap()) vars.Set(labels, cty.ObjectVal(gl)) return dc.Config.WalkModules(func(mod *Module) error { @@ -247,60 +246,63 @@ func (dc *DeploymentConfig) combineLabels() error { } func combineModuleLabels(mod *Module, dc DeploymentConfig) error { - mod.createWrapSettingsWith() labels := "labels" - // previously expanded blueprint, user written BPs do not use `WrapSettingsWith` - if _, ok := mod.WrapSettingsWith[labels]; ok { - return nil // Do nothing + if !moduleHasInput(*mod, labels) { + return nil // no op } - // Check if labels are set for this module - if !moduleHasInput(*mod, labels) { - return nil + cur := mod.Settings.Get(labels) + extra := map[string]cty.Value{roleLabel: cty.StringVal(getRole(mod.Source))} + + if mod.Kind == TerraformKind { + mod.Settings.Set(labels, mergeLabelsTf(extra, cur)) + } else if mod.Kind == PackerKind { + gl := dc.Config.Vars.Get(labels).AsValueMap() + merged, err := mergeLabelsPkr(gl, extra, cur) + if err != nil { + return err + } + mod.Settings.Set(labels, merged) } + return nil +} +// Terraform labels are `merge(var.labels, {ghpc_role="foo"}, [module labels])` +func mergeLabelsTf(extra map[string]cty.Value, cur cty.Value) cty.Value { + args := []cty.Value{ + GlobalRef("labels").AsExpression().AsValue(), + cty.ObjectVal(extra), + } + if !cur.IsNull() { + args = append(args, cur) + } + return 
FunctionCallExpression("merge", args...).AsValue() +} + +// Packer doesn't support `merge`, so merge it here. +func mergeLabelsPkr(global map[string]cty.Value, extra map[string]cty.Value, cur cty.Value) (cty.Value, error) { modLabels := map[string]cty.Value{} - if mod.Settings.Has(labels) { - // Cast into map so we can index into them - v := mod.Settings.Get(labels) - ty := v.Type() + if !cur.IsNull() { + ty := cur.Type() if !ty.IsObjectType() && !ty.IsMapType() { - return fmt.Errorf("%s, Module %s, labels type: %s", - errorMessages["settingsLabelType"], mod.ID, ty.FriendlyName()) + return cty.NilVal, fmt.Errorf("%s,labels type: %s", errorMessages["settingsLabelType"], ty.FriendlyName()) } - if v.AsValueMap() != nil { - modLabels = v.AsValueMap() + if cur.AsValueMap() != nil { + modLabels = cur.AsValueMap() } } - // Add the role (e.g. compute, network, etc) - if _, exists := modLabels[roleLabel]; !exists { - modLabels[roleLabel] = cty.StringVal(getRole(mod.Source)) - } - - if mod.Kind == TerraformKind { - // Terraform module labels to be expressed as - // `merge(var.labels, { ghpc_role=..., **settings.labels })` - mod.WrapSettingsWith[labels] = []string{"merge(", ")"} - ref := GlobalRef(labels).AsExpression() - args := []cty.Value{ref.AsValue(), cty.ObjectVal(modLabels)} - mod.Settings.Set(labels, cty.TupleVal(args)) - } else if mod.Kind == PackerKind { - g := dc.Config.Vars.Get(labels).AsValueMap() - mod.Settings.Set(labels, cty.ObjectVal(mergeLabels(modLabels, g))) - } - return nil + return cty.ObjectVal(mergeMaps(global, extra, modLabels)), nil } -// mergeLabels returns a new map with the keys from both maps. If a key exists in both maps, -// the value from the first map is used. -func mergeLabels(a map[string]cty.Value, b map[string]cty.Value) map[string]cty.Value { +// mergeMaps takes an arbitrary number of maps, and returns a single map that contains +// a merged set of elements from all arguments. 
+// If more than one given map defines the same key, then the one that is later in the argument sequence takes precedence. +// See https://developer.hashicorp.com/terraform/language/functions/merge +func mergeMaps(ms ...map[string]cty.Value) map[string]cty.Value { r := map[string]cty.Value{} - for k, v := range a { - r[k] = v - } - for k, v := range b { - if _, exists := a[k]; !exists { + for _, m := range ms { + for k, v := range m { r[k] = v } } diff --git a/pkg/config/expand_test.go b/pkg/config/expand_test.go index 513f59c770..1d64fba8a7 100644 --- a/pkg/config/expand_test.go +++ b/pkg/config/expand_test.go @@ -62,18 +62,16 @@ func (s *MySuite) TestAddListValue(c *C) { mod := Module{ID: "TestModule"} setting := "newSetting" - nonListSetting := "not-a-list" - first := cty.StringVal("value1") - second := cty.StringVal("value2") + first := AsProductOfModuleUse(cty.StringVal("value1"), "mod1") + second := AsProductOfModuleUse(cty.StringVal("value2"), "mod2") - c.Assert(mod.addListValue(setting, first), IsNil) - c.Check(mod.Settings.Get(setting), DeepEquals, cty.TupleVal([]cty.Value{first})) + mod.addListValue(setting, first) + c.Check(mod.Settings.Get(setting), DeepEquals, + AsProductOfModuleUse(MustParseExpression(`flatten(["value1"])`).AsValue(), "mod1")) - c.Assert(mod.addListValue(setting, second), IsNil) - c.Check(mod.Settings.Get(setting), DeepEquals, cty.TupleVal([]cty.Value{first, second})) - - mod.Settings.Set(nonListSetting, cty.StringVal("string-value")) - c.Assert(mod.addListValue(nonListSetting, second), NotNil) + mod.addListValue(setting, second) + c.Check(mod.Settings.Get(setting), DeepEquals, + AsProductOfModuleUse(MustParseExpression(`flatten(["value2", flatten(["value1"])])`).AsValue(), "mod2", "mod1")) } func (s *MySuite) TestUseModule(c *C) { @@ -87,7 +85,6 @@ func (s *MySuite) TestUseModule(c *C) { Type: "number", } ref := ModuleRef("UsedModule", "val1").AsExpression().AsValue() - useMark := ProductOfModuleUse{"UsedModule"} { // Pass: No 
Inputs, No Outputs mod := Module{ID: "lime", Source: "modSource"} @@ -124,7 +121,7 @@ func (s *MySuite) TestUseModule(c *C) { err := useModule(&mod, usedMod, []string{}) c.Check(err, IsNil) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ - "val1": ref.Mark(useMark), + "val1": AsProductOfModuleUse(ref, "UsedModule"), }) } @@ -146,7 +143,7 @@ func (s *MySuite) TestUseModule(c *C) { { // Pass: re-apply used modules, should be a no-op // Assume no settings were in blueprint mod := Module{ID: "lime", Source: "limeTree"} - mod.Settings.Set("val1", ref.Mark(useMark)) + mod.Settings.Set("val1", AsProductOfModuleUse(ref, "UsedModule")) setTestModuleInfo(mod, modulereader.ModuleInfo{ Inputs: []modulereader.VarInfo{varInfoNumber}, }) @@ -156,7 +153,8 @@ func (s *MySuite) TestUseModule(c *C) { err := useModule(&mod, usedMod, []string{}) c.Check(err, IsNil) - c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{"val1": ref.Mark(useMark)}) + c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ + "val1": AsProductOfModuleUse(ref, "UsedModule")}) } { // Pass: Single Input/Output match, input is list, not already set @@ -170,9 +168,9 @@ func (s *MySuite) TestUseModule(c *C) { err := useModule(&mod, usedMod, []string{}) c.Check(err, IsNil) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ - "val1": cty.TupleVal([]cty.Value{ - ref.Mark(useMark), - })}) + "val1": AsProductOfModuleUse( + MustParseExpression(`flatten([module.UsedModule.val1])`).AsValue(), + "UsedModule")}) } { // Pass: Setting exists, Input is List, Output is not a list @@ -189,10 +187,9 @@ func (s *MySuite) TestUseModule(c *C) { err := useModule(&mod, usedMod, []string{}) c.Check(err, IsNil) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ - "val1": cty.TupleVal([]cty.Value{ - ref, - ref.Mark(useMark), - })}) + "val1": AsProductOfModuleUse( + MustParseExpression(`flatten([module.UsedModule.val1,[module.UsedModule.val1]])`).AsValue(), + "UsedModule")}) } 
{ // Pass: Setting exists, Input is List, Output is not a list @@ -255,8 +252,7 @@ func (s *MySuite) TestApplyUseModules(c *C) { c.Assert(dc.applyUseModules(), IsNil) ref := ModuleRef("TestModule0", "test_inter_0").AsExpression().AsValue() c.Assert(m.Settings.Items(), DeepEquals, map[string]cty.Value{ - "test_inter_0": ref.Mark(ProductOfModuleUse{"TestModule0"}), - }) + "test_inter_0": AsProductOfModuleUse(ref, "TestModule0")}) } { // Deliberately break the match and see that no settings are added @@ -329,31 +325,30 @@ func (s *MySuite) TestCombineLabels(c *C) { lime := dc.Config.DeploymentGroups[0] // Labels are set and override role coral = lime.Modules[0] - c.Check(coral.WrapSettingsWith["labels"], DeepEquals, []string{"merge(", ")"}) - c.Check(coral.Settings.Get("labels"), DeepEquals, cty.TupleVal([]cty.Value{ + c.Check(coral.Settings.Get("labels"), DeepEquals, FunctionCallExpression( + "merge", labelsRef, cty.ObjectVal(map[string]cty.Value{ - "magenta": cty.StringVal("orchid"), + "ghpc_role": cty.StringVal("blue")}), + cty.ObjectVal(map[string]cty.Value{ "ghpc_role": cty.StringVal("maroon"), - }), - })) + "magenta": cty.StringVal("orchid")}), + ).AsValue()) + // Labels are not set, infer role from module.source khaki = lime.Modules[1] - c.Check(khaki.WrapSettingsWith["labels"], DeepEquals, []string{"merge(", ")"}) - c.Check(khaki.Settings.Get("labels"), DeepEquals, cty.TupleVal([]cty.Value{ + c.Check(khaki.Settings.Get("labels"), DeepEquals, FunctionCallExpression( + "merge", labelsRef, - cty.ObjectVal(map[string]cty.Value{ - "ghpc_role": cty.StringVal("brown")}), - })) + cty.ObjectVal(map[string]cty.Value{"ghpc_role": cty.StringVal("brown")}), + ).AsValue()) // No labels input silver = lime.Modules[2] - c.Check(silver.WrapSettingsWith["labels"], IsNil) c.Check(silver.Settings.Get("labels"), DeepEquals, cty.NilVal) // Packer, include global include explicitly // Keep overridden ghpc_deployment=navy orange = dc.Config.DeploymentGroups[1].Modules[0] - 
c.Check(orange.WrapSettingsWith["labels"], IsNil) c.Check(orange.Settings.Get("labels"), DeepEquals, cty.ObjectVal(map[string]cty.Value{ "ghpc_blueprint": cty.StringVal("simple"), "ghpc_deployment": cty.StringVal("navy"), diff --git a/pkg/config/expression.go b/pkg/config/expression.go index 23cc5b6ff8..f54f790f88 100644 --- a/pkg/config/expression.go +++ b/pkg/config/expression.go @@ -16,6 +16,7 @@ package config import ( "fmt" + "regexp" "strings" "github.com/hashicorp/hcl/v2" @@ -348,3 +349,75 @@ func HasMark[T any](val cty.Value) (T, bool) { } return tgt, found } + +func escapeBlueprintVariables(s string) string { + // Convert \$(not.variable) to $(not.variable) + re := regexp.MustCompile(`\\\$\(`) + return re.ReplaceAllString(s, `$(`) +} + +func escapeLiteralVariables(s string) string { + // Convert \((not.variable)) to ((not.variable)) + re := regexp.MustCompile(`\\\(\(`) + return re.ReplaceAllString(s, `((`) +} + +// TokensForValue is a modification of hclwrite.TokensForValue. +// The only difference in behavior is handling "HCL literal" strings. +func TokensForValue(val cty.Value) hclwrite.Tokens { + if val.IsNull() { // terminate early as Null value can has any type (e.g. 
String) + return hclwrite.TokensForValue(val) + } + + // We need to handle both cases, until all "expression" users are moved to Expression + if e, is := IsExpressionValue(val); is { + return e.Tokenize() + } + val, _ = val.Unmark() // remove marks, as we don't need them anymore + if s, is := IsYamlExpressionLiteral(val); is { // return it "as is" + return hclwrite.TokensForIdentifier(s) + } + + ty := val.Type() + if ty == cty.String { + s := val.AsString() + // The order of application matters, for an edge cases like: `\$\((` -> `$((` + s = escapeLiteralVariables(s) + s = escapeBlueprintVariables(s) + return hclwrite.TokensForValue(cty.StringVal(s)) + } + + if ty.IsListType() || ty.IsSetType() || ty.IsTupleType() { + tl := []hclwrite.Tokens{} + for it := val.ElementIterator(); it.Next(); { + _, v := it.Element() + tl = append(tl, TokensForValue(v)) + } + return hclwrite.TokensForTuple(tl) + } + if ty.IsMapType() || ty.IsObjectType() { + tl := []hclwrite.ObjectAttrTokens{} + for it := val.ElementIterator(); it.Next(); { + k, v := it.Element() + kt := hclwrite.TokensForIdentifier(k.AsString()) + if !hclsyntax.ValidIdentifier(k.AsString()) { + kt = TokensForValue(k) + } + vt := TokensForValue(v) + tl = append(tl, hclwrite.ObjectAttrTokens{Name: kt, Value: vt}) + } + return hclwrite.TokensForObject(tl) + + } + return hclwrite.TokensForValue(val) // rely on hclwrite implementation +} + +// FunctionCallExpression is a helper to build function call expression. +func FunctionCallExpression(n string, args ...cty.Value) Expression { + ta := make([]hclwrite.Tokens, len(args)) + for i, a := range args { + ta[i] = TokensForValue(a) + } + toks := hclwrite.TokensForFunctionCall(n, ta...) 
+ return MustParseExpression(string(toks.Bytes())) +} diff --git a/pkg/config/expression_test.go b/pkg/config/expression_test.go index dcfcf98785..9205568ebc 100644 --- a/pkg/config/expression_test.go +++ b/pkg/config/expression_test.go @@ -20,6 +20,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/hashicorp/hcl/v2" "github.com/hashicorp/hcl/v2/hclsyntax" + "github.com/hashicorp/hcl/v2/hclwrite" "github.com/zclconf/go-cty/cty" ) @@ -141,3 +142,45 @@ func TestSimpleVarToExpression(t *testing.T) { }) } } + +func TestTokensForValueNoLiteral(t *testing.T) { + val := cty.ObjectVal(map[string]cty.Value{ + "tan": cty.TupleVal([]cty.Value{ + cty.StringVal("biege"), + cty.NullVal(cty.String), + cty.MapVal(map[string]cty.Value{ + "cu": cty.NumberIntVal(29), + "ba": cty.NumberIntVal(56), + })}), + "pony.zebra": cty.NilVal, + }) + want := hclwrite.NewEmptyFile() + want.Body().AppendUnstructuredTokens(hclwrite.TokensForValue(val)) + + got := hclwrite.NewEmptyFile() + got.Body().AppendUnstructuredTokens(TokensForValue(val)) + + if diff := cmp.Diff(string(want.Bytes()), string(got.Bytes())); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} + +func TestTokensForValueWithLiteral(t *testing.T) { + val := cty.ObjectVal(map[string]cty.Value{ + "tan": cty.TupleVal([]cty.Value{ + cty.StringVal("((var.kilo + 8))"), // HCL literal + MustParseExpression("var.tina + 4").AsValue(), // HclExpression value + })}) + want := ` +{ + tan = [var.kilo + 8, var.tina + 4] +}`[1:] + + gotF := hclwrite.NewEmptyFile() + gotF.Body().AppendUnstructuredTokens(TokensForValue(val)) + got := hclwrite.Format(gotF.Bytes()) // format to normalize whitespace + + if diff := cmp.Diff(want, string(got)); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} diff --git a/pkg/modulewriter/hcl_utils.go b/pkg/modulewriter/hcl_utils.go index f62fb173d1..74e0f33cb2 100644 --- a/pkg/modulewriter/hcl_utils.go +++ b/pkg/modulewriter/hcl_utils.go @@ -17,27 +17,13 @@ package modulewriter import ( 
"fmt" "path/filepath" - "regexp" "hpc-toolkit/pkg/config" - "github.com/hashicorp/hcl/v2/hclsyntax" "github.com/hashicorp/hcl/v2/hclwrite" "github.com/zclconf/go-cty/cty" ) -func escapeBlueprintVariables(s string) string { - // Convert \$(not.variable) to $(not.variable) - re := regexp.MustCompile(`\\\$\(`) - return re.ReplaceAllString(s, `$(`) -} - -func escapeLiteralVariables(s string) string { - // Convert \((not.variable)) to ((not.variable)) - re := regexp.MustCompile(`\\\(\(`) - return re.ReplaceAllString(s, `((`) -} - // WriteHclAttributes writes tfvars/pkvars.hcl files func WriteHclAttributes(vars map[string]cty.Value, dst string) error { if err := createBaseFile(dst); err != nil { @@ -48,7 +34,7 @@ func WriteHclAttributes(vars map[string]cty.Value, dst string) error { hclBody := hclFile.Body() for _, k := range orderKeys(vars) { hclBody.AppendNewline() - toks := TokensForValue(vars[k]) + toks := config.TokensForValue(vars[k]) hclBody.SetAttributeRaw(k, toks) } @@ -59,51 +45,3 @@ func WriteHclAttributes(vars map[string]cty.Value, dst string) error { } return err } - -// TokensForValue is a modification of hclwrite.TokensForValue. -// The only difference in behavior is handling "HCL literal" strings. -func TokensForValue(val cty.Value) hclwrite.Tokens { - if val.IsNull() { // terminate early as Null value can has any type (e.g. 
String) - return hclwrite.TokensForValue(val) - } - - // We need to handle both cases, until all "expression" users are moved to Expression - if e, is := config.IsExpressionValue(val); is { - return e.Tokenize() - } else if s, is := config.IsYamlExpressionLiteral(val); is { // return it "as is" - return hclwrite.TokensForIdentifier(s) - } - - ty := val.Type() - if ty == cty.String { - s := val.AsString() - // The order of application matters, for an edge cases like: `\$\((` -> `$((` - s = escapeLiteralVariables(s) - s = escapeBlueprintVariables(s) - return hclwrite.TokensForValue(cty.StringVal(s)) - } - - if ty.IsListType() || ty.IsSetType() || ty.IsTupleType() { - tl := []hclwrite.Tokens{} - for it := val.ElementIterator(); it.Next(); { - _, v := it.Element() - tl = append(tl, TokensForValue(v)) - } - return hclwrite.TokensForTuple(tl) - } - if ty.IsMapType() || ty.IsObjectType() { - tl := []hclwrite.ObjectAttrTokens{} - for it := val.ElementIterator(); it.Next(); { - k, v := it.Element() - kt := hclwrite.TokensForIdentifier(k.AsString()) - if !hclsyntax.ValidIdentifier(k.AsString()) { - kt = TokensForValue(k) - } - vt := TokensForValue(v) - tl = append(tl, hclwrite.ObjectAttrTokens{Name: kt, Value: vt}) - } - return hclwrite.TokensForObject(tl) - - } - return hclwrite.TokensForValue(val) // rely on hclwrite implementation -} diff --git a/pkg/modulewriter/hcl_utils_test.go b/pkg/modulewriter/hcl_utils_test.go index 6833f8b75a..570ab28156 100644 --- a/pkg/modulewriter/hcl_utils_test.go +++ b/pkg/modulewriter/hcl_utils_test.go @@ -15,59 +15,15 @@ package modulewriter import ( - "hpc-toolkit/pkg/config" "hpc-toolkit/pkg/modulereader" "os" "testing" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - "github.com/hashicorp/hcl/v2/hclwrite" "github.com/zclconf/go-cty/cty" ) -func TestTokensForValueNoLiteral(t *testing.T) { - val := cty.ObjectVal(map[string]cty.Value{ - "tan": cty.TupleVal([]cty.Value{ - cty.StringVal("biege"), - 
cty.NullVal(cty.String), - cty.MapVal(map[string]cty.Value{ - "cu": cty.NumberIntVal(29), - "ba": cty.NumberIntVal(56), - })}), - "pony.zebra": cty.NilVal, - }) - want := hclwrite.NewEmptyFile() - want.Body().AppendUnstructuredTokens(hclwrite.TokensForValue(val)) - - got := hclwrite.NewEmptyFile() - got.Body().AppendUnstructuredTokens(TokensForValue(val)) - - if diff := cmp.Diff(string(want.Bytes()), string(got.Bytes())); diff != "" { - t.Errorf("diff (-want +got):\n%s", diff) - } -} - -func TestTokensForValueWithLiteral(t *testing.T) { - val := cty.ObjectVal(map[string]cty.Value{ - "tan": cty.TupleVal([]cty.Value{ - cty.StringVal("((var.kilo + 8))"), // HCL literal - config.MustParseExpression("var.tina + 4").AsValue(), // HclExpression value - })}) - want := ` -{ - tan = [var.kilo + 8, var.tina + 4] -}`[1:] - - gotF := hclwrite.NewEmptyFile() - gotF.Body().AppendUnstructuredTokens(TokensForValue(val)) - got := hclwrite.Format(gotF.Bytes()) // format to normalize whitespace - - if diff := cmp.Diff(want, string(got)); diff != "" { - t.Errorf("diff (-want +got):\n%s", diff) - } -} - func TestHclAtttributesRW(t *testing.T) { want := make(map[string]cty.Value) // test that a string that needs escaping when written is read correctly diff --git a/pkg/modulewriter/modulewriter_test.go b/pkg/modulewriter/modulewriter_test.go index 8e75d4eb69..a218373725 100644 --- a/pkg/modulewriter/modulewriter_test.go +++ b/pkg/modulewriter/modulewriter_test.go @@ -523,27 +523,6 @@ func (s *MySuite) TestWriteMain(c *C) { exists, err = stringExistsInFile("a_bucket", mainFilePath) c.Assert(err, IsNil) c.Assert(exists, Equals, true) - - // Test with WrapSettingsWith - testModuleWithWrap := config.Module{ - ID: "test_module_with_wrap", - WrapSettingsWith: map[string][]string{ - "wrappedSetting": {"list(flatten(", "))"}, - }, - Kind: config.TerraformKind, - Source: "modules/network/vpc", - Settings: config.NewDict(map[string]cty.Value{ - "wrappedSetting": cty.TupleVal([]cty.Value{ - 
cty.StringVal("val1"), - cty.StringVal("val2")}), - }), - } - testModules = append(testModules, testModuleWithWrap) - err = writeMain(testModules, testBackend, testMainDir) - c.Assert(err, IsNil) - exists, err = stringExistsInFile("list(flatten(", mainFilePath) - c.Assert(err, IsNil) - c.Assert(exists, Equals, true) } func (s *MySuite) TestWriteOutputs(c *C) { @@ -743,7 +722,7 @@ func (s *MySuite) TestWritePackerAutoVars(c *C) { func (s *MySuite) TestStringEscape(c *C) { f := func(s string) string { - toks := TokensForValue(cty.StringVal(s)) + toks := config.TokensForValue(cty.StringVal(s)) return string(toks.Bytes()) } // LiteralVariables @@ -813,3 +792,23 @@ func (s *MySuite) TestDeploymentSource(c *C) { c.Check(s, Matches, `^\./modules/y-\w\w\w\w$`) } } + +func (s *MySuite) TestSubstituteIgcReferencesInModule(c *C) { + d := config.Dict{} + d.Set("fold", cty.TupleVal([]cty.Value{ + cty.StringVal("zebra"), + config.MustParseExpression(`module.golf.red + 6 + module.golf.green`).AsValue(), + config.MustParseExpression(`module.tennis.brown`).AsValue(), + })) + m := SubstituteIgcReferencesInModule( + config.Module{Settings: d}, + map[config.Reference]modulereader.VarInfo{ + config.ModuleRef("golf", "red"): {Name: "pink"}, + config.ModuleRef("golf", "green"): {Name: "lime"}, + }) + c.Check(m.Settings.Items(), DeepEquals, map[string]cty.Value{"fold": cty.TupleVal([]cty.Value{ + cty.StringVal("zebra"), + config.MustParseExpression(`var.pink + 6 + var.lime`).AsValue(), + config.MustParseExpression(`module.tennis.brown`).AsValue(), + })}) +} diff --git a/pkg/modulewriter/tfwriter.go b/pkg/modulewriter/tfwriter.go index c9e87ca7c3..cfec510fe5 100644 --- a/pkg/modulewriter/tfwriter.go +++ b/pkg/modulewriter/tfwriter.go @@ -26,7 +26,6 @@ import ( "strings" "github.com/hashicorp/hcl/v2/ext/typeexpr" - "github.com/hashicorp/hcl/v2/hclsyntax" "github.com/hashicorp/hcl/v2/hclwrite" "github.com/zclconf/go-cty/cty" "golang.org/x/exp/maps" @@ -231,20 +230,7 @@ func writeMain( // For 
each Setting for _, setting := range orderKeys(mod.Settings.Items()) { value := mod.Settings.Get(setting) - if wrap, ok := mod.WrapSettingsWith[setting]; ok { - if len(wrap) != 2 { - return fmt.Errorf( - "invalid length of WrapSettingsWith for %s.%s, expected 2 got %d", - mod.ID, setting, len(wrap)) - } - toks, err := tokensForWrapped(wrap[0], value, wrap[1]) - if err != nil { - return fmt.Errorf("failed to process %s.%s: %v", mod.ID, setting, err) - } - moduleBody.SetAttributeRaw(setting, toks) - } else { - moduleBody.SetAttributeRaw(setting, TokensForValue(value)) - } + moduleBody.SetAttributeRaw(setting, config.TokensForValue(value)) } } // Write file @@ -256,30 +242,6 @@ func writeMain( return nil } -func tokensForWrapped(pref string, val cty.Value, suf string) (hclwrite.Tokens, error) { - var toks hclwrite.Tokens - if !val.Type().IsListType() && !val.Type().IsTupleType() { - return toks, fmt.Errorf( - "invalid value for wrapped setting, expected sequence, got %#v", val.Type()) - } - toks = append(toks, simpleTokens(pref)...) - - it, first := val.ElementIterator(), true - for it.Next() { - if !first { - toks = append(toks, &hclwrite.Token{ - Type: hclsyntax.TokenComma, - Bytes: []byte{','}}) - } - _, el := it.Element() - toks = append(toks, TokensForValue(el)...) - first = false - } - toks = append(toks, simpleTokens(suf)...) - - return toks, nil -} - var simpleTokens = hclwrite.TokensForIdentifier func writeProviders(vars map[string]cty.Value, dst string) error { diff --git a/tools/validate_configs/golden_copies/configs/merge_flatten.yaml b/tools/validate_configs/golden_copies/configs/merge_flatten.yaml new file mode 100644 index 0000000000..590510eb41 --- /dev/null +++ b/tools/validate_configs/golden_copies/configs/merge_flatten.yaml @@ -0,0 +1,49 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +blueprint_name: merge_flatten + +vars: + project_id: # + deployment_name: merge_flatten + region: us-east4 + zone: us-east4-c + +deployment_groups: +- group: zero + modules: + - id: network + source: modules/network/vpc + + - id: first-fs + source: modules/file-system/filestore + use: [network] + settings: + local_mount: /first + + - id: second-fs + source: modules/file-system/filestore + use: [network] + settings: + local_mount: /first + + - id: first-vm + source: modules/compute/vm-instance + use: [first-fs] + settings: + labels: {"green": "sleeves"} + + - id: second-vm + source: modules/compute/vm-instance + use: [first-fs, second-fs] diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml index d8fdfa2906..0e64f594dc 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml @@ -54,15 +54,12 @@ deployment_groups: id: homefs use: - network0 - wrapsettingswith: - labels: - - merge( - - ) settings: deployment_name: ((var.deployment_name )) - labels: - - ((var.labels )) - - ghpc_role: file-system + labels: |- + ((merge(var.labels, { + ghpc_role = "file-system" + }) )) local_mount: /home network_id: ((module.network0.network_id )) project_id: ((var.project_id )) @@ -73,15 +70,12 @@ deployment_groups: id: projectsfs use: - network0 - wrapsettingswith: 
- labels: - - merge( - - ) settings: deployment_name: ((var.deployment_name )) - labels: - - ((var.labels )) - - ghpc_role: file-system + labels: |- + ((merge(var.labels, { + ghpc_role = "file-system" + }) )) local_mount: /projects network_id: ((module.network0.network_id )) project_id: ((var.project_id )) @@ -90,19 +84,16 @@ deployment_groups: - source: modules/scripts/startup-script kind: terraform id: script - wrapsettingswith: - labels: - - merge( - - ) outputs: - name: startup_script description: Automatically-generated output exported for use by later deployment groups sensitive: true settings: deployment_name: ((var.deployment_name )) - labels: - - ((var.labels )) - - ghpc_role: scripts + labels: |- + ((merge(var.labels, { + ghpc_role = "scripts" + }) )) project_id: ((var.project_id )) region: ((var.region )) runners: diff --git a/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml index d8cee48918..0ac4892f70 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml @@ -59,15 +59,12 @@ deployment_groups: id: homefs use: - network0 - wrapsettingswith: - labels: - - merge( - - ) settings: deployment_name: ((var.deployment_name )) - labels: - - ((var.labels )) - - ghpc_role: file-system + labels: |- + ((merge(var.labels, { + ghpc_role = "file-system" + }) )) local_mount: /home name: ((module.network0.subnetwork_name)) network_id: ((module.network0.network_id)) diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/DO_NOT_MODIFY_THIS_DIRECTORY b/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/DO_NOT_MODIFY_THIS_DIRECTORY new file mode 100644 index 0000000000..1613c718b5 --- /dev/null +++ 
b/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/DO_NOT_MODIFY_THIS_DIRECTORY @@ -0,0 +1 @@ +Files in this directory are managed by ghpc. Do not modify them manually! diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/expanded_blueprint.yaml new file mode 100644 index 0000000000..9228ff8a0c --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/expanded_blueprint.yaml @@ -0,0 +1,113 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +blueprint_name: merge_flatten +ghpc_version: golden +validators: + - validator: test_project_exists + skip: true + - validator: test_apis_enabled + skip: true + - validator: test_region_exists + skip: true + - validator: test_zone_exists + skip: true + - validator: test_zone_in_region + skip: true + - validator: test_module_not_used + - validator: test_deployment_variable_not_used +vars: + deployment_name: golden_copy_deployment + labels: + ghpc_blueprint: merge_flatten + ghpc_deployment: golden_copy_deployment + project_id: invalid-project + region: us-east4 + zone: us-east4-c +deployment_groups: + - group: zero + modules: + - source: modules/network/vpc + kind: terraform + id: network + settings: + deployment_name: ((var.deployment_name )) + project_id: ((var.project_id )) + region: ((var.region )) + - source: modules/file-system/filestore + kind: terraform + id: first-fs + use: + - network + settings: + deployment_name: ((var.deployment_name )) + labels: |- + ((merge(var.labels, { + ghpc_role = "file-system" + }) )) + local_mount: /first + network_id: ((module.network.network_id )) + project_id: ((var.project_id )) + region: ((var.region )) + zone: ((var.zone )) + - source: modules/file-system/filestore + kind: terraform + id: second-fs + use: + - network + settings: + deployment_name: ((var.deployment_name )) + labels: |- + ((merge(var.labels, { + ghpc_role = "file-system" + }) )) + local_mount: /first + network_id: ((module.network.network_id )) + project_id: ((var.project_id )) + region: ((var.region )) + zone: ((var.zone )) + - source: modules/compute/vm-instance + kind: terraform + id: first-vm + use: + - first-fs + settings: + deployment_name: ((var.deployment_name )) + labels: |- + ((merge(var.labels, { + ghpc_role = "compute" + }, { + green = "sleeves" + }) )) + network_storage: ((flatten([module.first-fs.network_storage]) )) + project_id: ((var.project_id )) + region: ((var.region )) + zone: ((var.zone )) + - source: modules/compute/vm-instance + 
kind: terraform + id: second-vm + use: + - first-fs + - second-fs + settings: + deployment_name: ((var.deployment_name )) + labels: |- + ((merge(var.labels, { + ghpc_role = "compute" + }) )) + network_storage: ((flatten([module.second-fs.network_storage, flatten([module.first-fs.network_storage])]) )) + project_id: ((var.project_id )) + region: ((var.region )) + zone: ((var.zone )) + kind: terraform diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/instructions.txt b/tools/validate_configs/golden_copies/expectations/merge_flatten/instructions.txt new file mode 100644 index 0000000000..119d4c7b20 --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/instructions.txt @@ -0,0 +1,23 @@ +Advanced Deployment Instructions +================================ + +Terraform group 'zero' was successfully created in directory golden_copy_deployment/zero +To deploy, run the following commands: + +terraform -chdir=golden_copy_deployment/zero init +terraform -chdir=golden_copy_deployment/zero validate +terraform -chdir=golden_copy_deployment/zero apply + +Destroying infrastructure when no longer needed +=============================================== + +Automated +--------- + +./ghpc destroy golden_copy_deployment + +Advanced / Manual +----------------- +Infrastructure should be destroyed in reverse order of creation: + +terraform -chdir=golden_copy_deployment/zero destroy diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/main.tf b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/main.tf new file mode 100644 index 0000000000..97498d30bb --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/main.tf @@ -0,0 +1,74 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +module "network" { + source = "./modules/embedded/modules/network/vpc" + deployment_name = var.deployment_name + project_id = var.project_id + region = var.region +} + +module "first-fs" { + source = "./modules/embedded/modules/file-system/filestore" + deployment_name = var.deployment_name + labels = merge(var.labels, { + ghpc_role = "file-system" + }) + local_mount = "/first" + network_id = module.network.network_id + project_id = var.project_id + region = var.region + zone = var.zone +} + +module "second-fs" { + source = "./modules/embedded/modules/file-system/filestore" + deployment_name = var.deployment_name + labels = merge(var.labels, { + ghpc_role = "file-system" + }) + local_mount = "/first" + network_id = module.network.network_id + project_id = var.project_id + region = var.region + zone = var.zone +} + +module "first-vm" { + source = "./modules/embedded/modules/compute/vm-instance" + deployment_name = var.deployment_name + labels = merge(var.labels, { + ghpc_role = "compute" + }, { + green = "sleeves" + }) + network_storage = flatten([module.first-fs.network_storage]) + project_id = var.project_id + region = var.region + zone = var.zone +} + +module "second-vm" { + source = "./modules/embedded/modules/compute/vm-instance" + deployment_name = var.deployment_name + labels = merge(var.labels, { + ghpc_role = "compute" + }) + network_storage = flatten([module.second-fs.network_storage, flatten([module.first-fs.network_storage])]) + project_id = var.project_id + region = var.region + zone = var.zone +} diff --git 
a/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/providers.tf b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/providers.tf new file mode 100644 index 0000000000..ec0dc80b57 --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/providers.tf @@ -0,0 +1,27 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +provider "google" { + project = var.project_id + zone = var.zone + region = var.region +} + +provider "google-beta" { + project = var.project_id + zone = var.zone + region = var.region +} diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/terraform.tfvars b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/terraform.tfvars new file mode 100644 index 0000000000..d9130ce006 --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/terraform.tfvars @@ -0,0 +1,28 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +deployment_name = "golden_copy_deployment" + +labels = { + ghpc_blueprint = "merge_flatten" + ghpc_deployment = "golden_copy_deployment" +} + +project_id = "invalid-project" + +region = "us-east4" + +zone = "us-east4-c" diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/variables.tf b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/variables.tf new file mode 100644 index 0000000000..bed06d89e6 --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/variables.tf @@ -0,0 +1,40 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +variable "deployment_name" { + description = "Toolkit deployment variable: deployment_name" + type = string +} + +variable "labels" { + description = "Toolkit deployment variable: labels" + type = any +} + +variable "project_id" { + description = "Toolkit deployment variable: project_id" + type = string +} + +variable "region" { + description = "Toolkit deployment variable: region" + type = string +} + +variable "zone" { + description = "Toolkit deployment variable: zone" + type = string +} diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/versions.tf b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/versions.tf new file mode 100644 index 0000000000..d1bb72b8b4 --- /dev/null +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/versions.tf @@ -0,0 +1,30 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +terraform { + required_version = ">= 1.2" + + required_providers { + google = { + source = "hashicorp/google" + version = "~> 4.69.1" + } + google-beta = { + source = "hashicorp/google-beta" + version = "~> 4.69.1" + } + } +} From a49f794dfc108e4f7c21394db753e10253f9db5c Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 20 Jun 2023 14:26:01 -0700 Subject: [PATCH 51/92] Print advanced instructions after `ghpc deploy` (#1463) * Print advanced instructions after `ghpc deploy`; * Change `WriteDeployment` to take `deploymentDirectory`; * Improve test coverage by reducing number of lines. --- cmd/create.go | 29 ++++++++++++++--------- cmd/deploy.go | 25 +++++++------------- pkg/modulewriter/modulewriter.go | 26 ++++++--------------- pkg/modulewriter/modulewriter_test.go | 33 ++++++++++----------------- 4 files changed, 45 insertions(+), 68 deletions(-) diff --git a/cmd/create.go b/cmd/create.go index 530d285779..89591bd9c6 100644 --- a/cmd/create.go +++ b/cmd/create.go @@ -18,12 +18,11 @@ limitations under the License. 
package cmd import ( - "errors" "fmt" "hpc-toolkit/pkg/config" "hpc-toolkit/pkg/modulewriter" "log" - "os" + "path/filepath" "strings" "github.com/spf13/cobra" @@ -77,15 +76,23 @@ var ( func runCreateCmd(cmd *cobra.Command, args []string) { dc := expandOrDie(args[0]) - if err := modulewriter.WriteDeployment(dc, outputDir, overwriteDeployment); err != nil { - var target *modulewriter.OverwriteDeniedError - if errors.As(err, &target) { - fmt.Printf("\n%s\n", err.Error()) - os.Exit(1) - } else { - log.Fatal(err) - } - } + deplName, err := dc.Config.DeploymentName() + cobra.CheckErr(err) + deplDir := filepath.Join(outputDir, deplName) + cobra.CheckErr(modulewriter.WriteDeployment(dc, deplDir, overwriteDeployment)) + + fmt.Println("To deploy your infrastructure please run:") + fmt.Println() + fmt.Printf("./ghpc deploy %s\n", deplDir) + fmt.Println() + printAdvancedInstructionsMessage(deplDir) +} + +func printAdvancedInstructionsMessage(deplDir string) { + fmt.Println("Find instructions for cleanly destroying infrastructure and advanced manual") + fmt.Println("deployment instructions at:") + fmt.Println() + fmt.Printf("%s\n", modulewriter.InstructionsPath(deplDir)) } func expandOrDie(path string) config.DeploymentConfig { diff --git a/cmd/deploy.go b/cmd/deploy.go index 635e4291dd..fe292ebbd6 100644 --- a/cmd/deploy.go +++ b/cmd/deploy.go @@ -48,7 +48,7 @@ var ( Args: cobra.MatchAll(cobra.ExactArgs(1), checkDir), ValidArgsFunction: matchDirs, PreRunE: parseDeployArgs, - RunE: runDeployCmd, + Run: runDeployCmd, SilenceUsage: true, } ) @@ -72,22 +72,15 @@ func getApplyBehavior(autoApprove bool) shell.ApplyBehavior { return shell.PromptBeforeApply } -func runDeployCmd(cmd *cobra.Command, args []string) error { +func runDeployCmd(cmd *cobra.Command, args []string) { expandedBlueprintFile := filepath.Join(artifactsDir, expandedBlueprintFilename) dc, err := config.NewDeploymentConfig(expandedBlueprintFile) - if err != nil { - return err - } - - if err := 
shell.ValidateDeploymentDirectory(dc.Config.DeploymentGroups, deploymentRoot); err != nil { - return err - } + cobra.CheckErr(err) + cobra.CheckErr(shell.ValidateDeploymentDirectory(dc.Config.DeploymentGroups, deploymentRoot)) for _, group := range dc.Config.DeploymentGroups { groupDir := filepath.Join(deploymentRoot, string(group.Name)) - if err = shell.ImportInputs(groupDir, artifactsDir, expandedBlueprintFile); err != nil { - return err - } + cobra.CheckErr(shell.ImportInputs(groupDir, artifactsDir, expandedBlueprintFile)) var err error switch group.Kind { @@ -100,12 +93,10 @@ func runDeployCmd(cmd *cobra.Command, args []string) error { default: err = fmt.Errorf("group %s is an unsupported kind %s", groupDir, group.Kind.String()) } - if err != nil { - return err - } - + cobra.CheckErr(err) } - return nil + fmt.Println("\n###############################") + printAdvancedInstructionsMessage(deploymentRoot) } func deployPackerGroup(moduleDir string) error { diff --git a/pkg/modulewriter/modulewriter.go b/pkg/modulewriter/modulewriter.go index 45dc95cdd2..3630f389ea 100644 --- a/pkg/modulewriter/modulewriter.go +++ b/pkg/modulewriter/modulewriter.go @@ -78,13 +78,7 @@ func factory(kind string) ModuleWriter { // WriteDeployment writes a deployment directory using modules defined the // environment blueprint. 
-func WriteDeployment(dc config.DeploymentConfig, outputDir string, overwriteFlag bool) error { - deploymentName, err := dc.Config.DeploymentName() - if err != nil { - return err - } - deploymentDir := filepath.Join(outputDir, deploymentName) - +func WriteDeployment(dc config.DeploymentConfig, deploymentDir string, overwriteFlag bool) error { overwrite := isOverwriteAllowed(deploymentDir, &dc.Config, overwriteFlag) if err := prepDepDir(deploymentDir, overwrite); err != nil { return err @@ -98,8 +92,7 @@ func WriteDeployment(dc config.DeploymentConfig, outputDir string, overwriteFlag return err } - advancedDeployInstructions := filepath.Join(deploymentDir, "instructions.txt") - f, err := os.Create(advancedDeployInstructions) + f, err := os.Create(InstructionsPath(deploymentDir)) if err != nil { return err } @@ -133,19 +126,14 @@ func WriteDeployment(dc config.DeploymentConfig, outputDir string, overwriteFlag } } } - - fmt.Println("To deploy your infrastructure please run:") - fmt.Println() - fmt.Printf("./ghpc deploy %s\n", deploymentDir) - fmt.Println() - fmt.Println("Find instructions for cleanly destroying infrastructure and advanced manual") - fmt.Println("deployment instructions at:") - fmt.Println() - fmt.Printf("%s\n", f.Name()) - return nil } +// InstructionsPath returns the path to the instructions file for a deployment +func InstructionsPath(deploymentDir string) string { + return filepath.Join(deploymentDir, "instructions.txt") +} + func createGroupDirs(deploymentPath string, deploymentGroups *[]config.DeploymentGroup) error { for _, grp := range *deploymentGroups { groupPath := filepath.Join(deploymentPath, string(grp.Name)) diff --git a/pkg/modulewriter/modulewriter_test.go b/pkg/modulewriter/modulewriter_test.go index a218373725..80c79617ad 100644 --- a/pkg/modulewriter/modulewriter_test.go +++ b/pkg/modulewriter/modulewriter_test.go @@ -165,25 +165,25 @@ func (s *MySuite) TestPrepDepDir_OverwriteRealDep(c *C) { // Test with a real deployment 
previously written testDC := getDeploymentConfigForTest() testDC.Config.Vars.Set("deployment_name", cty.StringVal("test_prep_dir")) - realDepDir := filepath.Join(testDir, "test_prep_dir") + depDir := filepath.Join(testDir, "test_prep_dir") // writes a full deployment w/ actual resource groups - WriteDeployment(testDC, testDir, false /* overwrite */) + WriteDeployment(testDC, depDir, false /* overwrite */) // confirm existence of resource groups (beyond .ghpc dir) - files, _ := ioutil.ReadDir(realDepDir) + files, _ := ioutil.ReadDir(depDir) c.Check(len(files) > 1, Equals, true) - err := prepDepDir(realDepDir, true /* overwrite */) + err := prepDepDir(depDir, true /* overwrite */) c.Check(err, IsNil) - c.Check(isDeploymentDirPrepped(realDepDir), IsNil) + c.Check(isDeploymentDirPrepped(depDir), IsNil) // Check prev resource groups were moved - prevModuleDir := filepath.Join(testDir, "test_prep_dir", HiddenGhpcDirName, prevDeploymentGroupDirName) + prevModuleDir := filepath.Join(depDir, HiddenGhpcDirName, prevDeploymentGroupDirName) files1, _ := ioutil.ReadDir(prevModuleDir) c.Check(len(files1) > 0, Equals, true) - files2, _ := ioutil.ReadDir(realDepDir) + files2, _ := ioutil.ReadDir(depDir) c.Check(len(files2), Equals, 3) // .ghpc, .gitignore, and instructions file } @@ -236,16 +236,16 @@ func (s *MySuite) TestWriteDeployment(c *C) { afero.WriteFile(aferoFS, "community/modules/green/lime/main.tf", []byte("lime"), 0644) sourcereader.ModuleFS = afero.NewIOFS(aferoFS) - testDC := getDeploymentConfigForTest() + dc := getDeploymentConfigForTest() + dir := filepath.Join(testDir, "test_write_deployment") - testDC.Config.Vars.Set("deployment_name", cty.StringVal("test_write_deployment")) - err := WriteDeployment(testDC, testDir, false /* overwriteFlag */) + err := WriteDeployment(dc, dir, false /* overwriteFlag */) c.Check(err, IsNil) // Overwriting the deployment fails - err = WriteDeployment(testDC, testDir, false /* overwriteFlag */) + err = WriteDeployment(dc, dir, false 
/* overwriteFlag */) c.Check(err, NotNil) // Overwriting the deployment succeeds with flag - err = WriteDeployment(testDC, testDir, true /* overwriteFlag */) + err = WriteDeployment(dc, dir, true /* overwriteFlag */) c.Check(err, IsNil) } @@ -307,15 +307,6 @@ func (s *MySuite) TestCreateGroupDirs(c *C) { c.Check(err, IsNil) } -func (s *MySuite) TestWriteDeployment_BadDeploymentName(c *C) { - testDC := getDeploymentConfigForTest() - var e *config.InputValueError - - testDC.Config.Vars.Set("deployment_name", cty.NumberIntVal(100)) - err := WriteDeployment(testDC, testDir, false /* overwriteFlag */) - c.Check(errors.As(err, &e), Equals, true) -} - // tfwriter.go func (s *MySuite) TestRestoreTfState(c *C) { // set up dir structure From 23c3afacf0243d38089cfcc6f874b7c6876f8fe5 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 20 Jun 2023 14:26:21 -0700 Subject: [PATCH 52/92] Add `terraform_backend_defaults` section to some examples (#1469) --- examples/hpc-enterprise-slurm.yaml | 8 ++++++++ examples/ml-slurm.yaml | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/examples/hpc-enterprise-slurm.yaml b/examples/hpc-enterprise-slurm.yaml index a24871540a..09e112c06e 100644 --- a/examples/hpc-enterprise-slurm.yaml +++ b/examples/hpc-enterprise-slurm.yaml @@ -34,6 +34,14 @@ vars: # Note that setting this option requires additional dependencies to be installed locally. 
enable_cleanup_compute: true +# Recommended to use GCS backend for Terraform state +# See https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/examples#optional-setting-up-a-remote-terraform-state +# +# terraform_backend_defaults: +# type: gcs +# configuration: +# bucket: <> + # Documentation for each of the modules used below can be found at # https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md diff --git a/examples/ml-slurm.yaml b/examples/ml-slurm.yaml index 45e420494b..f2dfe6ca9a 100644 --- a/examples/ml-slurm.yaml +++ b/examples/ml-slurm.yaml @@ -27,6 +27,14 @@ vars: new_image_family: ml-slurm disk_size_gb: 200 +# Recommended to use GCS backend for Terraform state +# See https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/examples#optional-setting-up-a-remote-terraform-state +# +# terraform_backend_defaults: +# type: gcs +# configuration: +# bucket: <> + deployment_groups: - group: primary modules: From 45d5451a94b6f79a509923ac78713d60446f7fdf Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 20 Jun 2023 15:07:37 -0700 Subject: [PATCH 53/92] Use consistent order in "product of module use" mark. 
(#1484) --- pkg/config/config.go | 2 ++ pkg/config/expand_test.go | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index 7d5adbd89b..a1875d26e3 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -22,6 +22,7 @@ import ( "log" "os" "regexp" + "sort" "strings" "github.com/pkg/errors" @@ -752,6 +753,7 @@ func AsProductOfModuleUse(v cty.Value, mods ...ModuleID) cty.Value { for i, m := range mods { s[i] = string(m) } + sort.Strings(s) return v.Mark(productOfModuleUseMark{strings.Join(s, ",")}) } diff --git a/pkg/config/expand_test.go b/pkg/config/expand_test.go index 1d64fba8a7..319c592563 100644 --- a/pkg/config/expand_test.go +++ b/pkg/config/expand_test.go @@ -71,7 +71,7 @@ func (s *MySuite) TestAddListValue(c *C) { mod.addListValue(setting, second) c.Check(mod.Settings.Get(setting), DeepEquals, - AsProductOfModuleUse(MustParseExpression(`flatten(["value2", flatten(["value1"])])`).AsValue(), "mod2", "mod1")) + AsProductOfModuleUse(MustParseExpression(`flatten(["value2", flatten(["value1"])])`).AsValue(), "mod1", "mod2")) } func (s *MySuite) TestUseModule(c *C) { From e065a42b01f532e62a404f7fc4d90ae73ca52095 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 20 Jun 2023 17:10:46 -0500 Subject: [PATCH 54/92] Export DeploymentSource func --- pkg/modulewriter/modulewriter.go | 6 +++--- pkg/modulewriter/modulewriter_test.go | 14 +++++++------- pkg/modulewriter/packerwriter.go | 2 +- pkg/modulewriter/tfwriter.go | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pkg/modulewriter/modulewriter.go b/pkg/modulewriter/modulewriter.go index 3630f389ea..ff905dbb25 100644 --- a/pkg/modulewriter/modulewriter.go +++ b/pkg/modulewriter/modulewriter.go @@ -148,7 +148,7 @@ func createGroupDirs(deploymentPath string, deploymentGroups *[]config.Deploymen return nil } -// Get module source within deployment group +// DeploymentSource returns module source within deployment group // Rules are 
following: // - git source // => keep the same source @@ -158,7 +158,7 @@ func createGroupDirs(deploymentPath string, deploymentGroups *[]config.Deploymen // => ./modules/embedded/ // - other // => ./modules/- -func deploymentSource(mod config.Module) (string, error) { +func DeploymentSource(mod config.Module) (string, error) { if sourcereader.IsGitPath(mod.Source) && mod.Kind == config.TerraformKind { return mod.Source, nil } @@ -224,7 +224,7 @@ func copySource(deploymentPath string, deploymentGroups *[]config.DeploymentGrou } /* Copy source files */ - ds, err := deploymentSource(*mod) + ds, err := DeploymentSource(*mod) if err != nil { return err } diff --git a/pkg/modulewriter/modulewriter_test.go b/pkg/modulewriter/modulewriter_test.go index 80c79617ad..8a28c7f15d 100644 --- a/pkg/modulewriter/modulewriter_test.go +++ b/pkg/modulewriter/modulewriter_test.go @@ -742,43 +742,43 @@ func TestMain(m *testing.M) { func (s *MySuite) TestDeploymentSource(c *C) { { // git m := config.Module{Kind: config.TerraformKind, Source: "github.com/x/y.git"} - s, err := deploymentSource(m) + s, err := DeploymentSource(m) c.Check(err, IsNil) c.Check(s, Equals, "github.com/x/y.git") } { // packer m := config.Module{Kind: config.PackerKind, Source: "modules/packer/custom-image", ID: "custom-image"} - s, err := deploymentSource(m) + s, err := DeploymentSource(m) c.Check(err, IsNil) c.Check(s, Equals, "custom-image") } { // embedded core m := config.Module{Kind: config.TerraformKind, Source: "modules/x/y"} - s, err := deploymentSource(m) + s, err := DeploymentSource(m) c.Check(err, IsNil) c.Check(s, Equals, "./modules/embedded/modules/x/y") } { // embedded community m := config.Module{Kind: config.TerraformKind, Source: "community/modules/x/y"} - s, err := deploymentSource(m) + s, err := DeploymentSource(m) c.Check(err, IsNil) c.Check(s, Equals, "./modules/embedded/community/modules/x/y") } { // local rel in repo m := config.Module{Kind: config.TerraformKind, Source: "./modules/x/y"} - 
s, err := deploymentSource(m) + s, err := DeploymentSource(m) c.Check(err, IsNil) c.Check(s, Matches, `^\./modules/y-\w\w\w\w$`) } { // local rel m := config.Module{Kind: config.TerraformKind, Source: "./../../../../x/y"} - s, err := deploymentSource(m) + s, err := DeploymentSource(m) c.Check(err, IsNil) c.Check(s, Matches, `^\./modules/y-\w\w\w\w$`) } { // local abs m := config.Module{Kind: config.TerraformKind, Source: "/tmp/x/y"} - s, err := deploymentSource(m) + s, err := DeploymentSource(m) c.Check(err, IsNil) c.Check(s, Matches, `^\./modules/y-\w\w\w\w$`) } diff --git a/pkg/modulewriter/packerwriter.go b/pkg/modulewriter/packerwriter.go index 1bb90c3fbf..bf347cd63e 100644 --- a/pkg/modulewriter/packerwriter.go +++ b/pkg/modulewriter/packerwriter.go @@ -93,7 +93,7 @@ func (w PackerWriter) writeDeploymentGroup( return err } - ds, err := deploymentSource(mod) + ds, err := DeploymentSource(mod) if err != nil { return err } diff --git a/pkg/modulewriter/tfwriter.go b/pkg/modulewriter/tfwriter.go index cfec510fe5..c9161e9e46 100644 --- a/pkg/modulewriter/tfwriter.go +++ b/pkg/modulewriter/tfwriter.go @@ -221,7 +221,7 @@ func writeMain( moduleBody := moduleBlock.Body() // Add source attribute - ds, err := deploymentSource(mod) + ds, err := DeploymentSource(mod) if err != nil { return err } From fe691a199d2bf97dd7d6ff1b8d537da2dd15dab1 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 20 Jun 2023 17:10:47 -0500 Subject: [PATCH 55/92] Facilitate setting accelerator_type in Packer module --- modules/packer/custom-image/image.pkr.hcl | 4 +++- .../expectations/igc_pkr/one/image/image.pkr.hcl | 4 +++- .../expectations/text_escape/zero/lime/image.pkr.hcl | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/modules/packer/custom-image/image.pkr.hcl b/modules/packer/custom-image/image.pkr.hcl index 9098d32b05..2ddd6bad1d 100644 --- a/modules/packer/custom-image/image.pkr.hcl +++ b/modules/packer/custom-image/image.pkr.hcl @@ -70,6 +70,8 @@ locals { : 
local.on_host_maintenance_default ) + accelerator_type = var.accelerator_type == null ? null : "projects/${var.project_id}/zones/${var.zone}/acceleratorTypes/${var.accelerator_type}" + winrm_username = local.communicator == "winrm" ? "packer_user" : null winrm_insecure = local.communicator == "winrm" ? true : null winrm_use_ssl = local.communicator == "winrm" ? true : null @@ -82,7 +84,7 @@ source "googlecompute" "toolkit_image" { image_family = local.image_family image_labels = var.labels machine_type = var.machine_type - accelerator_type = var.accelerator_type + accelerator_type = local.accelerator_type accelerator_count = var.accelerator_count on_host_maintenance = local.on_host_maintenance disk_size = var.disk_size diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl index 9098d32b05..2ddd6bad1d 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl @@ -70,6 +70,8 @@ locals { : local.on_host_maintenance_default ) + accelerator_type = var.accelerator_type == null ? null : "projects/${var.project_id}/zones/${var.zone}/acceleratorTypes/${var.accelerator_type}" + winrm_username = local.communicator == "winrm" ? "packer_user" : null winrm_insecure = local.communicator == "winrm" ? true : null winrm_use_ssl = local.communicator == "winrm" ? 
true : null @@ -82,7 +84,7 @@ source "googlecompute" "toolkit_image" { image_family = local.image_family image_labels = var.labels machine_type = var.machine_type - accelerator_type = var.accelerator_type + accelerator_type = local.accelerator_type accelerator_count = var.accelerator_count on_host_maintenance = local.on_host_maintenance disk_size = var.disk_size diff --git a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl index 9098d32b05..2ddd6bad1d 100644 --- a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl +++ b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl @@ -70,6 +70,8 @@ locals { : local.on_host_maintenance_default ) + accelerator_type = var.accelerator_type == null ? null : "projects/${var.project_id}/zones/${var.zone}/acceleratorTypes/${var.accelerator_type}" + winrm_username = local.communicator == "winrm" ? "packer_user" : null winrm_insecure = local.communicator == "winrm" ? true : null winrm_use_ssl = local.communicator == "winrm" ? true : null @@ -82,7 +84,7 @@ source "googlecompute" "toolkit_image" { image_family = local.image_family image_labels = var.labels machine_type = var.machine_type - accelerator_type = var.accelerator_type + accelerator_type = local.accelerator_type accelerator_count = var.accelerator_count on_host_maintenance = local.on_host_maintenance disk_size = var.disk_size From 2e2e7050a0625fe470c100069228144348b87c3a Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 20 Jun 2023 17:10:47 -0500 Subject: [PATCH 56/92] Add support for Packer "packages" Treat remote (git-hosted) Packer modules as packages when using Terraform's "//" notation to delinate the root of a package. In practice, the root should be the root of a git repository. 
--- cmd/deploy.go | 5 +- pkg/modulereader/resreader.go | 17 +++++-- pkg/modulewriter/modulewriter.go | 49 +++++++++++++------ pkg/modulewriter/modulewriter_test.go | 16 +++++- pkg/modulewriter/packerwriter.go | 11 ++--- pkg/shell/packer.go | 44 +++++++++++++++-- pkg/shell/terraform.go | 8 ++- .../expectations/igc_pkr/instructions.txt | 2 +- .../expectations/text_escape/instructions.txt | 2 +- 9 files changed, 120 insertions(+), 34 deletions(-) diff --git a/cmd/deploy.go b/cmd/deploy.go index fe292ebbd6..b06911f4f5 100644 --- a/cmd/deploy.go +++ b/cmd/deploy.go @@ -18,6 +18,7 @@ package cmd import ( "fmt" "hpc-toolkit/pkg/config" + "hpc-toolkit/pkg/modulewriter" "hpc-toolkit/pkg/shell" "log" "path/filepath" @@ -86,7 +87,9 @@ func runDeployCmd(cmd *cobra.Command, args []string) { switch group.Kind { case config.PackerKind: // Packer groups are enforced to have length 1 - moduleDir := filepath.Join(groupDir, string(group.Modules[0].ID)) + subPath, e := modulewriter.DeploymentSource(group.Modules[0]) + cobra.CheckErr(e) + moduleDir := filepath.Join(groupDir, subPath) err = deployPackerGroup(moduleDir) case config.TerraformKind: err = deployTerraformGroup(groupDir) diff --git a/pkg/modulereader/resreader.go b/pkg/modulereader/resreader.go index 80fb4b238e..45ffafdf05 100644 --- a/pkg/modulereader/resreader.go +++ b/pkg/modulereader/resreader.go @@ -25,6 +25,7 @@ import ( "path" "strings" + "github.com/hashicorp/go-getter" "gopkg.in/yaml.v3" ) @@ -135,10 +136,18 @@ func GetModuleInfo(source string, kind string) (ModuleInfo, error) { if err != nil { return ModuleInfo{}, err } - modPath = path.Join(tmpDir, "module") - sourceReader := sourcereader.Factory(source) - if err = sourceReader.GetModule(source, modPath); err != nil { - return ModuleInfo{}, fmt.Errorf("failed to clone git module at %s: %v", source, err) + pkgAddr, subDir := getter.SourceDirSubdir(source) + pkgPath := path.Join(tmpDir, "module") + modPath = path.Join(pkgPath, subDir) + sourceReader := 
sourcereader.Factory(pkgAddr) + if err = sourceReader.GetModule(pkgAddr, pkgPath); err != nil { + if subDir == "" { + return ModuleInfo{}, err + } + return ModuleInfo{}, + fmt.Errorf("module source %s included \"//\" package syntax; "+ + "the \"//\" should typically be placed at the root of the repository:\n%s", + source, err.Error()) } case sourcereader.IsEmbeddedPath(source) || sourcereader.IsLocalPath(source): diff --git a/pkg/modulewriter/modulewriter.go b/pkg/modulewriter/modulewriter.go index ff905dbb25..60dcef7e32 100644 --- a/pkg/modulewriter/modulewriter.go +++ b/pkg/modulewriter/modulewriter.go @@ -32,6 +32,8 @@ import ( "os" "path" "path/filepath" + + "github.com/hashicorp/go-getter" ) // strings that get re-used throughout this package and others @@ -159,21 +161,26 @@ func createGroupDirs(deploymentPath string, deploymentGroups *[]config.Deploymen // - other // => ./modules/- func DeploymentSource(mod config.Module) (string, error) { - if sourcereader.IsGitPath(mod.Source) && mod.Kind == config.TerraformKind { - return mod.Source, nil + if mod.Kind != config.PackerKind && mod.Kind != config.TerraformKind { + return "", fmt.Errorf("unexpected module kind %#v", mod.Kind) + } + if sourcereader.IsGitPath(mod.Source) { + switch mod.Kind { + case config.TerraformKind: + return mod.Source, nil + case config.PackerKind: + _, subDir := getter.SourceDirSubdir(mod.Source) + return filepath.Join(string(mod.ID), subDir), nil + } } if mod.Kind == config.PackerKind { return string(mod.ID), nil } - if mod.Kind != config.TerraformKind { - return "", fmt.Errorf("unexpected module kind %#v", mod.Kind) - } - if sourcereader.IsEmbeddedPath(mod.Source) { return "./modules/" + filepath.Join("embedded", mod.Source), nil } if !sourcereader.IsLocalPath(mod.Source) { - return "", fmt.Errorf("unuexpected module source %s", mod.Source) + return "", fmt.Errorf("unexpected module source %s", mod.Source) } abs, err := filepath.Abs(mod.Source) @@ -224,17 +231,31 @@ func 
copySource(deploymentPath string, deploymentGroups *[]config.DeploymentGrou } /* Copy source files */ - ds, err := DeploymentSource(*mod) - if err != nil { - return err + var modPath string + var dst string + if sourcereader.IsGitPath(mod.Source) && mod.Kind == config.PackerKind { + modPath, _ = getter.SourceDirSubdir(mod.Source) + dst = filepath.Join(basePath, string(mod.ID)) + + } else { + modPath = mod.Source + ds, err := DeploymentSource(*mod) + if err != nil { + return err + } + dst = filepath.Join(basePath, ds) } - dst := filepath.Join(basePath, ds) if _, err := os.Stat(dst); err == nil { continue } - reader := sourcereader.Factory(mod.Source) - if err := reader.GetModule(mod.Source, dst); err != nil { - return fmt.Errorf("failed to get module from %s to %s: %v", mod.Source, dst, err) + reader := sourcereader.Factory(modPath) + if err := reader.GetModule(modPath, dst); err != nil { + return fmt.Errorf("failed to get module from %s to %s: %v", modPath, dst, err) + } + // remove .git directory if one exists; we do not want submodule + // git history in deployment directory + if err := os.RemoveAll(filepath.Join(dst, ".git")); err != nil { + return err } } if copyEmbedded { diff --git a/pkg/modulewriter/modulewriter_test.go b/pkg/modulewriter/modulewriter_test.go index 8a28c7f15d..88533a1337 100644 --- a/pkg/modulewriter/modulewriter_test.go +++ b/pkg/modulewriter/modulewriter_test.go @@ -747,10 +747,22 @@ func (s *MySuite) TestDeploymentSource(c *C) { c.Check(s, Equals, "github.com/x/y.git") } { // packer - m := config.Module{Kind: config.PackerKind, Source: "modules/packer/custom-image", ID: "custom-image"} + m := config.Module{Kind: config.PackerKind, Source: "modules/packer/custom-image", ID: "image-id"} s, err := DeploymentSource(m) c.Check(err, IsNil) - c.Check(s, Equals, "custom-image") + c.Check(s, Equals, "image-id") + } + { // remote packer non-package + m := config.Module{Kind: config.PackerKind, Source: 
"github.com/GoogleCloudPlatform/modules/packer/custom-image", ID: "image-id"} + s, err := DeploymentSource(m) + c.Check(err, IsNil) + c.Check(s, Equals, "image-id") + } + { // remote packer package + m := config.Module{Kind: config.PackerKind, Source: "github.com/GoogleCloudPlatform//modules/packer/custom-image?ref=main", ID: "image-id"} + s, err := DeploymentSource(m) + c.Check(err, IsNil) + c.Check(s, Equals, "image-id/modules/packer/custom-image") } { // embedded core m := config.Module{Kind: config.TerraformKind, Source: "modules/x/y"} diff --git a/pkg/modulewriter/packerwriter.go b/pkg/modulewriter/packerwriter.go index bf347cd63e..7cc3253cc8 100644 --- a/pkg/modulewriter/packerwriter.go +++ b/pkg/modulewriter/packerwriter.go @@ -41,16 +41,15 @@ func (w *PackerWriter) addNumModules(value int) { w.numModules += value } -func printPackerInstructions(w io.Writer, modPath string, modID config.ModuleID, printImportInputs bool) { +func printPackerInstructions(w io.Writer, groupPath string, subPath string, printImportInputs bool) { fmt.Fprintln(w) - fmt.Fprintf(w, "Packer group '%s' was successfully created in directory %s\n", modID, modPath) + fmt.Fprintf(w, "Packer group was successfully created in directory %s\n", groupPath) fmt.Fprintln(w, "To deploy, run the following commands:") fmt.Fprintln(w) - grpPath := filepath.Clean(filepath.Join(modPath, "..")) if printImportInputs { - fmt.Fprintf(w, "ghpc import-inputs %s\n", grpPath) + fmt.Fprintf(w, "ghpc import-inputs %s\n", groupPath) } - fmt.Fprintf(w, "cd %s\n", modPath) + fmt.Fprintf(w, "cd %s\n", filepath.Join(groupPath, subPath)) fmt.Fprintln(w, "packer init .") fmt.Fprintln(w, "packer validate .") fmt.Fprintln(w, "packer build .") @@ -102,7 +101,7 @@ func (w PackerWriter) writeDeploymentGroup( return err } hasIgc := len(pure.Items()) < len(mod.Settings.Items()) - printPackerInstructions(instructionsFile, modPath, mod.ID, hasIgc) + printPackerInstructions(instructionsFile, groupPath, ds, hasIgc) } return nil 
diff --git a/pkg/shell/packer.go b/pkg/shell/packer.go index 74f82baefa..0d51fe3baa 100644 --- a/pkg/shell/packer.go +++ b/pkg/shell/packer.go @@ -17,8 +17,11 @@ package shell import ( + "bytes" + "io" "os" "os/exec" + "sync" ) // ConfigurePacker errors if packer is not in the user PATH @@ -38,12 +41,47 @@ func ConfigurePacker() error { func ExecPackerCmd(workingDir string, printToScreen bool, args ...string) error { cmd := exec.Command("packer", args...) cmd.Dir = workingDir + stdout, err := cmd.StdoutPipe() + if err != nil { + return err + } + stderr, err := cmd.StderrPipe() + if err != nil { + return err + } + + if err := cmd.Start(); err != nil { + return err + } + + // capture stdout/stderr; print to screen in real-time or upon error + var wg sync.WaitGroup + var outBuf io.ReadWriter + var errBuf io.ReadWriter if printToScreen { - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr + outBuf = os.Stdout + errBuf = os.Stderr + } else { + outBuf = bytes.NewBuffer([]byte{}) + errBuf = bytes.NewBuffer([]byte{}) } + wg.Add(1) + go func() { + defer wg.Done() + io.Copy(outBuf, stdout) + }() + wg.Add(1) + go func() { + defer wg.Done() + io.Copy(errBuf, stderr) + }() + wg.Wait() - if err := cmd.Run(); err != nil { + if err := cmd.Wait(); err != nil { + if !printToScreen { + io.Copy(os.Stdout, outBuf) + io.Copy(os.Stderr, errBuf) + } return err } return nil diff --git a/pkg/shell/terraform.go b/pkg/shell/terraform.go index f51a16da3b..582a3023a1 100644 --- a/pkg/shell/terraform.go +++ b/pkg/shell/terraform.go @@ -334,8 +334,12 @@ func ImportInputs(deploymentGroupDir string, artifactsDir string, expandedBluepr packerGroup := dc.Config.DeploymentGroups[thisGroupIdx] // Packer groups are enforced to have length 1 packerModule := packerGroup.Modules[0] - moduleID := string(packerModule.ID) - outfile = filepath.Join(deploymentGroupDir, moduleID, fmt.Sprintf("%s_inputs.auto.pkrvars.hcl", moduleID)) + modPath, err := modulewriter.DeploymentSource(packerModule) + if err != nil { + 
return err + } + outfile = filepath.Join(deploymentGroupDir, modPath, + fmt.Sprintf("%s_inputs.auto.pkrvars.hcl", packerModule.ID)) // evaluate Packer settings that contain intergroup references in the // context of deployment variables and intergroup output values diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/instructions.txt b/tools/validate_configs/golden_copies/expectations/igc_pkr/instructions.txt index 88bc6ba912..dbf1500c89 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/instructions.txt +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/instructions.txt @@ -9,7 +9,7 @@ terraform -chdir=golden_copy_deployment/zero validate terraform -chdir=golden_copy_deployment/zero apply ghpc export-outputs golden_copy_deployment/zero -Packer group 'image' was successfully created in directory golden_copy_deployment/one/image +Packer group was successfully created in directory golden_copy_deployment/one To deploy, run the following commands: ghpc import-inputs golden_copy_deployment/one diff --git a/tools/validate_configs/golden_copies/expectations/text_escape/instructions.txt b/tools/validate_configs/golden_copies/expectations/text_escape/instructions.txt index c3df421bd0..49c401feca 100644 --- a/tools/validate_configs/golden_copies/expectations/text_escape/instructions.txt +++ b/tools/validate_configs/golden_copies/expectations/text_escape/instructions.txt @@ -1,7 +1,7 @@ Advanced Deployment Instructions ================================ -Packer group 'lime' was successfully created in directory golden_copy_deployment/zero/lime +Packer group was successfully created in directory golden_copy_deployment/zero To deploy, run the following commands: cd golden_copy_deployment/zero/lime From 235bfc08615aa64ea923daf06753bca2c5dc67b6 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Tue, 13 Jun 2023 00:16:06 -0700 Subject: [PATCH 57/92] Add community example of how to use filestore with gke --- 
community/examples/storage-gke.yaml | 78 +++++++++++++++++++++++++++++ examples/README.md | 20 ++++++++ 2 files changed, 98 insertions(+) create mode 100644 community/examples/storage-gke.yaml diff --git a/community/examples/storage-gke.yaml b/community/examples/storage-gke.yaml new file mode 100644 index 0000000000..bc2f93ba64 --- /dev/null +++ b/community/examples/storage-gke.yaml @@ -0,0 +1,78 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +blueprint_name: storage-gke + +vars: + project_id: ## Set GCP Project ID Here ## + deployment_name: storage-gke-01 + region: us-central1 + zone: us-central1-c + + # Cidr block containing the IP of the machine calling terraform. + # The following line must be updated for this example to work. 
+ authorized_cidr: /32 + +deployment_groups: +- group: primary + modules: + - id: network1 + source: modules/network/vpc + settings: + subnetwork_name: gke-subnet + secondary_ranges: + gke-subnet: + - range_name: pods + ip_cidr_range: 10.4.0.0/14 + - range_name: services + ip_cidr_range: 10.0.32.0/20 + + - id: gke_cluster + source: community/modules/scheduler/gke-cluster + use: [network1] + settings: + enable_filestore_csi: true + enable_private_endpoint: false # Allows for access from authorized public IPs + master_authorized_networks: + - display_name: deployment-machine + cidr_block: $(vars.authorized_cidr) + outputs: [instructions] + + - id: debug_pool + source: community/modules/compute/gke-node-pool + use: [gke_cluster] + settings: + zones: [$(vars.zone)] + machine_type: n2-standard-2 + + - id: sharedfs + source: modules/file-system/filestore + use: [network1] + settings: {local_mount: /shared} + + - id: sharedfs-pv + source: community/modules/file-system/gke-persistent-volume + use: [gke_cluster, sharedfs] + + - id: job-template + source: community/modules/compute/gke-job-template + use: [debug_pool, sharedfs-pv] + settings: + image: bash + command: + - bash + - -c + - echo \$(date) >> /shared/timestamp.log; cat /shared/timestamp.log + node_count: 3 + outputs: [instructions] diff --git a/examples/README.md b/examples/README.md index e91257536a..36a5ebd46c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -30,6 +30,7 @@ md_toc github examples/README.md | sed -e "s/\s-\s/ * /" * [hpc-slurm-local-ssd.yaml](#hpc-slurm-local-ssdyaml--) ![community-badge] ![experimental-badge] * [hpc-gke.yaml](#hpc-gkeyaml--) ![community-badge] ![experimental-badge] * [ml-gke](#ml-gkeyaml--) ![community-badge] ![experimental-badge] + * [storage-gke](#storage-gkeyaml--) ![community-badge] ![experimental-badge] * [htc-slurm.yaml](#htc-slurmyaml--) ![community-badge] ![experimental-badge] * [htc-htcondor.yaml](#htc-htcondoryaml--) ![community-badge] ![experimental-badge] * 
[tutorial-starccm.yaml](#tutorial-starccmyaml--) ![community-badge] ![experimental-badge] @@ -809,6 +810,25 @@ credentials for the created cluster_ and _submit a job calling `nvidia_smi`_. [ml-gke.yaml]: ../community/examples/ml-gke.yaml [`kubernetes-operations`]: ../community/modules/scripts/kubernetes-operations/README.md +### [storage-gke.yaml] ![community-badge] ![experimental-badge] + +This blueprint shows how to use different storage options with GKE in the toolkit. + +The blueprint contains the following: + +* A K8s Job that uses a Filestore as a shared file system between pods. +* More coming in the future... + +> **Note**: The Kubernetes API server will only allow requests from authorized +> networks. The `gke-persistent-volume` module needs access to the Kubernetes +> API server to create a Persistent Volume and a Persistent Volume Claim. **You +> must use the `authorized_cidr` variable to supply an authorized network which +> contains the IP address of the machine deploying the blueprint, for example +> `--vars authorized_cidr=/32`.** You can use a service like +> [whatismyip.com](https://whatismyip.com) to determine your IP address. + +[storage-gke.yaml]: ../community/examples/storage-gke.yaml + ### [htc-htcondor.yaml] ![community-badge] ![experimental-badge] This blueprint provisions an auto-scaling [HTCondor][htcondor] pool based upon From cfaba456420e0ecd31c82ec7dc5be34f0e207a4b Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 20 Jun 2023 17:42:28 -0700 Subject: [PATCH 58/92] Remove excessive error messages (#1485) --- pkg/config/expand.go | 29 +++++++++-------------------- pkg/config/expand_test.go | 36 ++++++++++++------------------------ 2 files changed, 21 insertions(+), 44 deletions(-) diff --git a/pkg/config/expand.go b/pkg/config/expand.go index aa43488a88..3a45eda936 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -45,31 +45,26 @@ var ( // expand expands variables and strings in the yaml config. 
Used directly by // ExpandConfig for the create and expand commands. func (dc *DeploymentConfig) expand() error { - if err := dc.expandBackends(); err != nil { - return fmt.Errorf("failed to apply default backend to deployment groups: %v", err) - } - - if err := dc.addDefaultValidators(); err != nil { - return fmt.Errorf("failed to update validators when expanding the config: %v", err) - } + dc.expandBackends() + dc.addDefaultValidators() if err := dc.combineLabels(); err != nil { - return fmt.Errorf("failed to update module labels when expanding the config: %v", err) + return err } if err := dc.applyUseModules(); err != nil { - return fmt.Errorf("failed to apply \"use\" modules when expanding the config: %v", err) + return err } if err := dc.applyGlobalVariables(); err != nil { - return fmt.Errorf("failed to apply deployment variables in modules when expanding the config: %v", err) + return err } dc.Config.populateOutputs() return nil } -func (dc *DeploymentConfig) expandBackends() error { +func (dc *DeploymentConfig) expandBackends() { // 1. DEFAULT: use TerraformBackend configuration (if supplied) in each // resource group // 2. 
If top-level TerraformBackendDefaults is defined, insert that @@ -99,7 +94,6 @@ func (dc *DeploymentConfig) expandBackends() error { } } } - return nil } func getModuleInputMap(inputs []modulereader.VarInfo) map[string]string { @@ -147,7 +141,7 @@ func useModule( mod *Module, useMod Module, settingsToIgnore []string, -) error { +) { modInputsMap := getModuleInputMap(mod.InfoOrDie().Inputs) for _, useOutput := range useMod.InfoOrDie().Outputs { settingName := useOutput.Name @@ -181,7 +175,6 @@ func useModule( mod.addListValue(settingName, v) } } - return nil } // applyUseModules applies variables from modules listed in the "use" field @@ -194,9 +187,7 @@ func (dc *DeploymentConfig) applyUseModules() error { if err != nil { return err } - if err := useModule(m, *used, settingsInBlueprint); err != nil { - return err - } + useModule(m, *used, settingsInBlueprint) } return nil }) @@ -409,7 +400,7 @@ func hasVariable(str string) bool { // this function adds default validators to the blueprint. 
// default validators are only added for global variables that exist -func (dc *DeploymentConfig) addDefaultValidators() error { +func (dc *DeploymentConfig) addDefaultValidators() { if dc.Config.Validators == nil { dc.Config.Validators = []validatorConfig{} } @@ -484,8 +475,6 @@ func (dc *DeploymentConfig) addDefaultValidators() error { } dc.Config.Validators = append(dc.Config.Validators, v) } - - return nil } // FindAllIntergroupReferences finds all intergroup references within the group diff --git a/pkg/config/expand_test.go b/pkg/config/expand_test.go index 319c592563..87f8508ffa 100644 --- a/pkg/config/expand_test.go +++ b/pkg/config/expand_test.go @@ -31,13 +31,9 @@ func (s *MySuite) TestExpandBackends(c *C) { dc := getDeploymentConfigForTest() deplName := dc.Config.Vars.Get("deployment_name").AsString() - // Simple test: Does Nothing - err := dc.expandBackends() - c.Assert(err, IsNil) - dc.Config.TerraformBackendDefaults = TerraformBackend{Type: "gcs"} - err = dc.expandBackends() - c.Assert(err, IsNil) + dc.expandBackends() + grp := dc.Config.DeploymentGroups[0] c.Assert(grp.TerraformBackend.Type, Not(Equals), "") gotPrefix := grp.TerraformBackend.Configuration.Get("prefix") @@ -49,8 +45,8 @@ func (s *MySuite) TestExpandBackends(c *C) { Name: "group2", } dc.Config.DeploymentGroups = append(dc.Config.DeploymentGroups, newGroup) - err = dc.expandBackends() - c.Assert(err, IsNil) + dc.expandBackends() + newGrp := dc.Config.DeploymentGroups[1] c.Assert(newGrp.TerraformBackend.Type, Not(Equals), "") gotPrefix = newGrp.TerraformBackend.Configuration.Get("prefix") @@ -92,8 +88,7 @@ func (s *MySuite) TestUseModule(c *C) { setTestModuleInfo(mod, modulereader.ModuleInfo{}) setTestModuleInfo(usedMod, modulereader.ModuleInfo{}) - err := useModule(&mod, usedMod, []string{}) - c.Check(err, IsNil) + useModule(&mod, usedMod, []string{}) c.Check(mod.Settings, DeepEquals, Dict{}) } @@ -104,8 +99,7 @@ func (s *MySuite) TestUseModule(c *C) { setTestModuleInfo(usedMod, 
modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - err := useModule(&mod, usedMod, []string{}) - c.Check(err, IsNil) + useModule(&mod, usedMod, []string{}) c.Check(mod.Settings, DeepEquals, Dict{}) } @@ -118,8 +112,7 @@ func (s *MySuite) TestUseModule(c *C) { Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - err := useModule(&mod, usedMod, []string{}) - c.Check(err, IsNil) + useModule(&mod, usedMod, []string{}) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ "val1": AsProductOfModuleUse(ref, "UsedModule"), }) @@ -135,8 +128,7 @@ func (s *MySuite) TestUseModule(c *C) { Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - err := useModule(&mod, usedMod, []string{"val1"}) - c.Check(err, IsNil) + useModule(&mod, usedMod, []string{"val1"}) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{"val1": ref}) } @@ -151,8 +143,7 @@ func (s *MySuite) TestUseModule(c *C) { Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - err := useModule(&mod, usedMod, []string{}) - c.Check(err, IsNil) + useModule(&mod, usedMod, []string{}) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ "val1": AsProductOfModuleUse(ref, "UsedModule")}) } @@ -165,8 +156,7 @@ func (s *MySuite) TestUseModule(c *C) { setTestModuleInfo(usedMod, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - err := useModule(&mod, usedMod, []string{}) - c.Check(err, IsNil) + useModule(&mod, usedMod, []string{}) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ "val1": AsProductOfModuleUse( MustParseExpression(`flatten([module.UsedModule.val1])`).AsValue(), @@ -184,8 +174,7 @@ func (s *MySuite) TestUseModule(c *C) { Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - err := useModule(&mod, usedMod, []string{}) - c.Check(err, IsNil) + useModule(&mod, usedMod, []string{}) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ "val1": AsProductOfModuleUse( 
MustParseExpression(`flatten([module.UsedModule.val1,[module.UsedModule.val1]])`).AsValue(), @@ -203,8 +192,7 @@ func (s *MySuite) TestUseModule(c *C) { Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - err := useModule(&mod, usedMod, []string{"val1"}) - c.Check(err, IsNil) + useModule(&mod, usedMod, []string{"val1"}) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ "val1": cty.TupleVal([]cty.Value{ref})}) } From 19b0e9a68502197318970261d7820b7389f99f4a Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 20 Jun 2023 18:26:19 -0700 Subject: [PATCH 59/92] Add rich error messages with position and snippet (#1448) * Add rich error messages with position and snippet * Add `Path` & `BpError` to attribute errors to the BP parts; * Add `YamlCtx` to contain info for rich error rendering; * Update a few errors to use `BpError`. NOTE: The crude implementation of `Path` is improved in experimental #1347 and can be addressed after/before this PR is landed/considered worthy. ```sh $ make && ./ghpc create tst.yaml **************** building ghpc ************************ Error: deployment_name input error, cause: value was not of type string on line 6, column 22: 6: "deployment_name": [] ``` --- cmd/create.go | 23 +++++- cmd/create_test.go | 29 ++++++++ cmd/deploy.go | 2 +- cmd/destroy.go | 2 +- cmd/export.go | 2 +- cmd/import.go | 2 +- pkg/config/config.go | 82 +++++++++----------- pkg/config/config_test.go | 22 +++--- pkg/config/errors.go | 33 +++++++++ pkg/config/yaml.go | 152 ++++++++++++++++++++++++++++++++++++++ pkg/config/yaml_test.go | 152 ++++++++++++++++++++++++++++++++++++++ pkg/shell/terraform.go | 2 +- 12 files changed, 437 insertions(+), 66 deletions(-) create mode 100644 pkg/config/errors.go create mode 100644 pkg/config/yaml.go create mode 100644 pkg/config/yaml_test.go diff --git a/cmd/create.go b/cmd/create.go index 89591bd9c6..6ca8cf0dd2 100644 --- a/cmd/create.go +++ b/cmd/create.go @@ -18,6 +18,7 @@ limitations under the License. 
package cmd import ( + "errors" "fmt" "hpc-toolkit/pkg/config" "hpc-toolkit/pkg/modulewriter" @@ -96,7 +97,7 @@ func printAdvancedInstructionsMessage(deplDir string) { } func expandOrDie(path string) config.DeploymentConfig { - dc, err := config.NewDeploymentConfig(path) + dc, ctx, err := config.NewDeploymentConfig(path) if err != nil { log.Fatal(err) } @@ -120,12 +121,30 @@ func expandOrDie(path string) config.DeploymentConfig { // Expand the blueprint if err := dc.ExpandConfig(); err != nil { - log.Fatal(err) + log.Fatal(renderError(err, ctx)) } return dc } +func renderError(err error, ctx config.YamlCtx) string { + var be config.BpError + if errors.As(err, &be) { + if pos, ok := ctx.PathToPos[be.Path]; ok { + return renderRichError(be.Err, pos, ctx) + } + } + return err.Error() +} + +func renderRichError(err error, pos config.Pos, ctx config.YamlCtx) string { + return fmt.Sprintf(` +Error: %s +on line %d, column %d: +%d: %s +`, err, pos.Line, pos.Column, pos.Line, ctx.Lines[pos.Line-1]) +} + func setCLIVariables(bp *config.Blueprint, s []string) error { for _, cliVar := range s { arr := strings.SplitN(cliVar, "=", 2) diff --git a/cmd/create_test.go b/cmd/create_test.go index 3c4a0ae6be..ed5fda5a15 100644 --- a/cmd/create_test.go +++ b/cmd/create_test.go @@ -15,6 +15,7 @@ package cmd import ( + "errors" "hpc-toolkit/pkg/config" "github.com/zclconf/go-cty/cty" @@ -128,3 +129,31 @@ func (s *MySuite) TestValidationLevels(c *C) { c.Check(setValidationLevel(&bp, "INVALID"), NotNil) } + +func (s *MySuite) TestRenderError(c *C) { + { // simple + err := errors.New("arbuz") + got := renderError(err, config.YamlCtx{}) + c.Check(got, Equals, "arbuz") + } + { // has pos, but context is missing + pth := config.Path{}.Dot("rainbow").Dot("over") + err := config.BpError{Path: pth, Err: errors.New("arbuz")} + got := renderError(err, config.YamlCtx{}) + c.Check(got, Equals, "rainbow.over: arbuz") + } + { // has pos, has context + pth := config.Path{}.Dot("rainbow").Dot("over") + 
ctx := config.YamlCtx{ + PathToPos: map[config.Path]config.Pos{ + pth: {Line: 2, Column: 3}}, + Lines: []string{"uno", "dos", "tres"}} + err := config.BpError{Path: pth, Err: errors.New("arbuz")} + got := renderError(err, ctx) + c.Check(got, Equals, ` +Error: arbuz +on line 2, column 3: +2: dos +`) + } +} diff --git a/cmd/deploy.go b/cmd/deploy.go index b06911f4f5..7e6cf544f9 100644 --- a/cmd/deploy.go +++ b/cmd/deploy.go @@ -75,7 +75,7 @@ func getApplyBehavior(autoApprove bool) shell.ApplyBehavior { func runDeployCmd(cmd *cobra.Command, args []string) { expandedBlueprintFile := filepath.Join(artifactsDir, expandedBlueprintFilename) - dc, err := config.NewDeploymentConfig(expandedBlueprintFile) + dc, _, err := config.NewDeploymentConfig(expandedBlueprintFile) cobra.CheckErr(err) cobra.CheckErr(shell.ValidateDeploymentDirectory(dc.Config.DeploymentGroups, deploymentRoot)) diff --git a/cmd/destroy.go b/cmd/destroy.go index ddbfb7660a..fecc042a27 100644 --- a/cmd/destroy.go +++ b/cmd/destroy.go @@ -64,7 +64,7 @@ func parseDestroyArgs(cmd *cobra.Command, args []string) error { func runDestroyCmd(cmd *cobra.Command, args []string) error { expandedBlueprintFile := filepath.Join(artifactsDir, expandedBlueprintFilename) - dc, err := config.NewDeploymentConfig(expandedBlueprintFile) + dc, _, err := config.NewDeploymentConfig(expandedBlueprintFile) if err != nil { return err } diff --git a/cmd/export.go b/cmd/export.go index f5be025c08..4311f342d7 100644 --- a/cmd/export.go +++ b/cmd/export.go @@ -87,7 +87,7 @@ func runExportCmd(cmd *cobra.Command, args []string) error { } expandedBlueprintFile := filepath.Join(artifactsDir, expandedBlueprintFilename) - dc, err := config.NewDeploymentConfig(expandedBlueprintFile) + dc, _, err := config.NewDeploymentConfig(expandedBlueprintFile) if err != nil { return err } diff --git a/cmd/import.go b/cmd/import.go index 274438eebc..cf93fa65a6 100644 --- a/cmd/import.go +++ b/cmd/import.go @@ -51,7 +51,7 @@ func runImportCmd(cmd 
*cobra.Command, args []string) error { } expandedBlueprintFile := filepath.Join(artifactsDir, expandedBlueprintFilename) - dc, err := config.NewDeploymentConfig(expandedBlueprintFile) + dc, _, err := config.NewDeploymentConfig(expandedBlueprintFile) if err != nil { return err } diff --git a/pkg/config/config.go b/pkg/config/config.go index a1875d26e3..6e1a449ea2 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -20,7 +20,6 @@ import ( "fmt" "io/ioutil" "log" - "os" "regexp" "sort" "strings" @@ -434,39 +433,17 @@ func (bp Blueprint) checkMovedModules() error { } // NewDeploymentConfig is a constructor for DeploymentConfig -func NewDeploymentConfig(configFilename string) (DeploymentConfig, error) { - blueprint, err := importBlueprint(configFilename) +func NewDeploymentConfig(configFilename string) (DeploymentConfig, YamlCtx, error) { + bp, ctx, err := importBlueprint(configFilename) if err != nil { - return DeploymentConfig{}, err + return DeploymentConfig{}, YamlCtx{}, err } - return DeploymentConfig{Config: blueprint}, nil -} - -// ImportBlueprint imports the blueprint configuration provided. 
-func importBlueprint(blueprintFilename string) (Blueprint, error) { - var blueprint Blueprint - - reader, err := os.Open(blueprintFilename) - if err != nil { - return blueprint, fmt.Errorf("%s, filename=%s: %v", - errorMessages["fileLoadError"], blueprintFilename, err) - } - - decoder := yaml.NewDecoder(reader) - decoder.KnownFields(true) - - if err = decoder.Decode(&blueprint); err != nil { - return blueprint, fmt.Errorf(errorMessages["yamlUnmarshalError"], - blueprintFilename, err) - } - // if the validation level has been explicitly set to an invalid value // in YAML blueprint then silently default to validationError - if !isValidValidationLevel(blueprint.ValidationLevel) { - blueprint.ValidationLevel = ValidationError + if !isValidValidationLevel(bp.ValidationLevel) { + bp.ValidationLevel = ValidationError } - - return blueprint, nil + return DeploymentConfig{Config: bp}, ctx, nil } // ExportBlueprint exports the internal representation of a blueprint config @@ -528,14 +505,20 @@ func checkModulesAndGroups(groups []DeploymentGroup) error { if err := grp.Name.Validate(); err != nil { return err } + pg := Path{"deployment_groups"}.At(ig) if seenGroups[grp.Name] { - return fmt.Errorf("%s: %s used more than once", errorMessages["duplicateGroup"], grp.Name) + return BpError{ + pg.Dot("name"), + fmt.Errorf("%s: %s used more than once", errorMessages["duplicateGroup"], grp.Name)} } seenGroups[grp.Name] = true - for _, mod := range grp.Modules { + for im, mod := range grp.Modules { + pm := pg.Dot("modules").At(im) if seenMod[mod.ID] { - return fmt.Errorf("%s: %s used more than once", errorMessages["duplicateID"], mod.ID) + return BpError{ + pm.Dot("id"), + fmt.Errorf("%s: %s used more than once", errorMessages["duplicateID"], mod.ID)} } seenMod[mod.ID] = true @@ -544,9 +527,11 @@ func checkModulesAndGroups(groups []DeploymentGroup) error { grp.Kind = mod.Kind } if grp.Kind != mod.Kind { - return fmt.Errorf( - "mixing modules of differing kinds in a deployment group is 
not supported: deployment group %s, got %s and %s", - grp.Name, grp.Kind, mod.Kind) + return BpError{ + pm.Dot("kind"), + fmt.Errorf( + "mixing modules of differing kinds in a deployment group is not supported: deployment group %s, got %s and %s", + grp.Name, grp.Kind, mod.Kind)} } } } @@ -663,7 +648,7 @@ type InputValueError struct { cause string } -func (err *InputValueError) Error() string { +func (err InputValueError) Error() string { return fmt.Sprintf("%v input error, cause: %v", err.inputKey, err.cause) } @@ -687,34 +672,35 @@ func isValidLabelValue(value string) bool { // DeploymentName returns the deployment_name from the config and does approperate checks. func (bp *Blueprint) DeploymentName() (string, error) { if !bp.Vars.Has("deployment_name") { - return "", &InputValueError{ + return "", InputValueError{ inputKey: "deployment_name", cause: errorMessages["varNotFound"], } } + path := Path{"vars.deployment_name"} v := bp.Vars.Get("deployment_name") if v.Type() != cty.String { - return "", &InputValueError{ + return "", BpError{path, InputValueError{ inputKey: "deployment_name", cause: errorMessages["valueNotString"], - } + }} } s := v.AsString() if len(s) == 0 { - return "", &InputValueError{ + return "", BpError{path, InputValueError{ inputKey: "deployment_name", cause: errorMessages["valueEmptyString"], - } + }} } // Check that deployment_name is a valid label if !isValidLabelValue(s) { - return "", &InputValueError{ + return "", BpError{path, InputValueError{ inputKey: "deployment_name", cause: errorMessages["labelValueReqs"], - } + }} } return s, nil @@ -723,19 +709,19 @@ func (bp *Blueprint) DeploymentName() (string, error) { // checkBlueprintName returns an error if blueprint_name does not comply with // requirements for correct GCP label values. 
func (bp *Blueprint) checkBlueprintName() error { - + p := Path{"blueprint_name"} if len(bp.BlueprintName) == 0 { - return &InputValueError{ + return BpError{p, InputValueError{ inputKey: "blueprint_name", cause: errorMessages["valueEmptyString"], - } + }} } if !isValidLabelValue(bp.BlueprintName) { - return &InputValueError{ + return BpError{p, InputValueError{ inputKey: "blueprint_name", cause: errorMessages["labelValueReqs"], - } + }} } return nil diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index b960c16b29..179f5a31d5 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -398,13 +398,13 @@ func (s *MySuite) TestCheckModulesAndGroups(c *C) { { // Duplicate module name same group g := DeploymentGroup{Name: "ice", Modules: []Module{{ID: "pony"}, {ID: "pony"}}} err := checkModulesAndGroups([]DeploymentGroup{g}) - c.Check(err, ErrorMatches, "module IDs must be unique: pony used more than once") + c.Check(err, ErrorMatches, ".*pony used more than once") } { // Duplicate module name different groups ice := DeploymentGroup{Name: "ice", Modules: []Module{{ID: "pony"}}} fire := DeploymentGroup{Name: "fire", Modules: []Module{{ID: "pony"}}} err := checkModulesAndGroups([]DeploymentGroup{ice, fire}) - c.Check(err, ErrorMatches, "module IDs must be unique: pony used more than once") + c.Check(err, ErrorMatches, ".*pony used more than once") } { // Mixing module kinds g := DeploymentGroup{Name: "ice", Modules: []Module{ @@ -412,7 +412,7 @@ func (s *MySuite) TestCheckModulesAndGroups(c *C) { {ID: "zebra", Kind: TerraformKind}, }} err := checkModulesAndGroups([]DeploymentGroup{g}) - c.Check(err, ErrorMatches, "mixing modules of differing kinds in a deployment group is not supported: deployment group ice, got packer and terraform") + c.Check(err, ErrorMatches, ".*got packer and terraform") } } @@ -516,7 +516,7 @@ func (s *MySuite) TestGetModule(c *C) { func (s *MySuite) TestDeploymentName(c *C) { bp := Blueprint{} - var e 
*InputValueError + var e InputValueError // Is deployment_name a valid string? bp.Vars.Set("deployment_name", cty.StringVal("yellow")) @@ -563,7 +563,7 @@ func (s *MySuite) TestDeploymentName(c *C) { func (s *MySuite) TestCheckBlueprintName(c *C) { dc := getDeploymentConfigForTest() - var e *InputValueError + var e InputValueError // Is blueprint_name a valid string? err := dc.Config.checkBlueprintName() @@ -599,21 +599,21 @@ func (s *MySuite) TestNewBlueprint(c *C) { dc := getDeploymentConfigForTest() outFile := filepath.Join(tmpTestDir, "out_TestNewBlueprint.yaml") c.Assert(dc.ExportBlueprint(outFile), IsNil) - newDC, err := NewDeploymentConfig(outFile) + newDC, _, err := NewDeploymentConfig(outFile) c.Assert(err, IsNil) c.Assert(dc.Config, DeepEquals, newDC.Config) } func (s *MySuite) TestImportBlueprint(c *C) { - obtainedBlueprint, err := importBlueprint(simpleYamlFilename) + bp, _, err := importBlueprint(simpleYamlFilename) c.Assert(err, IsNil) - c.Assert(obtainedBlueprint.BlueprintName, + c.Assert(bp.BlueprintName, Equals, expectedSimpleBlueprint.BlueprintName) c.Assert( - obtainedBlueprint.Vars.Get("labels"), + bp.Vars.Get("labels"), DeepEquals, expectedSimpleBlueprint.Vars.Get("labels")) - c.Assert(obtainedBlueprint.DeploymentGroups[0].Modules[0].ID, + c.Assert(bp.DeploymentGroups[0].Modules[0].ID, Equals, expectedSimpleBlueprint.DeploymentGroups[0].Modules[0].ID) } @@ -723,7 +723,7 @@ dragon: "Lews Therin Telamon"`) file.Close() // should fail on strict unmarshal as field does not match schema - _, err := importBlueprint(filename) + _, _, err := importBlueprint(filename) c.Check(err, NotNil) } diff --git a/pkg/config/errors.go b/pkg/config/errors.go new file mode 100644 index 0000000000..dd9fba7cff --- /dev/null +++ b/pkg/config/errors.go @@ -0,0 +1,33 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "fmt" +) + +// BpError is an error wrapper to augment Path +type BpError struct { + Path Path + Err error +} + +func (e BpError) Error() string { + return fmt.Sprintf("%s: %s", e.Path, e.Err) +} + +func (e BpError) Unwrap() error { + return e.Err +} diff --git a/pkg/config/yaml.go b/pkg/config/yaml.go new file mode 100644 index 0000000000..aa171df6f5 --- /dev/null +++ b/pkg/config/yaml.go @@ -0,0 +1,152 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "bufio" + "bytes" + "fmt" + "os" + "regexp" + + "gopkg.in/yaml.v3" +) + +// Path points to concrete location in the blueprint file. +type Path struct { + v string +} + +func (p Path) String() string { + return p.v +} + +// At is a builder method for a path of a child in a sequence. +func (p Path) At(i int) Path { + return Path{fmt.Sprintf("%s[%d]", p.v, i)} +} + +// Dot is a builder method for a path of a child in a mapping. 
+func (p Path) Dot(k string) Path { + if p.v == "" { + return Path{k} + } + return Path{fmt.Sprintf("%s.%s", p.v, k)} +} + +// Pos is a position in the blueprint file. +type Pos struct { + Line int + Column int +} + +func importBlueprint(f string) (Blueprint, YamlCtx, error) { + data, err := os.ReadFile(f) + if err != nil { + return Blueprint{}, YamlCtx{}, fmt.Errorf("%s, filename=%s: %v", errorMessages["fileLoadError"], f, err) + } + decoder := yaml.NewDecoder(bytes.NewReader(data)) + decoder.KnownFields(true) + + var bp Blueprint + if err = decoder.Decode(&bp); err != nil { + return Blueprint{}, YamlCtx{}, fmt.Errorf(errorMessages["yamlUnmarshalError"], f, err) + } + return bp, newYamlCtx(data), nil +} + +// YamlCtx is a contextual information to render errors. +type YamlCtx struct { + PathToPos map[Path]Pos + Lines []string +} + +func syntheticOutputsNode(name string, ln int, col int) *yaml.Node { + return &yaml.Node{ + Kind: yaml.MappingNode, + Content: []*yaml.Node{ + { + Kind: yaml.ScalarNode, + Value: "name", + Line: ln, + Column: col, + }, + { + Kind: yaml.ScalarNode, + Value: name, + Line: ln, + Column: col, + }, + }, + Line: ln, + Column: col, + } +} + +// normalizeNode is treating variadic YAML syntax, ensuring that +// there is only one (canonical) way to refer to a piece of blueprint. 
+// Handled cases: +// * Module.outputs: +// ``` +// outputs: +// - name: grog # canonical path to "grog" value is `...outputs[0].name` +// - mork # canonical path to "mork" value is `...outputs[1].name`, NOT `...outputs[1]` +// ``` +func normalizeYamlNode(p Path, n *yaml.Node) *yaml.Node { + switch { + case n.Kind == yaml.ScalarNode && regexp.MustCompile(`^deployment_groups\[\d+\]\.modules\[\d+\]\.outputs\[\d+\]$`).MatchString(p.String()): + return syntheticOutputsNode(n.Value, n.Line, n.Column) + default: + return n + } +} + +func newYamlCtx(data []byte) YamlCtx { + var c nodeCapturer + if err := yaml.Unmarshal(data, &c); err != nil { + panic(err) // shouldn't happen + } + + m := map[Path]Pos{} + var walk func(n *yaml.Node, p Path) + walk = func(n *yaml.Node, p Path) { + n = normalizeYamlNode(p, n) + m[p] = Pos{n.Line, n.Column} + if n.Kind == yaml.MappingNode { + for i := 0; i < len(n.Content); i += 2 { + walk(n.Content[i+1], p.Dot(n.Content[i].Value)) + } + } else if n.Kind == yaml.SequenceNode { + for i, c := range n.Content { + walk(c, p.At(i)) + } + } + } + walk(c.n, Path{""}) + + var lines []string + sc := bufio.NewScanner(bytes.NewReader(data)) + for sc.Scan() { + lines = append(lines, sc.Text()) + } + return YamlCtx{m, lines} +} + +type nodeCapturer struct{ n *yaml.Node } + +func (c *nodeCapturer) UnmarshalYAML(n *yaml.Node) error { + c.n = n + return nil +} diff --git a/pkg/config/yaml_test.go b/pkg/config/yaml_test.go new file mode 100644 index 0000000000..ea327bed1d --- /dev/null +++ b/pkg/config/yaml_test.go @@ -0,0 +1,152 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "bytes" + "testing" + + "github.com/google/go-cmp/cmp" + "gopkg.in/yaml.v3" +) + +func TestYamlCtx(t *testing.T) { + data := ` # line 1 +# comment +blueprint_name: green + +ghpc_version: apricot + +validators: +- validator: clay + inputs: + spice: curry # line 10 +- validator: sand + skip: true + +validation_level: 9000 + +vars: + red: ruby + +deployment_groups: +- group: tiger # line 20 + terraform_backend: + type: yam + configuration: + carrot: rust + kind: terraform + modules: + - id: tan + source: oatmeal + kind: terraform + use: [mocha, coffee] # line 30 + outputs: + - latte + - name: hazelnut + description: almond + sensitive: false + settings: + dijon: pine + +- group: crocodile + modules: # line 40 + - id: green + - id: olive + +terraform_backend_defaults: + type: moss +` + + { // Tests sanity check - data describes valid blueprint. 
+ decoder := yaml.NewDecoder(bytes.NewReader([]byte(data))) + decoder.KnownFields(true) + var bp Blueprint + if err := decoder.Decode(&bp); err != nil { + t.Fatal(err) + } + } + + exp := map[string]Pos{ + "": {3, 1}, + "blueprint_name": {3, 17}, + "ghpc_version": {5, 15}, + + "validators": {8, 1}, + "validators[0]": {8, 3}, + "validators[0].inputs": {10, 5}, + "validators[0].inputs.spice": {10, 12}, + "validators[0].validator": {8, 14}, + "validators[1]": {11, 3}, + "validators[1].skip": {12, 9}, + "validators[1].validator": {11, 14}, + + "validation_level": {14, 19}, + + "vars": {17, 3}, + "vars.red": {17, 8}, + + "deployment_groups": {20, 1}, + "deployment_groups[0]": {20, 3}, + "deployment_groups[0].group": {20, 10}, + + "deployment_groups[0].terraform_backend": {22, 5}, + "deployment_groups[0].terraform_backend.type": {22, 11}, + "deployment_groups[0].terraform_backend.configuration": {24, 7}, + "deployment_groups[0].terraform_backend.configuration.carrot": {24, 15}, + "deployment_groups[0].kind": {25, 9}, + + "deployment_groups[0].modules": {27, 3}, + "deployment_groups[0].modules[0]": {27, 5}, + "deployment_groups[0].modules[0].id": {27, 9}, + "deployment_groups[0].modules[0].source": {28, 13}, + "deployment_groups[0].modules[0].kind": {29, 11}, + "deployment_groups[0].modules[0].use": {30, 10}, + "deployment_groups[0].modules[0].use[0]": {30, 11}, + "deployment_groups[0].modules[0].use[1]": {30, 18}, + "deployment_groups[0].modules[0].outputs": {32, 5}, + "deployment_groups[0].modules[0].outputs[0]": {32, 7}, + "deployment_groups[0].modules[0].outputs[0].name": {32, 7}, // synthetic + "deployment_groups[0].modules[0].outputs[1]": {33, 7}, + "deployment_groups[0].modules[0].outputs[1].name": {33, 13}, + "deployment_groups[0].modules[0].outputs[1].description": {34, 20}, + "deployment_groups[0].modules[0].outputs[1].sensitive": {35, 18}, + "deployment_groups[0].modules[0].settings": {37, 7}, + "deployment_groups[0].modules[0].settings.dijon": {37, 14}, + + 
"deployment_groups[1]": {39, 3}, + "deployment_groups[1].group": {39, 10}, + "deployment_groups[1].modules": {41, 3}, + "deployment_groups[1].modules[0]": {41, 5}, + "deployment_groups[1].modules[0].id": {41, 9}, + "deployment_groups[1].modules[1]": {42, 5}, + "deployment_groups[1].modules[1].id": {42, 9}, + + "terraform_backend_defaults": {45, 3}, + "terraform_backend_defaults.type": {45, 9}, + } + + ctx := newYamlCtx([]byte(data)) + for path, pos := range exp { + t.Run(path, func(t *testing.T) { + got, ok := ctx.PathToPos[Path{path}] + if !ok { + t.Errorf("%q not found", path) + } else if diff := cmp.Diff(pos, got); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } + }) + } +} diff --git a/pkg/shell/terraform.go b/pkg/shell/terraform.go index 582a3023a1..3b8546b207 100644 --- a/pkg/shell/terraform.go +++ b/pkg/shell/terraform.go @@ -288,7 +288,7 @@ func ExportOutputs(tf *tfexec.Terraform, artifactsDir string, applyBehavior Appl func ImportInputs(deploymentGroupDir string, artifactsDir string, expandedBlueprintFile string) error { deploymentRoot := filepath.Clean(filepath.Join(deploymentGroupDir, "..")) - dc, err := config.NewDeploymentConfig(expandedBlueprintFile) + dc, _, err := config.NewDeploymentConfig(expandedBlueprintFile) if err != nil { return err } From 48b5742f412b8ef78cb2d2fd9cdda6b3e0d3a399 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 21 Jun 2023 13:44:39 -0500 Subject: [PATCH 60/92] Update documentation for GitHub-hosted packages --- modules/README.md | 130 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 92 insertions(+), 38 deletions(-) diff --git a/modules/README.md b/modules/README.md index 7fe1aad833..d040301e39 100644 --- a/modules/README.md +++ b/modules/README.md @@ -237,24 +237,33 @@ at the top level main.tf file. ### Source (Required) -The source is a path or URL that points to the source files for a module. The -actual content of those files is determined by the [kind](#kind-may-be-required) of the -module. 
- -A source can be a path which may refer to a module embedded in the `ghpc` -binary or a local file. It can also be a URL pointing to a GitHub path -containing a conforming module. +The source is a path or URL that points to the source files for Packer or +Terraform modules. A source can either be a filesystem path or a URL to a git +repository: + +* Filesystem paths + * modules embedded in the `ghpc` executable + * modules in the local filesystem +* Remote modules hosted on github.com or any `git::` repository + * when modules are in a subdirectory of the git repository, a special + double-slash "//" notation can be required as described below + +An important distinction is that git URLs are natively supported by Terraform so +they are not copied to your deployment directory. Packer does not have native +support for git-hosted modules so the Toolkit will copy these modules into the +deployment folder on your behalf. #### Embedded Modules -Embedded modules are embedded in the ghpc binary during compilation and cannot +Embedded modules are added to the ghpc binary during compilation and cannot be edited. To refer to embedded modules, set the source path to -`modules/<>`. +`modules/<>` or `community/modules/<>`. + +The paths match the modules in the repository structure for [core modules](./) +and [community modules](../community/modules/). Because the modules are embedded +during compilation, your local copies may differ unless you recompile ghpc. -The paths match the modules in the repository at compilation time. You can -review the directory structure of [the core modules](./) and -[community modules](../community/modules/) to determine which path to use. For -example, the following code is using the embedded pre-existing-vpc module: +For example, this example snippet uses the embedded pre-existing-vpc module: ```yaml - id: network1 @@ -273,52 +282,97 @@ following module definition refers the local pre-existing-vpc modules. 
source: ./modules/network/pre-existing-vpc ``` -> **_NOTE:_** This example would have to be run from the HPC Toolkit repository -> directory, otherwise the path would need to be updated to point at the correct -> directory. +> **_NOTE:_** Relative paths (beginning with `.` or `..` must be relative to the +> working directory from which `ghpc` is executed. This example would have to be +> run from a local copy of the HPC Toolkit repository. An alternative is to use +> absolute paths to modules. -#### GitHub Modules +#### GitHub-hosted Modules and Packages -To use a Terraform module available on GitHub, set the source to a path starting -with `github.com` (over HTTPS) or `git@github.com` (over SSH). For instance, the -following module definitions are sourcing the vpc module by pointing at the HPC -Toolkit GitHub repository: - -Get module from GitHub over SSH: +The [Intel DAOS blueprint][pfs-daos.yaml] makes extensive use of GitHub-hosted +Terraform and Packer modules. You may wish to use it as an example reference for +this documentation. -```yaml - - id: network1 - source: git@github.com:GoogleCloudPlatform/hpc-toolkit.git//modules/network/vpc -``` - -Get module from GitHub over HTTPS: +To use a Terraform module available on GitHub, set the source to a path starting +with `github.com` (HTTPS) or `git@github.com` (SSH). For instance, the following +module definition sources the Toolkit vpc module: ```yaml - id: network1 source: github.com/GoogleCloudPlatform/hpc-toolkit//modules/network/vpc ``` -Both examples above use the [double-slash notation][tfsubdir] (`//`) to indicate -the root directory of the git repository and the remainder of the path indicates -the location of the Terraform module. +This example uses the [double-slash notation][tfsubdir] (`//`) to indicate that +the Toolkit is a "package" of multiple modules whose root directory is the root +of the git repository. The remainder of the path indicates the sub-directory of +the vpc module. 
-Additionally, [specific revisions of a remote module][tfrev] can be selected by -any valid [git reference][gitref]. Typically, these are a git branch, commit -hash or tag. The [Intel DAOS blueprint][pfs-daos.yaml] makes extensive use -of this feature. For example, to temporarily point to a development copy of the -Toolkit vpc module, use: +The example above uses the default `main` branch of the Toolkit. Specific +[revisions][tfrev] can be selected with any valid [git reference][gitref]. +(git branch, commit hash or tag). If the git reference is a tag or branch, we +recommend setting `&depth=1` to reduce the data transferred over the network. +This option cannot be set when the reference is a commit hash. The following +examples select the vpc module on the active `develop` branch and also an older +release of the filestore module: ```yaml - id: network1 source: github.com/GoogleCloudPlatform/hpc-toolkit//modules/network/vpc?ref=develop + ... + - id: homefs + source: github.com/GoogleCloudPlatform/hpc-toolkit//modules/file-system/filestore?ref=v1.10.0&depth=1 ``` +Because Terraform modules natively support this syntax, ghpc will not copy +GitHub-hosted modules into your deployment folder. Terraform will download them +into a hidden folder when you run `terraform init`. + [tfrev]: https://www.terraform.io/language/modules/sources#selecting-a-revision [gitref]: https://git-scm.com/book/en/v2/Git-Tools-Revision-Selection#_single_revisions [tfsubdir]: https://www.terraform.io/language/modules/sources#modules-in-package-sub-directories [pfs-daos.yaml]: ../community/examples/intel/pfs-daos.yaml -#### Generic Git Modules +##### GitHub-hosted Packer modules + +Packer does not natively support GitHub-hosted modules so `ghpc create` will +copy modules into your deployment folder. + +If the module uses `//` package notation, `ghpc create` will copy the entire +repository to the module path: `deployment_name/group_name/module_id`. 
However, +when `ghpc deploy` is invoked, it will run Packer from the subdirectory +`deployment_name/group_name/module_id/subdirectory/after/double_slash`. + +Referring back to the [Intel DAOS blueprint][pfs-daos.yaml], we see that it will +create 2 deployment groups at `pfs-daos/daos-client-image` and +`pfs-daos/daos-server-image`. However, Packer will actually be invoked from +a subdirectories ending in `daos-client-image/images` and +`daos-server-image/images`. + +If the module does not use `//` package notation, `ghpc create` will copy +only the final directory in the path to `deployment_name/group_name/module_id`. + +In all cases, `ghpc create` will remove the `.git` directory from the packer +module to ensure that you can manage the entire deployment directory with its +own git versioning. + +##### GitHub over SSH + +Get module from GitHub over SSH: + +```yaml + - id: network1 + source: git@github.com:GoogleCloudPlatform/hpc-toolkit.git//modules/network/vpc +``` + +Specific versions can be selected as for HTTPS: + +```yaml + - id: network1 + source: git@github.com:GoogleCloudPlatform/hpc-toolkit.git//modules/network/vpc?ref=v1.10.0&depth=1 +``` + +##### Generic Git Modules + To use a Terraform module available in a non-GitHub git repository such as gitlab, set the source to a path starting `git::`. Two Standard git protocols are supported, `git::https://` for HTTPS or `git::git@github.com` for SSH. 
From 2a13cad0ec87a099785867d2292db0560c6d62e7 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 21 Jun 2023 13:44:39 -0500 Subject: [PATCH 61/92] Update Intel DAOS example - use depth parameter on all GitHub URIs - use new support for Packer packages to build DAOS server and client images for the user --- community/examples/intel/pfs-daos.yaml | 69 +++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 8 deletions(-) diff --git a/community/examples/intel/pfs-daos.yaml b/community/examples/intel/pfs-daos.yaml index 00973524ba..516c8742b8 100644 --- a/community/examples/intel/pfs-daos.yaml +++ b/community/examples/intel/pfs-daos.yaml @@ -21,32 +21,85 @@ vars: deployment_name: pfs-daos region: us-central1 zone: us-central1-c + server_image_family: daos-server-hpc-rocky-8 + client_image_family: daos-client-hpc-rocky-8 # Documentation for each of the modules used below can be found at # https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md +# Note: this blueprint assumes the existence of a default global network and +# subnetwork in the region chosen above + deployment_groups: - group: primary modules: - id: network1 source: modules/network/pre-existing-vpc - # This module creates a DAOS server. Server images MUST be created before running this. 
- # https://github.com/daos-stack/google-cloud-daos/tree/main/images - # more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server +- group: daos-server-image + modules: + # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.0/images + - id: daos-server-image + source: github.com/daos-stack/google-cloud-daos//images?ref=v0.4.0&depth=1 + kind: packer + settings: + daos_version: 2.2.0 + daos_repo_base_url: https://packages.daos.io + daos_packages_repo_file: EL8/packages/x86_64/daos_packages.repo + use_iap: true + enable_oslogin: false + machine_type: n2-standard-32 + source_image_family: hpc-rocky-linux-8 + source_image_project_id: cloud-hpc-image-public + image_guest_os_features: ["GVNIC"] + disk_size: "20" + state_timeout: "10m" + scopes: ["https://www.googleapis.com/auth/cloud-platform"] + use_internal_ip: true + omit_external_ip: false + daos_install_type: server + image_family: $(vars.server_image_family) + +- group: daos-client-image + modules: + # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.0/images + - id: daos-client-image + source: github.com/daos-stack/google-cloud-daos//images?ref=v0.4.0&depth=1 + kind: packer + settings: + daos_version: 2.2.0 + daos_repo_base_url: https://packages.daos.io + daos_packages_repo_file: EL8/packages/x86_64/daos_packages.repo + use_iap: true + enable_oslogin: false + machine_type: n2-standard-32 + source_image_family: hpc-rocky-linux-8 + source_image_project_id: cloud-hpc-image-public + image_guest_os_features: ["GVNIC"] + disk_size: "20" + state_timeout: "10m" + scopes: ["https://www.googleapis.com/auth/cloud-platform"] + use_internal_ip: true + omit_external_ip: false + daos_install_type: client + image_family: $(vars.client_image_family) + +- group: daos-cluster + modules: + # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.0/terraform/modules/daos_server - id: daos-server - source: 
github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.4.0 + source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.4.0&depth=1 use: [network1] settings: number_of_instances: 2 labels: {ghpc_role: file-system} + os_family: $(vars.server_image_family) - # This module creates DAOS clients. Client images MUST be created before running this. - # https://github.com/daos-stack/google-cloud-daos/tree/main/images - # more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_client + # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.0/terraform/modules/daos_client - id: daos-client - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.4.0 + source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.4.0&depth=1 use: [network1, daos-server] settings: number_of_instances: 2 labels: {ghpc_role: compute} + os_family: $(vars.client_image_family) From 33390b27ce4d7eb8b395dc603c42beffa2016e86 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 21 Jun 2023 13:44:40 -0500 Subject: [PATCH 62/92] Update Intel DAOS with Slurm example - use depth parameter on all GitHub URIs - use new support for Packer packages to build DAOS server and client images for the user --- community/examples/intel/hpc-slurm-daos.yaml | 36 +++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/community/examples/intel/hpc-slurm-daos.yaml b/community/examples/intel/hpc-slurm-daos.yaml index 1cdeaa4760..413178bb91 100644 --- a/community/examples/intel/hpc-slurm-daos.yaml +++ b/community/examples/intel/hpc-slurm-daos.yaml @@ -21,10 +21,14 @@ vars: deployment_name: daos-slurm region: us-central1 zone: us-central1-c + server_image_family: daos-server-hpc-rocky-8 # Documentation for each of the modules used below can be found at # 
https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md +# Note: this blueprint assumes the existence of a default global network and +# subnetwork in the region chosen above + deployment_groups: - group: primary modules: @@ -37,18 +41,42 @@ deployment_groups: settings: local_mount: "/home" - # This module creates a DAOS server. Server images MUST be created before running this. - # https://github.com/daos-stack/google-cloud-daos/tree/main/images +- group: daos-server-image + modules: + # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.0/images + - id: daos-server-image + source: github.com/daos-stack/google-cloud-daos//images?ref=v0.4.0&depth=1 + kind: packer + settings: + daos_version: 2.2.0 + daos_repo_base_url: https://packages.daos.io + daos_packages_repo_file: EL8/packages/x86_64/daos_packages.repo + use_iap: true + enable_oslogin: false + machine_type: n2-standard-32 + source_image_family: hpc-rocky-linux-8 + source_image_project_id: cloud-hpc-image-public + image_guest_os_features: ["GVNIC"] + disk_size: "20" + state_timeout: "10m" + scopes: ["https://www.googleapis.com/auth/cloud-platform"] + use_internal_ip: true + omit_external_ip: false + daos_install_type: server + image_family: $(vars.server_image_family) + +- group: cluster + modules: # more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server - id: daos - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.4.0 + source: github.com/daos-stack/google-cloud-daos//terraform/modules/daos_server?ref=v0.4.0&depth=1 use: [network1] settings: labels: {ghpc_role: file-system} # The default DAOS settings are optimized for TCO # The following will tune this system for best perf machine_type: "n2-standard-16" - os_disk_size_gb: 20 + os_family: $(vars.server_image_family) daos_disk_count: 4 daos_scm_size: 45 pools: From 95758f121bef2bf6a4d802cbb57eeb2197718fc8 Mon Sep 17 00:00:00 2001 From: 
Rohit Ramu Date: Wed, 21 Jun 2023 12:29:38 -0700 Subject: [PATCH 63/92] Update "google" provider in OFE from 3.x to 4.x (#1470) * Update "google" provider in OFE from 3.x to 4.x * Update "cloud-storage" module reference to version 4.x --- community/front-end/ofe/tf/README.md | 8 ++++---- community/front-end/ofe/tf/main.tf | 2 +- community/front-end/ofe/tf/network/README.md | 4 ++-- community/front-end/ofe/tf/network/versions.tf | 2 +- community/front-end/ofe/tf/versions.tf | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/community/front-end/ofe/tf/README.md b/community/front-end/ofe/tf/README.md index f0c320cd96..8c92f61916 100644 --- a/community/front-end/ofe/tf/README.md +++ b/community/front-end/ofe/tf/README.md @@ -18,22 +18,22 @@ limitations under the License. | Name | Version | |------|---------| | [terraform](#requirement\_terraform) | >= 0.13 | -| [google](#requirement\_google) | ~> 3.0 | -| [google-beta](#requirement\_google-beta) | ~> 3.0 | +| [google](#requirement\_google) | ~> 4.0 | +| [google-beta](#requirement\_google-beta) | ~> 4.0 | | [null](#requirement\_null) | >= 1.0 | ## Providers | Name | Version | |------|---------| -| [google](#provider\_google) | ~> 3.0 | +| [google](#provider\_google) | ~> 4.0 | | [null](#provider\_null) | >= 1.0 | ## Modules | Name | Source | Version | |------|--------|---------| -| [control\_bucket](#module\_control\_bucket) | terraform-google-modules/cloud-storage/google | ~> 2.2 | +| [control\_bucket](#module\_control\_bucket) | terraform-google-modules/cloud-storage/google | ~> 4.0 | | [network](#module\_network) | ./network | n/a | | [pubsub](#module\_pubsub) | terraform-google-modules/pubsub/google | ~> 1.8 | | [service\_account](#module\_service\_account) | terraform-google-modules/service-accounts/google | ~> 4.1 | diff --git a/community/front-end/ofe/tf/main.tf b/community/front-end/ofe/tf/main.tf index 2efd9f7689..89711f2b65 100644 --- a/community/front-end/ofe/tf/main.tf +++ 
b/community/front-end/ofe/tf/main.tf @@ -64,7 +64,7 @@ module "service_account" { module "control_bucket" { source = "terraform-google-modules/cloud-storage/google" - version = "~> 2.2" + version = "~> 4.0" project_id = var.project_id names = ["storage"] diff --git a/community/front-end/ofe/tf/network/README.md b/community/front-end/ofe/tf/network/README.md index 94e1b6be47..ab79cd8e4e 100644 --- a/community/front-end/ofe/tf/network/README.md +++ b/community/front-end/ofe/tf/network/README.md @@ -18,13 +18,13 @@ limitations under the License. | Name | Version | |------|---------| | [terraform](#requirement\_terraform) | >= 0.13 | -| [google](#requirement\_google) | ~> 3.0 | +| [google](#requirement\_google) | ~> 4.0 | ## Providers | Name | Version | |------|---------| -| [google](#provider\_google) | ~> 3.0 | +| [google](#provider\_google) | ~> 4.0 | ## Modules diff --git a/community/front-end/ofe/tf/network/versions.tf b/community/front-end/ofe/tf/network/versions.tf index 6fdc70dca1..1e39af0ca1 100644 --- a/community/front-end/ofe/tf/network/versions.tf +++ b/community/front-end/ofe/tf/network/versions.tf @@ -20,7 +20,7 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 3.0" + version = "~> 4.0" } } } diff --git a/community/front-end/ofe/tf/versions.tf b/community/front-end/ofe/tf/versions.tf index 6151b7f184..c067e0ccc1 100644 --- a/community/front-end/ofe/tf/versions.tf +++ b/community/front-end/ofe/tf/versions.tf @@ -20,11 +20,11 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 3.0" + version = "~> 4.0" } google-beta = { source = "hashicorp/google-beta" - version = "~> 3.0" + version = "~> 4.0" } null = { version = ">= 1.0" From cecde50ea6057afcd8abbacd447dc62a1da02909 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 21 Jun 2023 13:44:40 -0500 Subject: [PATCH 64/92] Update documentation for Intel DAOS examples --- community/examples/intel/README.md | 31 
+++++++++++++----------------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/community/examples/intel/README.md b/community/examples/intel/README.md index a17223f30f..825dba5410 100644 --- a/community/examples/intel/README.md +++ b/community/examples/intel/README.md @@ -228,18 +228,15 @@ ghpc create community/examples/intel/pfs-daos.yaml \ [--backend-config bucket=] ``` -This will create a set of directories containing Terraform modules and Packer -templates. - -The `--backend-config` option is not required but recommended. It will save the terraform state in a pre-existing [Google Cloud Storage bucket][bucket]. For more information see [Setting up a remote terraform state][backend]. - -Follow `ghpc` instructions to deploy the environment +This will create the deployment directory containing Terraform modules and +Packer templates. The `--backend-config` option is not required but recommended. +It will save the terraform state in a pre-existing [Google Cloud Storage +bucket][bucket]. For more information see [Setting up a remote terraform +state][backend]. Use `ghpc deploy` to provision your DAOS storage cluster: - ```shell - terraform -chdir=pfs-daos/primary init - terraform -chdir=pfs-daos/primary validate - terraform -chdir=pfs-daos/primary apply - ``` +```text +ghpc deploy pfs-daos --auto-approve +``` [backend]: ../../../examples/README.md#optional-setting-up-a-remote-terraform-state [bucket]: https://cloud.google.com/storage/docs/creating-buckets @@ -396,7 +393,7 @@ See the [DFuse (DAOS FUSE)](https://docs.daos.io/v2.2/user/filesystem/?h=dfuse#d Delete the remaining infrastructure ```shell -terraform -chdir=pfs-daos/primary destroy +ghpc destroy pfs-daos --auto-approve ``` ## DAOS Server with Slurm cluster @@ -465,11 +462,9 @@ The `--backend-config` option is not required but recommended. 
It will save the Follow `ghpc` instructions to deploy the environment - ```shell - terraform -chdir=daos-slurm/primary init - terraform -chdir=daos-slurm/primary validate - terraform -chdir=daos-slurm/primary apply - ``` +```text +ghpc deploy daos-slurm --auto-approve +``` [backend]: ../../../examples/README.md#optional-setting-up-a-remote-terraform-state [bucket]: https://cloud.google.com/storage/docs/creating-buckets @@ -609,5 +604,5 @@ have been shutdown and deleted by the Slurm autoscaler. Delete the remaining infrastructure with `terraform`: ```shell -terraform -chdir=daos-slurm/primary destroy +ghpc destroy daos-slurm --auto-approve ``` From 6a3d90d5950aeea2fe96d10bb0558fa13ea11a5f Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Wed, 21 Jun 2023 13:06:30 -0700 Subject: [PATCH 65/92] Use strict `Path` builder to reduce human error. (#1489) --- cmd/create.go | 2 +- cmd/create_test.go | 20 +++--- pkg/config/config.go | 21 +++--- pkg/config/path.go | 139 ++++++++++++++++++++++++++++++++++++++++ pkg/config/path_test.go | 78 ++++++++++++++++++++++ pkg/config/yaml.go | 53 ++++++++------- pkg/config/yaml_test.go | 126 ++++++++++++++++++------------------ 7 files changed, 329 insertions(+), 110 deletions(-) create mode 100644 pkg/config/path.go create mode 100644 pkg/config/path_test.go diff --git a/cmd/create.go b/cmd/create.go index 6ca8cf0dd2..5035f6bb72 100644 --- a/cmd/create.go +++ b/cmd/create.go @@ -130,7 +130,7 @@ func expandOrDie(path string) config.DeploymentConfig { func renderError(err error, ctx config.YamlCtx) string { var be config.BpError if errors.As(err, &be) { - if pos, ok := ctx.PathToPos[be.Path]; ok { + if pos, ok := ctx.Pos(be.Path); ok { return renderRichError(be.Err, pos, ctx) } } diff --git a/cmd/create_test.go b/cmd/create_test.go index ed5fda5a15..9dc0ce78cd 100644 --- a/cmd/create_test.go +++ b/cmd/create_test.go @@ -137,23 +137,23 @@ func (s *MySuite) TestRenderError(c *C) { c.Check(got, Equals, "arbuz") } { // has pos, but context is 
missing - pth := config.Path{}.Dot("rainbow").Dot("over") + ctx := config.NewYamlCtx([]byte(``)) + pth := config.Root.Vars.Dot("kale") err := config.BpError{Path: pth, Err: errors.New("arbuz")} - got := renderError(err, config.YamlCtx{}) - c.Check(got, Equals, "rainbow.over: arbuz") + got := renderError(err, ctx) + c.Check(got, Equals, "vars.kale: arbuz") } { // has pos, has context - pth := config.Path{}.Dot("rainbow").Dot("over") - ctx := config.YamlCtx{ - PathToPos: map[config.Path]config.Pos{ - pth: {Line: 2, Column: 3}}, - Lines: []string{"uno", "dos", "tres"}} + ctx := config.NewYamlCtx([]byte(` +vars: + kale: dos`)) + pth := config.Root.Vars.Dot("kale") err := config.BpError{Path: pth, Err: errors.New("arbuz")} got := renderError(err, ctx) c.Check(got, Equals, ` Error: arbuz -on line 2, column 3: -2: dos +on line 3, column 9: +3: kale: dos `) } } diff --git a/pkg/config/config.go b/pkg/config/config.go index 6e1a449ea2..29b530b97c 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -505,20 +505,16 @@ func checkModulesAndGroups(groups []DeploymentGroup) error { if err := grp.Name.Validate(); err != nil { return err } - pg := Path{"deployment_groups"}.At(ig) + pg := Root.Groups.At(ig) if seenGroups[grp.Name] { - return BpError{ - pg.Dot("name"), - fmt.Errorf("%s: %s used more than once", errorMessages["duplicateGroup"], grp.Name)} + return BpError{pg.Name, fmt.Errorf("%s: %s used more than once", errorMessages["duplicateGroup"], grp.Name)} } seenGroups[grp.Name] = true for im, mod := range grp.Modules { - pm := pg.Dot("modules").At(im) + pm := pg.Modules.At(im) if seenMod[mod.ID] { - return BpError{ - pm.Dot("id"), - fmt.Errorf("%s: %s used more than once", errorMessages["duplicateID"], mod.ID)} + return BpError{pm.ID, fmt.Errorf("%s: %s used more than once", errorMessages["duplicateID"], mod.ID)} } seenMod[mod.ID] = true @@ -528,7 +524,7 @@ func checkModulesAndGroups(groups []DeploymentGroup) error { } if grp.Kind != mod.Kind { return BpError{ - 
pm.Dot("kind"), + pm.Kind, fmt.Errorf( "mixing modules of differing kinds in a deployment group is not supported: deployment group %s, got %s and %s", grp.Name, grp.Kind, mod.Kind)} @@ -678,7 +674,7 @@ func (bp *Blueprint) DeploymentName() (string, error) { } } - path := Path{"vars.deployment_name"} + path := Root.Vars.Dot("deployment_name") v := bp.Vars.Get("deployment_name") if v.Type() != cty.String { return "", BpError{path, InputValueError{ @@ -709,16 +705,15 @@ func (bp *Blueprint) DeploymentName() (string, error) { // checkBlueprintName returns an error if blueprint_name does not comply with // requirements for correct GCP label values. func (bp *Blueprint) checkBlueprintName() error { - p := Path{"blueprint_name"} if len(bp.BlueprintName) == 0 { - return BpError{p, InputValueError{ + return BpError{Root.BlueprintName, InputValueError{ inputKey: "blueprint_name", cause: errorMessages["valueEmptyString"], }} } if !isValidLabelValue(bp.BlueprintName) { - return BpError{p, InputValueError{ + return BpError{Root.BlueprintName, InputValueError{ inputKey: "blueprint_name", cause: errorMessages["labelValueReqs"], }} diff --git a/pkg/config/path.go b/pkg/config/path.go new file mode 100644 index 0000000000..98d48fd383 --- /dev/null +++ b/pkg/config/path.go @@ -0,0 +1,139 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package config + +import ( + "fmt" + "reflect" +) + +// Path is unique identifier of a piece of configuration. +type Path interface { + String() string + Parent() Path +} + +type basePath struct { + InternalPrev Path + InternalPiece string +} + +func (p basePath) Parent() Path { return p.InternalPrev } + +func (p basePath) String() string { + pref := "" + if p.Parent() != nil { + pref = p.Parent().String() + } + return fmt.Sprintf("%s%s", pref, p.InternalPiece) +} + +type arrayPath[E any] struct{ basePath } + +func (p arrayPath[E]) At(i int) E { + var e E + initPath(&e, &p, fmt.Sprintf("[%d]", i)) + return e +} + +type mapPath[E any] struct{ basePath } + +func (p mapPath[E]) Dot(k string) E { + var e E + initPath(&e, &p, fmt.Sprintf(".%s", k)) + return e +} + +func initPath(p any, prev any, piece string) { + r := reflect.Indirect(reflect.ValueOf(p)) + ty := reflect.TypeOf(p).Elem() + if !r.FieldByName("InternalPiece").IsValid() || !r.FieldByName("InternalPrev").IsValid() { + panic(fmt.Sprintf("%s does not embed basePath", ty.Name())) + } + if _, ok := prev.(Path); prev != nil && !ok { + panic(fmt.Sprintf("prev is not a Path: %#v", p)) + } + + r.FieldByName("InternalPiece").SetString(piece) + if prev != nil { + r.FieldByName("InternalPrev").Set(reflect.ValueOf(prev)) + } + + for i := 0; i < ty.NumField(); i++ { + tag, ok := ty.Field(i).Tag.Lookup("path") + if !ok { + continue + } + initPath(r.Field(i).Addr().Interface(), p, tag) + } +} + +type rootPath struct { + basePath + BlueprintName basePath `path:"blueprint_name"` + GhpcVersion basePath `path:"ghpc_version"` + Validators arrayPath[validatorCfgPath] `path:"validators"` + ValidationLevel basePath `path:"validation_level"` + Vars dictPath `path:"vars"` + Groups arrayPath[groupPath] `path:"deployment_groups"` + Backend backendPath `path:"terraform_backend_defaults"` +} + +type validatorCfgPath struct { + basePath + Validator basePath `path:".validator"` + Inputs dictPath `path:".inputs"` + Skip basePath 
`path:".skip"` +} + +type dictPath struct{ mapPath[basePath] } + +type backendPath struct { + basePath + Type basePath `path:".type"` + Configuration dictPath `path:".configuration"` +} + +type groupPath struct { + basePath + Name basePath `path:".group"` + Backend backendPath `path:".terraform_backend"` + Modules arrayPath[modulePath] `path:".modules"` + Kind basePath `path:".kind"` +} + +type modulePath struct { + basePath + Source basePath `path:".source"` + Kind basePath `path:".kind"` + ID basePath `path:".id"` + Use arrayPath[backendPath] `path:".use"` + Outputs arrayPath[outputPath] `path:".outputs"` + Settings dictPath `path:".settings"` +} + +type outputPath struct { + basePath + Name basePath `path:".name"` + Description basePath `path:".description"` + Sensitive basePath `path:".sensitive"` +} + +// Root is a starting point for creating a Blueprint Path +var Root rootPath + +func init() { + initPath(&Root, nil, "") +} diff --git a/pkg/config/path_test.go b/pkg/config/path_test.go new file mode 100644 index 0000000000..922483aa90 --- /dev/null +++ b/pkg/config/path_test.go @@ -0,0 +1,78 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package config + +import ( + "testing" +) + +func TestPath(t *testing.T) { + type test struct { + p Path + want string + } + r := Root + m := r.Groups.At(3).Modules.At(1) + tests := []test{ + {r, ""}, + {r.BlueprintName, "blueprint_name"}, + {r.GhpcVersion, "ghpc_version"}, + {r.Validators, "validators"}, + {r.ValidationLevel, "validation_level"}, + {r.Vars, "vars"}, + {r.Groups, "deployment_groups"}, + {r.Backend, "terraform_backend_defaults"}, + + {r.Validators.At(2), "validators[2]"}, + {r.Validators.At(2).Validator, "validators[2].validator"}, + {r.Validators.At(2).Skip, "validators[2].skip"}, + {r.Validators.At(2).Inputs, "validators[2].inputs"}, + {r.Validators.At(2).Inputs.Dot("zebra"), "validators[2].inputs.zebra"}, + + {r.Vars.Dot("red"), "vars.red"}, + + {r.Groups.At(3), "deployment_groups[3]"}, + {r.Groups.At(3).Name, "deployment_groups[3].group"}, + {r.Groups.At(3).Kind, "deployment_groups[3].kind"}, + {r.Groups.At(3).Backend, "deployment_groups[3].terraform_backend"}, + {r.Groups.At(3).Modules, "deployment_groups[3].modules"}, + {r.Groups.At(3).Modules.At(1), "deployment_groups[3].modules[1]"}, + // m := r.Groups.At(3).Modules.At(1) + {m.Source, "deployment_groups[3].modules[1].source"}, + {m.ID, "deployment_groups[3].modules[1].id"}, + {m.Kind, "deployment_groups[3].modules[1].kind"}, + {m.Use, "deployment_groups[3].modules[1].use"}, + {m.Use.At(6), "deployment_groups[3].modules[1].use[6]"}, + {m.Outputs, "deployment_groups[3].modules[1].outputs"}, + {m.Outputs.At(2), "deployment_groups[3].modules[1].outputs[2]"}, + {m.Outputs.At(2).Name, "deployment_groups[3].modules[1].outputs[2].name"}, + {m.Outputs.At(2).Description, "deployment_groups[3].modules[1].outputs[2].description"}, + {m.Outputs.At(2).Sensitive, "deployment_groups[3].modules[1].outputs[2].sensitive"}, + {m.Settings, "deployment_groups[3].modules[1].settings"}, + {m.Settings.Dot("lime"), "deployment_groups[3].modules[1].settings.lime"}, + + {r.Backend.Type, 
"terraform_backend_defaults.type"}, + {r.Backend.Configuration, "terraform_backend_defaults.configuration"}, + {r.Backend.Configuration.Dot("goo"), "terraform_backend_defaults.configuration.goo"}, + } + for _, tc := range tests { + t.Run(tc.want, func(t *testing.T) { + got := tc.p.String() + if got != tc.want { + t.Errorf("\ngot : %q\nwant: %q", got, tc.want) + } + }) + } +} diff --git a/pkg/config/yaml.go b/pkg/config/yaml.go index aa171df6f5..b6f3fc6974 100644 --- a/pkg/config/yaml.go +++ b/pkg/config/yaml.go @@ -24,26 +24,20 @@ import ( "gopkg.in/yaml.v3" ) -// Path points to concrete location in the blueprint file. -type Path struct { - v string -} - -func (p Path) String() string { - return p.v -} +// yPath is a helper for YamlCtx to build "Path". It's agnostic to the Blueprint structure. +type yPath string // At is a builder method for a path of a child in a sequence. -func (p Path) At(i int) Path { - return Path{fmt.Sprintf("%s[%d]", p.v, i)} +func (p yPath) At(i int) yPath { + return yPath(fmt.Sprintf("%s[%d]", p, i)) } // Dot is a builder method for a path of a child in a mapping. -func (p Path) Dot(k string) Path { - if p.v == "" { - return Path{k} +func (p yPath) Dot(k string) yPath { + if p == "" { + return yPath(k) } - return Path{fmt.Sprintf("%s.%s", p.v, k)} + return yPath(fmt.Sprintf("%s.%s", p, k)) } // Pos is a position in the blueprint file. @@ -64,15 +58,21 @@ func importBlueprint(f string) (Blueprint, YamlCtx, error) { if err = decoder.Decode(&bp); err != nil { return Blueprint{}, YamlCtx{}, fmt.Errorf(errorMessages["yamlUnmarshalError"], f, err) } - return bp, newYamlCtx(data), nil + return bp, NewYamlCtx(data), nil } // YamlCtx is a contextual information to render errors. type YamlCtx struct { - PathToPos map[Path]Pos + pathToPos map[yPath]Pos Lines []string } +// Pos returns a position of a given path if one is found. 
+func (c YamlCtx) Pos(p Path) (Pos, bool) { + pos, ok := c.pathToPos[yPath(p.String())] + return pos, ok +} + func syntheticOutputsNode(name string, ln int, col int) *yaml.Node { return &yaml.Node{ Kind: yaml.MappingNode, @@ -104,24 +104,31 @@ func syntheticOutputsNode(name string, ln int, col int) *yaml.Node { // - name: grog # canonical path to "grog" value is `...outputs[0].name` // - mork # canonical path to "mork" value is `...outputs[1].name`, NOT `...outputs[1]` // ``` -func normalizeYamlNode(p Path, n *yaml.Node) *yaml.Node { +func normalizeYamlNode(p yPath, n *yaml.Node) *yaml.Node { + fmt.Printf("node: %#v, path: %#v", n, string(p)) switch { - case n.Kind == yaml.ScalarNode && regexp.MustCompile(`^deployment_groups\[\d+\]\.modules\[\d+\]\.outputs\[\d+\]$`).MatchString(p.String()): + case n.Kind == yaml.ScalarNode && regexp.MustCompile(`^deployment_groups\[\d+\]\.modules\[\d+\]\.outputs\[\d+\]$`).MatchString(string(p)): return syntheticOutputsNode(n.Value, n.Line, n.Column) default: return n } } -func newYamlCtx(data []byte) YamlCtx { +// NewYamlCtx creates a new YamlCtx from a given YAML data. +// NOTE: The data should be a valid blueprint YAML (previously used to parse Blueprint), +// this function will panic if it's not valid YAML and doesn't validate Blueprint structure. 
+func NewYamlCtx(data []byte) YamlCtx { var c nodeCapturer if err := yaml.Unmarshal(data, &c); err != nil { panic(err) // shouldn't happen } + if c.n == nil { + return YamlCtx{} // empty + } - m := map[Path]Pos{} - var walk func(n *yaml.Node, p Path) - walk = func(n *yaml.Node, p Path) { + m := map[yPath]Pos{} + var walk func(n *yaml.Node, p yPath) + walk = func(n *yaml.Node, p yPath) { n = normalizeYamlNode(p, n) m[p] = Pos{n.Line, n.Column} if n.Kind == yaml.MappingNode { @@ -134,7 +141,7 @@ func newYamlCtx(data []byte) YamlCtx { } } } - walk(c.n, Path{""}) + walk(c.n, "") var lines []string sc := bufio.NewScanner(bytes.NewReader(data)) diff --git a/pkg/config/yaml_test.go b/pkg/config/yaml_test.go index ea327bed1d..5d344d38f4 100644 --- a/pkg/config/yaml_test.go +++ b/pkg/config/yaml_test.go @@ -79,72 +79,72 @@ terraform_backend_defaults: } } - exp := map[string]Pos{ - "": {3, 1}, - "blueprint_name": {3, 17}, - "ghpc_version": {5, 15}, - - "validators": {8, 1}, - "validators[0]": {8, 3}, - "validators[0].inputs": {10, 5}, - "validators[0].inputs.spice": {10, 12}, - "validators[0].validator": {8, 14}, - "validators[1]": {11, 3}, - "validators[1].skip": {12, 9}, - "validators[1].validator": {11, 14}, - - "validation_level": {14, 19}, - - "vars": {17, 3}, - "vars.red": {17, 8}, - - "deployment_groups": {20, 1}, - "deployment_groups[0]": {20, 3}, - "deployment_groups[0].group": {20, 10}, - - "deployment_groups[0].terraform_backend": {22, 5}, - "deployment_groups[0].terraform_backend.type": {22, 11}, - "deployment_groups[0].terraform_backend.configuration": {24, 7}, - "deployment_groups[0].terraform_backend.configuration.carrot": {24, 15}, - "deployment_groups[0].kind": {25, 9}, - - "deployment_groups[0].modules": {27, 3}, - "deployment_groups[0].modules[0]": {27, 5}, - "deployment_groups[0].modules[0].id": {27, 9}, - "deployment_groups[0].modules[0].source": {28, 13}, - "deployment_groups[0].modules[0].kind": {29, 11}, - "deployment_groups[0].modules[0].use": {30, 
10}, - "deployment_groups[0].modules[0].use[0]": {30, 11}, - "deployment_groups[0].modules[0].use[1]": {30, 18}, - "deployment_groups[0].modules[0].outputs": {32, 5}, - "deployment_groups[0].modules[0].outputs[0]": {32, 7}, - "deployment_groups[0].modules[0].outputs[0].name": {32, 7}, // synthetic - "deployment_groups[0].modules[0].outputs[1]": {33, 7}, - "deployment_groups[0].modules[0].outputs[1].name": {33, 13}, - "deployment_groups[0].modules[0].outputs[1].description": {34, 20}, - "deployment_groups[0].modules[0].outputs[1].sensitive": {35, 18}, - "deployment_groups[0].modules[0].settings": {37, 7}, - "deployment_groups[0].modules[0].settings.dijon": {37, 14}, - - "deployment_groups[1]": {39, 3}, - "deployment_groups[1].group": {39, 10}, - "deployment_groups[1].modules": {41, 3}, - "deployment_groups[1].modules[0]": {41, 5}, - "deployment_groups[1].modules[0].id": {41, 9}, - "deployment_groups[1].modules[1]": {42, 5}, - "deployment_groups[1].modules[1].id": {42, 9}, - - "terraform_backend_defaults": {45, 3}, - "terraform_backend_defaults.type": {45, 9}, + type test struct { + path Path + want Pos + } + tests := []test{ + {Root, Pos{3, 1}}, + {Root.BlueprintName, Pos{3, 17}}, + {Root.GhpcVersion, Pos{5, 15}}, + {Root.Validators, Pos{8, 1}}, + {Root.Validators.At(0), Pos{8, 3}}, + {Root.Validators.At(0).Inputs, Pos{10, 5}}, + {Root.Validators.At(0).Inputs.Dot("spice"), Pos{10, 12}}, + {Root.Validators.At(0).Validator, Pos{8, 14}}, + {Root.Validators.At(1), Pos{11, 3}}, + {Root.Validators.At(1).Skip, Pos{12, 9}}, + {Root.Validators.At(1).Validator, Pos{11, 14}}, + {Root.ValidationLevel, Pos{14, 19}}, + {Root.Vars, Pos{17, 3}}, + {Root.Vars.Dot("red"), Pos{17, 8}}, + {Root.Groups, Pos{20, 1}}, + {Root.Groups.At(0), Pos{20, 3}}, + {Root.Groups.At(0).Name, Pos{20, 10}}, + + {Root.Groups.At(0).Backend, Pos{22, 5}}, + {Root.Groups.At(0).Backend.Type, Pos{22, 11}}, + {Root.Groups.At(0).Backend.Configuration, Pos{24, 7}}, + 
{Root.Groups.At(0).Backend.Configuration.Dot("carrot"), Pos{24, 15}}, + {Root.Groups.At(0).Kind, Pos{25, 9}}, + + {Root.Groups.At(0).Modules, Pos{27, 3}}, + {Root.Groups.At(0).Modules.At(0), Pos{27, 5}}, + {Root.Groups.At(0).Modules.At(0).ID, Pos{27, 9}}, + {Root.Groups.At(0).Modules.At(0).Source, Pos{28, 13}}, + {Root.Groups.At(0).Modules.At(0).Kind, Pos{29, 11}}, + {Root.Groups.At(0).Modules.At(0).Use, Pos{30, 10}}, + {Root.Groups.At(0).Modules.At(0).Use.At(0), Pos{30, 11}}, + {Root.Groups.At(0).Modules.At(0).Use.At(1), Pos{30, 18}}, + {Root.Groups.At(0).Modules.At(0).Outputs, Pos{32, 5}}, + {Root.Groups.At(0).Modules.At(0).Outputs.At(0), Pos{32, 7}}, + {Root.Groups.At(0).Modules.At(0).Outputs.At(0).Name, Pos{32, 7}}, // synthetic + {Root.Groups.At(0).Modules.At(0).Outputs.At(1), Pos{33, 7}}, + {Root.Groups.At(0).Modules.At(0).Outputs.At(1).Name, Pos{33, 13}}, + {Root.Groups.At(0).Modules.At(0).Outputs.At(1).Description, Pos{34, 20}}, + {Root.Groups.At(0).Modules.At(0).Outputs.At(1).Sensitive, Pos{35, 18}}, + {Root.Groups.At(0).Modules.At(0).Settings, Pos{37, 7}}, + {Root.Groups.At(0).Modules.At(0).Settings.Dot("dijon"), Pos{37, 14}}, + + {Root.Groups.At(1), Pos{39, 3}}, + {Root.Groups.At(1).Name, Pos{39, 10}}, + {Root.Groups.At(1).Modules, Pos{41, 3}}, + {Root.Groups.At(1).Modules.At(0), Pos{41, 5}}, + {Root.Groups.At(1).Modules.At(0).ID, Pos{41, 9}}, + {Root.Groups.At(1).Modules.At(1), Pos{42, 5}}, + {Root.Groups.At(1).Modules.At(1).ID, Pos{42, 9}}, + + {Root.Backend, Pos{45, 3}}, + {Root.Backend.Type, Pos{45, 9}}, } - ctx := newYamlCtx([]byte(data)) - for path, pos := range exp { - t.Run(path, func(t *testing.T) { - got, ok := ctx.PathToPos[Path{path}] + ctx := NewYamlCtx([]byte(data)) + for _, tc := range tests { + t.Run(tc.path.String(), func(t *testing.T) { + got, ok := ctx.Pos(tc.path) if !ok { - t.Errorf("%q not found", path) - } else if diff := cmp.Diff(pos, got); diff != "" { + t.Errorf("%q not found", tc.path.String()) + } else if diff := 
cmp.Diff(tc.want, got); diff != "" { t.Errorf("diff (-want +got):\n%s", diff) } }) From 0f05faa505a72ea610395fbca2aa4498a00a9d96 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Wed, 21 Jun 2023 15:42:47 -0700 Subject: [PATCH 66/92] Remove `settingsToIgnore` from `useModule` (#1486) --- pkg/config/expand.go | 38 +++++++++++++++----------------------- pkg/config/expand_test.go | 38 +++++++++++++++++++------------------- 2 files changed, 34 insertions(+), 42 deletions(-) diff --git a/pkg/config/expand.go b/pkg/config/expand.go index 3a45eda936..253c17108f 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -134,45 +134,38 @@ func (mod *Module) addListValue(settingName string, value cty.Value) { // a list, in which case output values are appended and flattened using HCL. // // mod: "using" module as defined above -// useMod: "used" module as defined above -// settingsToIgnore: a list of module settings not to modify for any reason; -// typical usage will be to leave explicit blueprint settings unmodified -func useModule( - mod *Module, - useMod Module, - settingsToIgnore []string, -) { +// use: "used" module as defined above +func useModule(mod *Module, use Module) { modInputsMap := getModuleInputMap(mod.InfoOrDie().Inputs) - for _, useOutput := range useMod.InfoOrDie().Outputs { - settingName := useOutput.Name - - // explicitly ignore these settings (typically those in blueprint) - if slices.Contains(settingsToIgnore, settingName) { - continue - } + for _, useOutput := range use.InfoOrDie().Outputs { + setting := useOutput.Name // Skip settings that do not have matching module inputs - inputType, ok := modInputsMap[settingName] + inputType, ok := modInputsMap[setting] if !ok { continue } + alreadySet := mod.Settings.Has(setting) + if alreadySet && len(IsProductOfModuleUse(mod.Settings.Get(setting))) == 0 { + continue // set explicitly, skip + } + // skip settings that are not of list type, but already have a value // these were probably added by a previous 
call to this function - alreadySet := mod.Settings.Has(settingName) isList := strings.HasPrefix(inputType, "list") if alreadySet && !isList { continue } v := AsProductOfModuleUse( - ModuleRef(useMod.ID, settingName).AsExpression().AsValue(), - useMod.ID) + ModuleRef(use.ID, setting).AsExpression().AsValue(), + use.ID) if !isList { - mod.Settings.Set(settingName, v) + mod.Settings.Set(setting, v) } else { - mod.addListValue(settingName, v) + mod.addListValue(setting, v) } } } @@ -181,13 +174,12 @@ func useModule( // when/if applicable func (dc *DeploymentConfig) applyUseModules() error { return dc.Config.WalkModules(func(m *Module) error { - settingsInBlueprint := maps.Keys(m.Settings.Items()) for _, u := range m.Use { used, err := dc.Config.Module(u) if err != nil { return err } - useModule(m, *used, settingsInBlueprint) + useModule(m, *used) } return nil }) diff --git a/pkg/config/expand_test.go b/pkg/config/expand_test.go index 87f8508ffa..13341497a2 100644 --- a/pkg/config/expand_test.go +++ b/pkg/config/expand_test.go @@ -72,7 +72,7 @@ func (s *MySuite) TestAddListValue(c *C) { func (s *MySuite) TestUseModule(c *C) { // Setup - usedMod := Module{ + used := Module{ ID: "UsedModule", Source: "usedSource", } @@ -86,9 +86,9 @@ func (s *MySuite) TestUseModule(c *C) { mod := Module{ID: "lime", Source: "modSource"} setTestModuleInfo(mod, modulereader.ModuleInfo{}) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{}) + setTestModuleInfo(used, modulereader.ModuleInfo{}) - useModule(&mod, usedMod, []string{}) + useModule(&mod, used) c.Check(mod.Settings, DeepEquals, Dict{}) } @@ -96,10 +96,10 @@ func (s *MySuite) TestUseModule(c *C) { mod := Module{ID: "lime", Source: "limeTree"} setTestModuleInfo(mod, modulereader.ModuleInfo{}) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{ + setTestModuleInfo(used, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - useModule(&mod, usedMod, []string{}) + useModule(&mod, used) 
c.Check(mod.Settings, DeepEquals, Dict{}) } @@ -108,11 +108,11 @@ func (s *MySuite) TestUseModule(c *C) { setTestModuleInfo(mod, modulereader.ModuleInfo{ Inputs: []modulereader.VarInfo{varInfoNumber}, }) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{ + setTestModuleInfo(used, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - useModule(&mod, usedMod, []string{}) + useModule(&mod, used) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ "val1": AsProductOfModuleUse(ref, "UsedModule"), }) @@ -124,11 +124,11 @@ func (s *MySuite) TestUseModule(c *C) { setTestModuleInfo(mod, modulereader.ModuleInfo{ Inputs: []modulereader.VarInfo{varInfoNumber}, }) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{ + setTestModuleInfo(used, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - useModule(&mod, usedMod, []string{"val1"}) + useModule(&mod, used) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{"val1": ref}) } @@ -139,11 +139,11 @@ func (s *MySuite) TestUseModule(c *C) { setTestModuleInfo(mod, modulereader.ModuleInfo{ Inputs: []modulereader.VarInfo{varInfoNumber}, }) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{ + setTestModuleInfo(used, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - useModule(&mod, usedMod, []string{}) + useModule(&mod, used) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ "val1": AsProductOfModuleUse(ref, "UsedModule")}) } @@ -153,10 +153,10 @@ func (s *MySuite) TestUseModule(c *C) { setTestModuleInfo(mod, modulereader.ModuleInfo{ Inputs: []modulereader.VarInfo{{Name: "val1", Type: "list"}}, }) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{ + setTestModuleInfo(used, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - useModule(&mod, usedMod, []string{}) + useModule(&mod, used) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ "val1": 
AsProductOfModuleUse( MustParseExpression(`flatten([module.UsedModule.val1])`).AsValue(), @@ -166,19 +166,19 @@ func (s *MySuite) TestUseModule(c *C) { { // Pass: Setting exists, Input is List, Output is not a list // Assume setting was not set in blueprint mod := Module{ID: "lime", Source: "limeTree"} - mod.Settings.Set("val1", cty.TupleVal([]cty.Value{ref})) + mod.Settings.Set("val1", AsProductOfModuleUse(cty.TupleVal([]cty.Value{ref}), "other")) setTestModuleInfo(mod, modulereader.ModuleInfo{ Inputs: []modulereader.VarInfo{{Name: "val1", Type: "list"}}, }) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{ + setTestModuleInfo(used, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - useModule(&mod, usedMod, []string{}) + useModule(&mod, used) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ "val1": AsProductOfModuleUse( MustParseExpression(`flatten([module.UsedModule.val1,[module.UsedModule.val1]])`).AsValue(), - "UsedModule")}) + "other", "UsedModule")}) } { // Pass: Setting exists, Input is List, Output is not a list @@ -188,11 +188,11 @@ func (s *MySuite) TestUseModule(c *C) { setTestModuleInfo(mod, modulereader.ModuleInfo{ Inputs: []modulereader.VarInfo{{Name: "val1", Type: "list"}}, }) - setTestModuleInfo(usedMod, modulereader.ModuleInfo{ + setTestModuleInfo(used, modulereader.ModuleInfo{ Outputs: []modulereader.OutputInfo{{Name: "val1"}}, }) - useModule(&mod, usedMod, []string{"val1"}) + useModule(&mod, used) c.Check(mod.Settings.Items(), DeepEquals, map[string]cty.Value{ "val1": cty.TupleVal([]cty.Value{ref})}) } From e7edd8c356be3ab251dd29ffbff889f611ed8103 Mon Sep 17 00:00:00 2001 From: Rohit Ramu Date: Wed, 21 Jun 2023 21:39:30 -0700 Subject: [PATCH 67/92] Allow providing short names for image project (#1472) * Allow providing short references for image project * Updated examples to use short image project names --- .../examples/hpc-slurm-chromedesktop.yaml | 2 +- 
community/examples/hpc-slurm-ubuntu2004.yaml | 2 +- .../schedmd-slurm-gcp-v5-node-group/README.md | 2 +- .../schedmd-slurm-gcp-v5-node-group/main.tf | 14 ++------- .../source_image_logic.tf | 29 +++++++++++++++++++ .../variables.tf | 2 +- .../schedmd-slurm-gcp-v5-controller/README.md | 2 +- .../schedmd-slurm-gcp-v5-controller/main.tf | 13 ++------- .../source_image_logic.tf | 29 +++++++++++++++++++ .../variables.tf | 2 +- .../schedmd-slurm-gcp-v5-login/README.md | 2 +- .../schedmd-slurm-gcp-v5-login/main.tf | 13 ++------- .../source_image_logic.tf | 29 +++++++++++++++++++ .../schedmd-slurm-gcp-v5-login/variables.tf | 2 +- examples/hpc-enterprise-slurm.yaml | 2 +- tools/duplicate-diff.py | 7 ++++- .../test_configs/node-groups.yaml | 6 ++-- 17 files changed, 114 insertions(+), 44 deletions(-) create mode 100644 community/modules/compute/schedmd-slurm-gcp-v5-node-group/source_image_logic.tf create mode 100644 community/modules/scheduler/schedmd-slurm-gcp-v5-controller/source_image_logic.tf create mode 100644 community/modules/scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf diff --git a/community/examples/hpc-slurm-chromedesktop.yaml b/community/examples/hpc-slurm-chromedesktop.yaml index b3a0ded462..959edfa77b 100644 --- a/community/examples/hpc-slurm-chromedesktop.yaml +++ b/community/examples/hpc-slurm-chromedesktop.yaml @@ -54,7 +54,7 @@ deployment_groups: disable_public_ips: false instance_image: family: slurm-gcp-5-7-debian-11 - project: projects/schedmd-slurm-public/global/images/family + project: schedmd-slurm-public guest_accelerator: - type: nvidia-tesla-t4-vws count: 1 diff --git a/community/examples/hpc-slurm-ubuntu2004.yaml b/community/examples/hpc-slurm-ubuntu2004.yaml index 8cb8abc9da..10754d2731 100644 --- a/community/examples/hpc-slurm-ubuntu2004.yaml +++ b/community/examples/hpc-slurm-ubuntu2004.yaml @@ -25,7 +25,7 @@ vars: # Please refer to the following link for the latest images: # 
https://github.com/SchedMD/slurm-gcp/blob/master/docs/images.md#supported-operating-systems family: slurm-gcp-5-7-ubuntu-2004-lts - project: projects/schedmd-slurm-public/global/images/family + project: schedmd-slurm-public deployment_groups: diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/README.md b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/README.md index f455751776..2ad32a6f22 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/README.md @@ -134,7 +134,7 @@ No modules. | [enable\_spot\_vm](#input\_enable\_spot\_vm) | Enable the partition to use spot VMs (https://cloud.google.com/spot-vms). | `bool` | `false` | no | | [gpu](#input\_gpu) | GPU information. Type and count of GPU to attach to the instance template. See
https://cloud.google.com/compute/docs/gpus more details.
- type : the GPU type, e.g. nvidia-tesla-t4, nvidia-a100-80gb, nvidia-tesla-a100, etc
- count : number of GPUs

If both 'var.gpu' and 'var.guest\_accelerator' are set, 'var.gpu' will be used. |
object({
count = number,
type = string
})
| `null` | no | | [guest\_accelerator](#input\_guest\_accelerator) | Alternative method of providing 'var.gpu' with a consistent naming scheme to
other HPC Toolkit modules.

If both 'var.gpu' and 'var.guest\_accelerator' are set, 'var.gpu' will be used. |
list(object({
type = string,
count = number
}))
| `null` | no | -| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the node group VM instances. This
value is overridden if any of `source_image`, `source_image_family` or
`source_image_project` are set.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-5-7-hpc-centos-7",
"project": "projects/schedmd-slurm-public/global/images/family"
}
| no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the node group VM instances. This
value is overridden if any of `source_image`, `source_image_family` or
`source_image_project` are set.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-5-7-hpc-centos-7",
"project": "schedmd-slurm-public"
}
| no | | [instance\_template](#input\_instance\_template) | Self link to a custom instance template. If set, other VM definition
variables such as machine\_type and instance\_image will be ignored in favor
of the provided instance template.

For more information on creating custom images for the instance template
that comply with Slurm on GCP see the "Slurm on GCP Custom Images" section
in docs/vm-images.md. | `string` | `null` | no | | [labels](#input\_labels) | Labels to add to partition compute instances. Key-value pairs. | `map(string)` | `{}` | no | | [machine\_type](#input\_machine\_type) | Compute Platform machine type to use for this partition compute nodes. | `string` | `"c2-standard-60"` | no | diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/main.tf b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/main.tf index c70e26a501..601f64df15 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/main.tf @@ -20,14 +20,6 @@ locals { } locals { - - # Handle VM image format from 2 sources, prioritize source_image* variables - # over instance_image - source_image_input_used = var.source_image != "" || var.source_image_family != "" || var.source_image_project != "" - source_image = local.source_image_input_used ? var.source_image : lookup(var.instance_image, "name", "") - source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "") - source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "") - enable_public_ip_access_config = var.disable_public_ips ? [] : [{ nat_ip = null, network_tier = null }] access_config = length(var.access_config) == 0 ? 
local.enable_public_ip_access_config : var.access_config @@ -70,9 +62,9 @@ locals { on_host_maintenance = var.on_host_maintenance preemptible = var.preemptible shielded_instance_config = var.shielded_instance_config - source_image_family = local.source_image_family - source_image_project = local.source_image_project - source_image = local.source_image + source_image_family = local.source_image_family # requires source_image_logic.tf + source_image_project = local.source_image_project_normalized # requires source_image_logic.tf + source_image = local.source_image # requires source_image_logic.tf tags = var.tags access_config = local.access_config service_account = var.service_account != null ? var.service_account : { diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/source_image_logic.tf b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/source_image_logic.tf new file mode 100644 index 0000000000..cb6932c358 --- /dev/null +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/source_image_logic.tf @@ -0,0 +1,29 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +locals { + # Handle VM image format from 2 sources, prioritize source_image* variables + # over instance_image + source_image_input_used = var.source_image != "" || var.source_image_family != "" || var.source_image_project != "" + source_image = local.source_image_input_used ? 
var.source_image : lookup(var.instance_image, "name", "") + source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "") + source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "") + source_image_project_normalized = ( + local.source_image != "" || strcontains(local.source_image_project, "/") + ? local.source_image_project + : "projects/${local.source_image_project}/global/images/family" + ) +} diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf index 78146b86e3..b77caa08a3 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf @@ -98,7 +98,7 @@ variable "instance_image" { type = map(string) default = { family = "slurm-gcp-5-7-hpc-centos-7" - project = "projects/schedmd-slurm-public/global/images/family" + project = "schedmd-slurm-public" } validation { diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md index 05806bafb2..ba42bd22e0 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/README.md @@ -205,7 +205,7 @@ limitations under the License. | [epilog\_scripts](#input\_epilog\_scripts) | List of scripts to be used for Epilog. Programs for the slurmd to execute
on every node when a user's job completes.
See https://slurm.schedmd.com/slurm.conf.html#OPT_Epilog. |
list(object({
filename = string
content = string
}))
| `[]` | no | | [gpu](#input\_gpu) | GPU information. Type and count of GPU to attach to the instance template. See
https://cloud.google.com/compute/docs/gpus more details.
- type : the GPU type, e.g. nvidia-tesla-t4, nvidia-a100-80gb, nvidia-tesla-a100, etc
- count : number of GPUs

If both 'var.gpu' and 'var.guest\_accelerator' are set, 'var.gpu' will be used. |
object({
type = string
count = number
})
| `null` | no | | [guest\_accelerator](#input\_guest\_accelerator) | Alternative method of providing 'var.gpu' with a consistent naming scheme to
other HPC Toolkit modules.

If both 'var.gpu' and 'var.guest\_accelerator' are set, 'var.gpu' will be used. |
list(object({
type = string,
count = number
}))
| `null` | no | -| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm controller VM instance. This
value is overridden if any of `source_image`, `source_image_family` or
`source_image_project` are set.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-5-7-hpc-centos-7",
"project": "projects/schedmd-slurm-public/global/images/family"
}
| no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm controller VM instance. This
value is overridden if any of `source_image`, `source_image_family` or
`source_image_project` are set.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-5-7-hpc-centos-7",
"project": "schedmd-slurm-public"
}
| no | | [instance\_template](#input\_instance\_template) | Self link to a custom instance template. If set, other VM definition
variables such as machine\_type and instance\_image will be ignored in favor
of the provided instance template.

For more information on creating custom images for the instance template
that comply with Slurm on GCP see the "Slurm on GCP Custom Images" section
in docs/vm-images.md. | `string` | `null` | no | | [labels](#input\_labels) | Labels, provided as a map. | `map(string)` | `{}` | no | | [login\_startup\_scripts\_timeout](#input\_login\_startup\_scripts\_timeout) | The timeout (seconds) applied to the login startup script. If
any script exceeds this timeout, then the instance setup process is considered
failed and handled accordingly.

NOTE: When set to 0, the timeout is considered infinite and thus disabled. | `number` | `300` | no | diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf index c535e18740..73ef3e2453 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/main.tf @@ -36,13 +36,6 @@ locals { enable_public_ip_access_config = var.disable_controller_public_ips ? [] : [{ nat_ip = null, network_tier = null }] access_config = length(var.access_config) == 0 ? local.enable_public_ip_access_config : var.access_config - # Handle VM image format from 2 sources, prioritize source_image* variables - # over instance_image - source_image_input_used = var.source_image != "" || var.source_image_family != "" || var.source_image_project != "" - source_image = local.source_image_input_used ? var.source_image : lookup(var.instance_image, "name", "") - source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "") - source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "") - additional_disks = [ for ad in var.additional_disks : { disk_name = ad.disk_name @@ -121,9 +114,9 @@ module "slurm_controller_template" { region = var.region shielded_instance_config = var.shielded_instance_config slurm_instance_role = "controller" - source_image_family = local.source_image_family - source_image_project = local.source_image_project - source_image = local.source_image + source_image_family = local.source_image_family # requires source_image_logic.tf + source_image_project = local.source_image_project_normalized # requires source_image_logic.tf + source_image = local.source_image # requires source_image_logic.tf network = var.network_self_link == null ? 
"" : var.network_self_link subnetwork_project = var.subnetwork_project == null ? "" : var.subnetwork_project subnetwork = var.subnetwork_self_link == null ? "" : var.subnetwork_self_link diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/source_image_logic.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/source_image_logic.tf new file mode 100644 index 0000000000..cb6932c358 --- /dev/null +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/source_image_logic.tf @@ -0,0 +1,29 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +locals { + # Handle VM image format from 2 sources, prioritize source_image* variables + # over instance_image + source_image_input_used = var.source_image != "" || var.source_image_family != "" || var.source_image_project != "" + source_image = local.source_image_input_used ? var.source_image : lookup(var.instance_image, "name", "") + source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "") + source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "") + source_image_project_normalized = ( + local.source_image != "" || strcontains(local.source_image_project, "/") + ? 
local.source_image_project + : "projects/${local.source_image_project}/global/images/family" + ) +} diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf index 25e473327c..fb20beb52b 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/variables.tf @@ -540,7 +540,7 @@ variable "instance_image" { type = map(string) default = { family = "slurm-gcp-5-7-hpc-centos-7" - project = "projects/schedmd-slurm-public/global/images/family" + project = "schedmd-slurm-public" } validation { diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md index 251b39609b..421a9e8ee6 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/README.md @@ -114,7 +114,7 @@ limitations under the License. | [enable\_shielded\_vm](#input\_enable\_shielded\_vm) | Enable the Shielded VM configuration. Note: the instance image must support option. | `bool` | `false` | no | | [gpu](#input\_gpu) | GPU information. Type and count of GPU to attach to the instance template. See
https://cloud.google.com/compute/docs/gpus more details.
- type : the GPU type, e.g. nvidia-tesla-t4, nvidia-a100-80gb, nvidia-tesla-a100, etc
- count : number of GPUs

If both 'var.gpu' and 'var.guest\_accelerator' are set, 'var.gpu' will be used. |
object({
type = string
count = number
})
| `null` | no | | [guest\_accelerator](#input\_guest\_accelerator) | Alternative method of providing 'var.gpu' with a consistent naming scheme to
other HPC Toolkit modules.

If both 'var.gpu' and 'var.guest\_accelerator' are set, 'var.gpu' will be used. |
list(object({
type = string,
count = number
}))
| `null` | no | -| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm login node VM instances. This
value is overridden if any of `source_image`, `source_image_family` or
`source_image_project` are set.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-5-7-hpc-centos-7",
"project": "projects/schedmd-slurm-public/global/images/family"
}
| no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm login node VM instances. This
value is overridden if any of `source_image`, `source_image_family` or
`source_image_project` are set.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-5-7-hpc-centos-7",
"project": "schedmd-slurm-public"
}
| no | | [instance\_template](#input\_instance\_template) | Self link to a custom instance template. If set, other VM definition
variables such as machine\_type and instance\_image will be ignored in favor
of the provided instance template.

For more information on creating custom images for the instance template
that comply with Slurm on GCP see the "Slurm on GCP Custom Images" section
in docs/vm-images.md. | `string` | `null` | no | | [labels](#input\_labels) | Labels, provided as a map. | `map(string)` | `{}` | no | | [machine\_type](#input\_machine\_type) | Machine type to create. | `string` | `"n2-standard-2"` | no | diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf index a807147875..da2fb1b759 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/main.tf @@ -32,13 +32,6 @@ locals { enable_public_ip_access_config = var.disable_login_public_ips ? [] : [{ nat_ip = null, network_tier = null }] access_config = length(var.access_config) == 0 ? local.enable_public_ip_access_config : var.access_config - # Handle VM image format from 2 sources, prioritize source_image* variables - # over instance_image - source_image_input_used = var.source_image != "" || var.source_image_family != "" || var.source_image_project != "" - source_image = local.source_image_input_used ? var.source_image : lookup(var.instance_image, "name", "") - source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "") - source_image_project = local.source_image_input_used ? 
var.source_image_project : lookup(var.instance_image, "project", "") - additional_disks = [ for ad in var.additional_disks : { disk_name = ad.disk_name @@ -81,9 +74,9 @@ module "slurm_login_template" { region = var.region shielded_instance_config = var.shielded_instance_config slurm_instance_role = "login" - source_image_family = local.source_image_family - source_image_project = local.source_image_project - source_image = local.source_image + source_image_family = local.source_image_family # requires source_image_logic.tf + source_image_project = local.source_image_project_normalized # requires source_image_logic.tf + source_image = local.source_image # requires source_image_logic.tf network = var.network_self_link == null ? "" : var.network_self_link subnetwork_project = var.subnetwork_project == null ? "" : var.subnetwork_project subnetwork = var.subnetwork_self_link == null ? "" : var.subnetwork_self_link diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf new file mode 100644 index 0000000000..cb6932c358 --- /dev/null +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf @@ -0,0 +1,29 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +locals { + # Handle VM image format from 2 sources, prioritize source_image* variables + # over instance_image + source_image_input_used = var.source_image != "" || var.source_image_family != "" || var.source_image_project != "" + source_image = local.source_image_input_used ? var.source_image : lookup(var.instance_image, "name", "") + source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "") + source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "") + source_image_project_normalized = ( + local.source_image != "" || strcontains(local.source_image_project, "/") + ? local.source_image_project + : "projects/${local.source_image_project}/global/images/family" + ) +} diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf index c571599f8b..44ed037c5c 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/variables.tf @@ -298,7 +298,7 @@ variable "instance_image" { type = map(string) default = { family = "slurm-gcp-5-7-hpc-centos-7" - project = "projects/schedmd-slurm-public/global/images/family" + project = "schedmd-slurm-public" } validation { diff --git a/examples/hpc-enterprise-slurm.yaml b/examples/hpc-enterprise-slurm.yaml index 09e112c06e..35ed922618 100644 --- a/examples/hpc-enterprise-slurm.yaml +++ b/examples/hpc-enterprise-slurm.yaml @@ -25,7 +25,7 @@ vars: # Visit https://github.com/SchedMD/slurm-gcp/blob/master/docs/images.md#published-image-family # for a list of valid family options with Slurm family: schedmd-v5-slurm-22-05-9-hpc-centos-7 - project: projects/schedmd-slurm-public/global/images/family + project: schedmd-slurm-public # Set to true for active cluster reconfiguration. 
# Note that setting this option requires additional dependencies to be installed locally. # https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/community/modules/scheduler/schedmd-slurm-gcp-v5-controller#description diff --git a/tools/duplicate-diff.py b/tools/duplicate-diff.py index a0730fcaef..068d2e476e 100644 --- a/tools/duplicate-diff.py +++ b/tools/duplicate-diff.py @@ -43,7 +43,12 @@ ], [ "community/modules/compute/gke-node-pool/threads_per_core_calc.tf", - "modules/compute/vm-instance/threads_per_core_calc.tf" + "modules/compute/vm-instance/threads_per_core_calc.tf", + ], + [ + "community/modules/compute/schedmd-slurm-gcp-v5-node-group/source_image_logic.tf", + "community/modules/scheduler/schedmd-slurm-gcp-v5-controller/source_image_logic.tf", + "community/modules/scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf", ], ] diff --git a/tools/validate_configs/test_configs/node-groups.yaml b/tools/validate_configs/test_configs/node-groups.yaml index 39101fae61..8f22e60acc 100644 --- a/tools/validate_configs/test_configs/node-groups.yaml +++ b/tools/validate_configs/test_configs/node-groups.yaml @@ -65,7 +65,7 @@ deployment_groups: machine_type: c2-standard-30 instance_image: family: slurm-gcp-5-7-debian-11 - project: projects/schedmd-slurm-public/global/images/family + project: schedmd-slurm-public - id: node_group2 source: community/modules/compute/schedmd-slurm-gcp-v5-node-group @@ -74,7 +74,7 @@ deployment_groups: machine_type: c2-standard-60 instance_image: name: slurm-gcp-dev-hpc-centos-7-1684970018 - project: projects/schedmd-slurm-public/global/images + project: schedmd-slurm-public - id: node_group3 source: community/modules/compute/schedmd-slurm-gcp-v5-node-group @@ -83,7 +83,7 @@ deployment_groups: machine_type: c2d-standard-112 instance_image: family: slurm-gcp-5-7-hpc-centos-7 - project: projects/schedmd-slurm-public/global/images/family + project: schedmd-slurm-public enable_smt: true - id: node_group4 From 
ff95d8c44304ff9a6efc7ecd2df3aba1def78f63 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 22 Jun 2023 10:49:35 -0500 Subject: [PATCH 68/92] Remove debug output from create command --- pkg/config/yaml.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/config/yaml.go b/pkg/config/yaml.go index b6f3fc6974..a6ba2335e5 100644 --- a/pkg/config/yaml.go +++ b/pkg/config/yaml.go @@ -105,7 +105,6 @@ func syntheticOutputsNode(name string, ln int, col int) *yaml.Node { // - mork # canonical path to "mork" value is `...outputs[1].name`, NOT `...outputs[1]` // ``` func normalizeYamlNode(p yPath, n *yaml.Node) *yaml.Node { - fmt.Printf("node: %#v, path: %#v", n, string(p)) switch { case n.Kind == yaml.ScalarNode && regexp.MustCompile(`^deployment_groups\[\d+\]\.modules\[\d+\]\.outputs\[\d+\]$`).MatchString(string(p)): return syntheticOutputsNode(n.Value, n.Line, n.Column) From 04c2787820da0161c38b0751f703a31a1f8b79fb Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Thu, 22 Jun 2023 10:56:37 -0700 Subject: [PATCH 69/92] Add custom unmarshaller for Module.Use to improve error message (#1473) * New error message: ``` failed to parse the blueprint in tst.yaml, check YAML syntax for errors, err=line 32: `use` must be a list of module ids ``` * Move YAML-related code `pkg/config` into `yaml*.go`. NOTE to reviewer: Most of the changed lines a just moved. The new code is in following functions: * `(ms *ModuleIDs) UnmarshalYAML`; * `TestModuleKindUnmarshalYAML`; * `TestModuleIDsUnmarshalYAML`. 
--- pkg/config/config.go | 27 ++----- pkg/config/config_test.go | 14 ++-- pkg/config/dict.go | 119 --------------------------- pkg/config/dict_test.go | 104 ------------------------ pkg/config/expand_test.go | 2 +- pkg/config/yaml.go | 145 +++++++++++++++++++++++++++++++++ pkg/config/yaml_test.go | 165 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 324 insertions(+), 252 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index 29b530b97c..eec9ecf95a 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -178,24 +178,6 @@ var TerraformKind = ModuleKind{kind: "terraform"} // PackerKind is the kind for Packer modules (should be treated as const) var PackerKind = ModuleKind{kind: "packer"} -// UnmarshalYAML implements a custom unmarshaler from YAML string to ModuleKind -func (mk *ModuleKind) UnmarshalYAML(n *yaml.Node) error { - var kind string - const yamlErrorMsg string = "block beginning at line %d: %s" - - err := n.Decode(&kind) - if err == nil && IsValidModuleKind(kind) { - mk.kind = kind - return nil - } - return fmt.Errorf(yamlErrorMsg, n.Line, "kind must be \"packer\" or \"terraform\" or removed from YAML") -} - -// MarshalYAML implements a custom marshaler from ModuleKind to YAML string -func (mk ModuleKind) MarshalYAML() (interface{}, error) { - return mk.String(), nil -} - // IsValidModuleKind ensures that the user has specified a supported kind func IsValidModuleKind(kind string) bool { return kind == TerraformKind.String() || kind == PackerKind.String() || @@ -288,12 +270,15 @@ func (v *validatorConfig) check(name validatorName, requiredInputs []string) err // ModuleID is a unique identifier for a module in a blueprint type ModuleID string +// ModuleIDs is a list of ModuleID +type ModuleIDs []ModuleID + // Module stores YAML definition of an HPC cluster component defined in a blueprint type Module struct { Source string Kind ModuleKind ID ModuleID - Use []ModuleID `yaml:"use,omitempty"` + Use ModuleIDs 
`yaml:"use,omitempty"` Outputs []modulereader.OutputInfo `yaml:"outputs,omitempty"` Settings Dict `yaml:"settings,omitempty"` // DEPRECATED fields, keep in the struct for backwards compatibility @@ -357,7 +342,7 @@ func (bp *Blueprint) setGlobalLabels() { // listUnusedModules provides a list modules that are in the // "use" field, but not actually used. -func (m Module) listUnusedModules() []ModuleID { +func (m Module) listUnusedModules() ModuleIDs { used := map[ModuleID]bool{} // Recurse through objects/maps/lists checking each element for having `ProductOfModuleUse` mark. cty.Walk(m.Settings.AsObject(), func(p cty.Path, v cty.Value) (bool, error) { @@ -367,7 +352,7 @@ func (m Module) listUnusedModules() []ModuleID { return true, nil }) - unused := []ModuleID{} + unused := ModuleIDs{} for _, w := range m.Use { if !used[w] { unused = append(unused, w) diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 179f5a31d5..2b987e9061 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -316,7 +316,7 @@ func getMultiGroupDeploymentConfig() DeploymentConfig { matchingIntragroupName1: cty.StringVal("explicit-intra-value"), matchingIntragroupName2: ModuleRef(mod0.ID, matchingIntragroupName2).AsExpression().AsValue(), }), - Use: []ModuleID{mod0.ID}, + Use: ModuleIDs{mod0.ID}, } setTestModuleInfo(mod1, testModuleInfo1) @@ -329,7 +329,7 @@ func getMultiGroupDeploymentConfig() DeploymentConfig { ID: "TestModule2", Kind: TerraformKind, Source: testModuleSource2, - Use: []ModuleID{mod0.ID}, + Use: ModuleIDs{mod0.ID}, } setTestModuleInfo(mod2, testModuleInfo2) @@ -419,25 +419,25 @@ func (s *MySuite) TestCheckModulesAndGroups(c *C) { func (s *MySuite) TestListUnusedModules(c *C) { { // No modules in "use" m := Module{ID: "m"} - c.Check(m.listUnusedModules(), DeepEquals, []ModuleID{}) + c.Check(m.listUnusedModules(), DeepEquals, ModuleIDs{}) } { // Useful m := Module{ ID: "m", - Use: []ModuleID{"w"}, + Use: ModuleIDs{"w"}, Settings: 
NewDict(map[string]cty.Value{ "x": AsProductOfModuleUse(cty.True, "w")})} - c.Check(m.listUnusedModules(), DeepEquals, []ModuleID{}) + c.Check(m.listUnusedModules(), DeepEquals, ModuleIDs{}) } { // Unused m := Module{ ID: "m", - Use: []ModuleID{"w", "u"}, + Use: ModuleIDs{"w", "u"}, Settings: NewDict(map[string]cty.Value{ "x": AsProductOfModuleUse(cty.True, "w")})} - c.Check(m.listUnusedModules(), DeepEquals, []ModuleID{"u"}) + c.Check(m.listUnusedModules(), DeepEquals, ModuleIDs{"u"}) } } diff --git a/pkg/config/dict.go b/pkg/config/dict.go index 62bbf3b619..8e789488a8 100644 --- a/pkg/config/dict.go +++ b/pkg/config/dict.go @@ -15,13 +15,9 @@ package config import ( - "encoding/json" "fmt" "github.com/zclconf/go-cty/cty" - "github.com/zclconf/go-cty/cty/gocty" - ctyJson "github.com/zclconf/go-cty/cty/json" - "gopkg.in/yaml.v3" ) // Dict maps string key to cty.Value. @@ -91,121 +87,6 @@ func (d Dict) IsZero() bool { return len(d.m) == 0 } -// YamlValue is wrapper around cty.Value to handle YAML unmarshal. -type YamlValue struct { - v cty.Value -} - -// Unwrap returns wrapped cty.Value. -func (y YamlValue) Unwrap() cty.Value { - return y.v -} - -// UnmarshalYAML implements custom YAML unmarshaling. 
-func (y *YamlValue) UnmarshalYAML(n *yaml.Node) error { - var err error - switch n.Kind { - case yaml.ScalarNode: - err = y.unmarshalScalar(n) - case yaml.MappingNode: - err = y.unmarshalObject(n) - case yaml.SequenceNode: - err = y.unmarshalTuple(n) - default: - err = fmt.Errorf("line %d: cannot decode node with unknown kind %d", n.Line, n.Kind) - } - return err -} - -func (y *YamlValue) unmarshalScalar(n *yaml.Node) error { - var s interface{} - if err := n.Decode(&s); err != nil { - return err - } - ty, err := gocty.ImpliedType(s) - if err != nil { - return err - } - if y.v, err = gocty.ToCtyValue(s, ty); err != nil { - return err - } - - if l, is := IsYamlExpressionLiteral(y.v); is { // HCL literal - var e Expression - if e, err = ParseExpression(l); err != nil { - return err - } - y.v = e.AsValue() - } else if y.v.Type() == cty.String && hasVariable(y.v.AsString()) { // "simple" variable - e, err := SimpleVarToExpression(y.v.AsString()) - if err != nil { - return err - } - y.v = e.AsValue() - } - return nil -} - -func (y *YamlValue) unmarshalObject(n *yaml.Node) error { - var my map[string]YamlValue - if err := n.Decode(&my); err != nil { - return err - } - mv := map[string]cty.Value{} - for k, y := range my { - mv[k] = y.v - } - y.v = cty.ObjectVal(mv) - return nil -} - -func (y *YamlValue) unmarshalTuple(n *yaml.Node) error { - var ly []YamlValue - if err := n.Decode(&ly); err != nil { - return err - } - lv := []cty.Value{} - for _, y := range ly { - lv = append(lv, y.v) - } - y.v = cty.TupleVal(lv) - return nil -} - -// UnmarshalYAML implements custom YAML unmarshaling. -func (d *Dict) UnmarshalYAML(n *yaml.Node) error { - var m map[string]YamlValue - if err := n.Decode(&m); err != nil { - return err - } - for k, y := range m { - d.Set(k, y.v) - } - return nil -} - -// MarshalYAML implements custom YAML marshaling. 
-func (d Dict) MarshalYAML() (interface{}, error) { - o, _ := cty.Transform(d.AsObject(), func(p cty.Path, v cty.Value) (cty.Value, error) { - if e, is := IsExpressionValue(v); is { - return e.makeYamlExpressionValue(), nil - } - return v, nil - }) - - j := ctyJson.SimpleJSONValue{Value: o} - b, err := j.MarshalJSON() - if err != nil { - return nil, fmt.Errorf("failed to marshal JSON: %v", err) - } - var g interface{} - err = json.Unmarshal(b, &g) - if err != nil { - return nil, fmt.Errorf("failed to unmarshal JSON: %v", err) - } - return g, nil -} - // Eval returns a copy of this Dict, where all Expressions // are evaluated and replaced by result of evaluation. func (d Dict) Eval(bp Blueprint) (Dict, error) { diff --git a/pkg/config/dict_test.go b/pkg/config/dict_test.go index 5405c9f382..9626eea62e 100644 --- a/pkg/config/dict_test.go +++ b/pkg/config/dict_test.go @@ -20,7 +20,6 @@ import ( "github.com/google/go-cmp/cmp" "github.com/zclconf/go-cty-debug/ctydebug" "github.com/zclconf/go-cty/cty" - "gopkg.in/yaml.v3" ) func TestZeroValueValid(t *testing.T) { @@ -82,109 +81,6 @@ func TestItemsAreCopy(t *testing.T) { } } -func TestYAMLDecode(t *testing.T) { - yml := ` -s1: "red" -s2: pink -m1: {} -m2: - m2f1: green - m2f2: [1, 0.2, -3, false] - gv: $(vars.gold) - mv: $(lime.bloom) - hl: ((3 + 9)) -` - want := Dict{} - want. - Set("s1", cty.StringVal("red")). - Set("s2", cty.StringVal("pink")). - Set("m1", cty.EmptyObjectVal). 
- Set("m2", cty.ObjectVal(map[string]cty.Value{ - "m2f1": cty.StringVal("green"), - "m2f2": cty.TupleVal([]cty.Value{ - cty.NumberIntVal(1), - cty.NumberFloatVal(0.2), - cty.NumberIntVal(-3), - cty.False, - }), - "gv": MustParseExpression("var.gold").AsValue(), - "mv": MustParseExpression("module.lime.bloom").AsValue(), - "hl": MustParseExpression("3 + 9").AsValue(), - })) - var got Dict - if err := yaml.Unmarshal([]byte(yml), &got); err != nil { - t.Fatalf("failed to decode: %v", err) - } - if diff := cmp.Diff(want.Items(), got.Items(), ctydebug.CmpOptions); diff != "" { - t.Errorf("diff (-want +got):\n%s", diff) - } -} - -func TestMarshalYAML(t *testing.T) { - d := Dict{} - d. - Set("s1", cty.StringVal("red")). - Set("m1", cty.EmptyObjectVal). - Set("m2", cty.ObjectVal(map[string]cty.Value{ - "m2f1": cty.StringVal("green"), - "m2f2": cty.TupleVal([]cty.Value{ - cty.NumberIntVal(1), - cty.NumberFloatVal(0.2), - cty.NumberIntVal(-3), - cty.False, - MustParseExpression("7 + 4").AsValue(), - }), - })) - want := map[string]interface{}{ - "s1": "red", - "m1": map[string]interface{}{}, - "m2": map[string]interface{}{ - "m2f1": "green", - "m2f2": []interface{}{1.0, 0.2, -3.0, false, "((7 + 4))"}, - }, - } - got, err := d.MarshalYAML() - if err != nil { - t.Fatalf("failed to marshal: %v", err) - } - if diff := cmp.Diff(want, got); diff != "" { - t.Errorf("diff (-want +got):\n%s", diff) - } -} - -func TestYAMLMarshalIntAsInt(t *testing.T) { - d := Dict{} - d.Set("zebra", cty.NumberIntVal(5)) - want := "zebra: 5\n" - got, err := yaml.Marshal(d) - if err != nil { - t.Fatalf("failed to marshal: %v", err) - } - if diff := cmp.Diff(want, string(got)); diff != "" { - t.Errorf("diff (-want +got):\n%s", diff) - } -} - -func TestYAMLDecodeWithAlias(t *testing.T) { - yml := ` -pony: &passtime -- eat -- sleep -zebra: *passtime -` - want := Dict{} - want. - Set("pony", cty.TupleVal([]cty.Value{cty.StringVal("eat"), cty.StringVal("sleep")})). 
- Set("zebra", cty.TupleVal([]cty.Value{cty.StringVal("eat"), cty.StringVal("sleep")})) - var got Dict - if err := yaml.Unmarshal([]byte(yml), &got); err != nil { - t.Fatalf("failed to decode: %v", err) - } - if diff := cmp.Diff(want.Items(), got.Items(), ctydebug.CmpOptions); diff != "" { - t.Errorf("diff (-want +got):\n%s", diff) - } -} - func TestEval(t *testing.T) { bp := Blueprint{ Vars: NewDict(map[string]cty.Value{ diff --git a/pkg/config/expand_test.go b/pkg/config/expand_test.go index 13341497a2..09f908130f 100644 --- a/pkg/config/expand_test.go +++ b/pkg/config/expand_test.go @@ -211,7 +211,7 @@ func (s *MySuite) TestApplyUseModules(c *C) { using := Module{ ID: "usingModule", Source: "path/using", - Use: []ModuleID{"usedModule"}, + Use: ModuleIDs{"usedModule"}, } used := Module{ID: "usedModule", Source: "path/used"} diff --git a/pkg/config/yaml.go b/pkg/config/yaml.go index a6ba2335e5..b2656aaddc 100644 --- a/pkg/config/yaml.go +++ b/pkg/config/yaml.go @@ -17,10 +17,14 @@ package config import ( "bufio" "bytes" + "encoding/json" "fmt" "os" "regexp" + "github.com/zclconf/go-cty/cty" + "github.com/zclconf/go-cty/cty/gocty" + ctyJson "github.com/zclconf/go-cty/cty/json" "gopkg.in/yaml.v3" ) @@ -156,3 +160,144 @@ func (c *nodeCapturer) UnmarshalYAML(n *yaml.Node) error { c.n = n return nil } + +// UnmarshalYAML implements a custom unmarshaler from YAML string to ModuleKind +func (mk *ModuleKind) UnmarshalYAML(n *yaml.Node) error { + var kind string + err := n.Decode(&kind) + if err == nil && IsValidModuleKind(kind) { + mk.kind = kind + return nil + } + return fmt.Errorf("line %d: kind must be \"packer\" or \"terraform\" or removed from YAML", n.Line) +} + +// MarshalYAML implements a custom marshaler from ModuleKind to YAML string +func (mk ModuleKind) MarshalYAML() (interface{}, error) { + return mk.String(), nil +} + +// UnmarshalYAML is a custom unmarshaler for Module.Use, that will print nice error message. 
+func (ms *ModuleIDs) UnmarshalYAML(n *yaml.Node) error { + var ids []ModuleID + if err := n.Decode(&ids); err != nil { + return fmt.Errorf("line %d: `use` must be a list of module ids", n.Line) + } + *ms = ids + return nil +} + +// YamlValue is wrapper around cty.Value to handle YAML unmarshal. +type YamlValue struct { + v cty.Value +} + +// Unwrap returns wrapped cty.Value. +func (y YamlValue) Unwrap() cty.Value { + return y.v +} + +// UnmarshalYAML implements custom YAML unmarshaling. +func (y *YamlValue) UnmarshalYAML(n *yaml.Node) error { + var err error + switch n.Kind { + case yaml.ScalarNode: + err = y.unmarshalScalar(n) + case yaml.MappingNode: + err = y.unmarshalObject(n) + case yaml.SequenceNode: + err = y.unmarshalTuple(n) + default: + err = fmt.Errorf("line %d: cannot decode node with unknown kind %d", n.Line, n.Kind) + } + return err +} + +func (y *YamlValue) unmarshalScalar(n *yaml.Node) error { + var s interface{} + if err := n.Decode(&s); err != nil { + return err + } + ty, err := gocty.ImpliedType(s) + if err != nil { + return err + } + if y.v, err = gocty.ToCtyValue(s, ty); err != nil { + return err + } + + if l, is := IsYamlExpressionLiteral(y.v); is { // HCL literal + var e Expression + if e, err = ParseExpression(l); err != nil { + return err + } + y.v = e.AsValue() + } else if y.v.Type() == cty.String && hasVariable(y.v.AsString()) { // "simple" variable + e, err := SimpleVarToExpression(y.v.AsString()) + if err != nil { + return err + } + y.v = e.AsValue() + } + return nil +} + +func (y *YamlValue) unmarshalObject(n *yaml.Node) error { + var my map[string]YamlValue + if err := n.Decode(&my); err != nil { + return err + } + mv := map[string]cty.Value{} + for k, y := range my { + mv[k] = y.v + } + y.v = cty.ObjectVal(mv) + return nil +} + +func (y *YamlValue) unmarshalTuple(n *yaml.Node) error { + var ly []YamlValue + if err := n.Decode(&ly); err != nil { + return err + } + lv := []cty.Value{} + for _, y := range ly { + lv = append(lv, y.v) + 
} + y.v = cty.TupleVal(lv) + return nil +} + +// UnmarshalYAML implements custom YAML unmarshaling. +func (d *Dict) UnmarshalYAML(n *yaml.Node) error { + var m map[string]YamlValue + if err := n.Decode(&m); err != nil { + return err + } + for k, y := range m { + d.Set(k, y.v) + } + return nil +} + +// MarshalYAML implements custom YAML marshaling. +func (d Dict) MarshalYAML() (interface{}, error) { + o, _ := cty.Transform(d.AsObject(), func(p cty.Path, v cty.Value) (cty.Value, error) { + if e, is := IsExpressionValue(v); is { + return e.makeYamlExpressionValue(), nil + } + return v, nil + }) + + j := ctyJson.SimpleJSONValue{Value: o} + b, err := j.MarshalJSON() + if err != nil { + return nil, fmt.Errorf("failed to marshal JSON: %v", err) + } + var g interface{} + err = json.Unmarshal(b, &g) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal JSON: %v", err) + } + return g, nil +} diff --git a/pkg/config/yaml_test.go b/pkg/config/yaml_test.go index 5d344d38f4..22ca7bbb9e 100644 --- a/pkg/config/yaml_test.go +++ b/pkg/config/yaml_test.go @@ -19,6 +19,8 @@ import ( "testing" "github.com/google/go-cmp/cmp" + "github.com/zclconf/go-cty-debug/ctydebug" + "github.com/zclconf/go-cty/cty" "gopkg.in/yaml.v3" ) @@ -150,3 +152,166 @@ terraform_backend_defaults: }) } } + +func TestModuleKindUnmarshalYAML(t *testing.T) { + type test struct { + input string + want ModuleKind + err bool + } + tests := []test{ + {"", UnknownKind, false}, + {"terraform", TerraformKind, false}, + {"packer", PackerKind, false}, + + {"unknown", ModuleKind{}, true}, + {"[]", ModuleKind{}, true}, + {"{]", ModuleKind{}, true}, + } + for _, tc := range tests { + t.Run(tc.input, func(t *testing.T) { + var got ModuleKind + err := yaml.Unmarshal([]byte(tc.input), &got) + if tc.err != (err != nil) { + t.Fatalf("got unexpected error: %s", err) + } + + if tc.want != got { + t.Errorf("want:%#v:\ngot%#v", tc.want, got) + } + }) + } +} + +func TestModuleIDsUnmarshalYAML(t *testing.T) { + type test 
struct { + input string + want ModuleIDs + err bool + } + tests := []test{ + {"[green, red]", ModuleIDs{"green", "red"}, false}, + {"[]", ModuleIDs{}, false}, + + {"green", nil, true}, + {"44", nil, true}, + {"{}", nil, true}, + {"[[]]", nil, true}, + } + for _, tc := range tests { + t.Run(tc.input, func(t *testing.T) { + var got ModuleIDs + err := yaml.Unmarshal([]byte(tc.input), &got) + if tc.err != (err != nil) { + t.Fatalf("got unexpected error: %s", err) + } + + if diff := cmp.Diff(tc.want, got); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } + }) + } +} + +func TestDictUnmarshalYAML(t *testing.T) { + yml := ` +s1: "red" +s2: pink +m1: {} +m2: + m2f1: green + m2f2: [1, 0.2, -3, false] + gv: $(vars.gold) + mv: $(lime.bloom) + hl: ((3 + 9)) +` + want := Dict{} + want. + Set("s1", cty.StringVal("red")). + Set("s2", cty.StringVal("pink")). + Set("m1", cty.EmptyObjectVal). + Set("m2", cty.ObjectVal(map[string]cty.Value{ + "m2f1": cty.StringVal("green"), + "m2f2": cty.TupleVal([]cty.Value{ + cty.NumberIntVal(1), + cty.NumberFloatVal(0.2), + cty.NumberIntVal(-3), + cty.False, + }), + "gv": MustParseExpression("var.gold").AsValue(), + "mv": MustParseExpression("module.lime.bloom").AsValue(), + "hl": MustParseExpression("3 + 9").AsValue(), + })) + var got Dict + if err := yaml.Unmarshal([]byte(yml), &got); err != nil { + t.Fatalf("failed to decode: %v", err) + } + if diff := cmp.Diff(want.Items(), got.Items(), ctydebug.CmpOptions); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} + +func TestDictMarshalYAML(t *testing.T) { + d := Dict{} + d. + Set("s1", cty.StringVal("red")). + Set("m1", cty.EmptyObjectVal). 
+ Set("m2", cty.ObjectVal(map[string]cty.Value{ + "m2f1": cty.StringVal("green"), + "m2f2": cty.TupleVal([]cty.Value{ + cty.NumberIntVal(1), + cty.NumberFloatVal(0.2), + cty.NumberIntVal(-3), + cty.False, + MustParseExpression("7 + 4").AsValue(), + }), + })) + want := map[string]interface{}{ + "s1": "red", + "m1": map[string]interface{}{}, + "m2": map[string]interface{}{ + "m2f1": "green", + "m2f2": []interface{}{1.0, 0.2, -3.0, false, "((7 + 4))"}, + }, + } + got, err := d.MarshalYAML() + if err != nil { + t.Fatalf("failed to marshal: %v", err) + } + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} + +func TestYAMLValueMarshalIntAsInt(t *testing.T) { + d := Dict{} + d.Set("zebra", cty.NumberIntVal(5)) + want := "zebra: 5\n" + got, err := yaml.Marshal(d) + if err != nil { + t.Fatalf("failed to marshal: %v", err) + } + if diff := cmp.Diff(want, string(got)); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} + +func TestYAMLValueUnmarshalWithAlias(t *testing.T) { + yml := ` +pony: &passtime +- eat +- sleep +zebra: *passtime +` + want := Dict{} + want. + Set("pony", cty.TupleVal([]cty.Value{cty.StringVal("eat"), cty.StringVal("sleep")})). 
+ Set("zebra", cty.TupleVal([]cty.Value{cty.StringVal("eat"), cty.StringVal("sleep")})) + var got Dict + if err := yaml.Unmarshal([]byte(yml), &got); err != nil { + t.Fatalf("failed to decode: %v", err) + } + if diff := cmp.Diff(want.Items(), got.Items(), ctydebug.CmpOptions); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} From a84ce30763ffd83e3715480a92c5dc7a1111e801 Mon Sep 17 00:00:00 2001 From: Rohit Ramu Date: Thu, 22 Jun 2023 11:45:46 -0700 Subject: [PATCH 70/92] Use `regexall` instead of `strcontains` to stay compatible with terraform 1.2.x (#1493) --- .../schedmd-slurm-gcp-v5-node-group/source_image_logic.tf | 2 +- .../schedmd-slurm-gcp-v5-controller/source_image_logic.tf | 2 +- .../scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/source_image_logic.tf b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/source_image_logic.tf index cb6932c358..8e5a1ea5ec 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/source_image_logic.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/source_image_logic.tf @@ -22,7 +22,7 @@ locals { source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "") source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "") source_image_project_normalized = ( - local.source_image != "" || strcontains(local.source_image_project, "/") + local.source_image != "" || length(regexall("/", local.source_image_project)) > 0 ? 
local.source_image_project : "projects/${local.source_image_project}/global/images/family" ) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/source_image_logic.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/source_image_logic.tf index cb6932c358..8e5a1ea5ec 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/source_image_logic.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/source_image_logic.tf @@ -22,7 +22,7 @@ locals { source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "") source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "") source_image_project_normalized = ( - local.source_image != "" || strcontains(local.source_image_project, "/") + local.source_image != "" || length(regexall("/", local.source_image_project)) > 0 ? local.source_image_project : "projects/${local.source_image_project}/global/images/family" ) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf index cb6932c358..8e5a1ea5ec 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf @@ -22,7 +22,7 @@ locals { source_image_family = local.source_image_input_used ? var.source_image_family : lookup(var.instance_image, "family", "") source_image_project = local.source_image_input_used ? var.source_image_project : lookup(var.instance_image, "project", "") source_image_project_normalized = ( - local.source_image != "" || strcontains(local.source_image_project, "/") + local.source_image != "" || length(regexall("/", local.source_image_project)) > 0 ? 
local.source_image_project : "projects/${local.source_image_project}/global/images/family" ) From a0d0de1b05f5b4cd49746ebfab174159044ee5cd Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Fri, 23 Jun 2023 09:35:49 -0700 Subject: [PATCH 71/92] Add `automatic_restart` to `vm-instance` (#1288) Keep current logic around `compact_placement || spot`, but treat it as a default value, respecting user choise if `automatic_restart` was set explicitly. https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance#automatic_restart https://cloud.google.com/compute/docs/instances/setting-vm-host-options#settingoptions --- modules/compute/vm-instance/README.md | 1 + modules/compute/vm-instance/main.tf | 8 +++++++- modules/compute/vm-instance/variables.tf | 6 ++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/modules/compute/vm-instance/README.md b/modules/compute/vm-instance/README.md index da47291117..3df4b8303b 100644 --- a/modules/compute/vm-instance/README.md +++ b/modules/compute/vm-instance/README.md @@ -175,6 +175,7 @@ limitations under the License. |------|-------------|------|---------|:--------:| | [add\_deployment\_name\_before\_prefix](#input\_add\_deployment\_name\_before\_prefix) | If true, the names of VMs and disks will always be prefixed with `deployment_name` to enable uniqueness across deployments.
See `name_prefix` for further details on resource naming behavior. | `bool` | `false` | no | | [auto\_delete\_boot\_disk](#input\_auto\_delete\_boot\_disk) | Controls if boot disk should be auto-deleted when instance is deleted. | `bool` | `true` | no | +| [automatic\_restart](#input\_automatic\_restart) | Specifies if the instance should be restarted if it was terminated by Compute Engine (not a user). | `bool` | `null` | no | | [bandwidth\_tier](#input\_bandwidth\_tier) | Tier 1 bandwidth increases the maximum egress bandwidth for VMs.
Using the `tier_1_enabled` setting will enable both gVNIC and TIER\_1 higher bandwidth networking.
Using the `gvnic_enabled` setting will only enable gVNIC and will not enable TIER\_1.
Note that TIER\_1 only works with specific machine families & shapes and must be using an image that supports gVNIC. See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"not_enabled"` | no | | [deployment\_name](#input\_deployment\_name) | Name of the deployment, will optionally be used name resources according to `name_prefix` | `string` | n/a | yes | | [disable\_public\_ips](#input\_disable\_public\_ips) | If set to true, instances will not have public IPs | `bool` | `false` | no | diff --git a/modules/compute/vm-instance/main.tf b/modules/compute/vm-instance/main.tf index 73a80351cb..18d34ed4f2 100644 --- a/modules/compute/vm-instance/main.tf +++ b/modules/compute/vm-instance/main.tf @@ -43,9 +43,15 @@ locals { # both of these must be false if either compact placement or preemptible/spot instances are used # automatic restart is tolerant of GPUs while on host maintenance is not - automatic_restart = local.compact_placement || var.spot ? false : null + automatic_restart_default = local.compact_placement || var.spot ? false : null on_host_maintenance_default = local.compact_placement || var.spot || local.gpu_attached ? "TERMINATE" : "MIGRATE" + automatic_restart = ( + var.automatic_restart != null + ? var.automatic_restart + : local.automatic_restart_default + ) + on_host_maintenance = ( var.on_host_maintenance != null ? var.on_host_maintenance diff --git a/modules/compute/vm-instance/variables.tf b/modules/compute/vm-instance/variables.tf index 8632a1c260..e513dfcf70 100644 --- a/modules/compute/vm-instance/variables.tf +++ b/modules/compute/vm-instance/variables.tf @@ -245,6 +245,12 @@ variable "guest_accelerator" { default = null } +variable "automatic_restart" { + description = "Specifies if the instance should be restarted if it was terminated by Compute Engine (not a user)." 
+ type = bool + default = null +} + variable "on_host_maintenance" { description = "Describes maintenance behavior for the instance. If left blank this will default to `MIGRATE` except for when `placement_policy`, spot provisioning, or GPUs require it to be `TERMINATE`" type = string From 33eeaf5d0cfceea0258225b653f6f4c708db4b96 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Fri, 23 Jun 2023 11:06:55 -0700 Subject: [PATCH 72/92] Add `pipefail` to Makefile to preven swallowing failed tests (#1498) --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 2b19fde9c5..51079dad71 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,7 @@ MIN_GOLANG_VERSION=1.18 # for building ghpc terraform-format packer-format \ check-tflint check-pre-commit +SHELL=/bin/bash -o pipefail ENG = ./cmd/... ./pkg/... TERRAFORM_FOLDERS=$(shell find ./modules ./community/modules ./tools -type f -name "*.tf" -not -path '*/\.*' -exec dirname "{}" \; | sort -u) PACKER_FOLDERS=$(shell find ./modules ./community/modules ./tools -type f -name "*.pkr.hcl" -not -path '*/\.*' -exec dirname "{}" \; | sort -u) From 7a3b1a45f113ea6670970607690b9af598b9b380 Mon Sep 17 00:00:00 2001 From: wenyhu-google <127439859+wenyhu-google@users.noreply.github.com> Date: Tue, 20 Jun 2023 10:22:10 -0700 Subject: [PATCH 73/92] Use Batch HPC CentOS images as default image --- modules/scheduler/batch-job-template/README.md | 2 +- modules/scheduler/batch-job-template/variables.tf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/scheduler/batch-job-template/README.md b/modules/scheduler/batch-job-template/README.md index 06459e8f6e..b4ab8a7939 100644 --- a/modules/scheduler/batch-job-template/README.md +++ b/modules/scheduler/batch-job-template/README.md @@ -150,7 +150,7 @@ limitations under the License. 
| [deployment\_name](#input\_deployment\_name) | Name of the deployment, used for the job\_id | `string` | n/a | yes | | [enable\_public\_ips](#input\_enable\_public\_ips) | If set to true, instances will have public IPs | `bool` | `true` | no | | [gcloud\_version](#input\_gcloud\_version) | The version of the gcloud cli being used. Used for output instructions. Valid inputs are `"alpha"`, `"beta"` and "" (empty string for default version) | `string` | `"alpha"` | no | -| [image](#input\_image) | Google Cloud Batch compute node image. Ignored if `instance_template` is provided. |
object({
family = string
project = string
})
|
{
"family": "hpc-centos-7",
"project": "cloud-hpc-image-public"
}
| no | +| [image](#input\_image) | Google Cloud Batch compute node image. Ignored if `instance_template` is provided. |
object({
family = string
project = string
})
|
{
"family": "batch-hpc-centos-7-official",
"project": "batch-custom-image"
}
| no | | [instance\_template](#input\_instance\_template) | Compute VM instance template self-link to be used for Google Cloud Batch compute node. If provided, a number of other variables will be ignored as noted by `Ignored if instance_template is provided` in descriptions. | `string` | `null` | no | | [job\_filename](#input\_job\_filename) | The filename of the generated job template file. Will default to `cloud-batch-.json` if not specified | `string` | `null` | no | | [job\_id](#input\_job\_id) | An id for the Google Cloud Batch job. Used for output instructions and file naming. Defaults to deployment name. | `string` | `null` | no | diff --git a/modules/scheduler/batch-job-template/variables.tf b/modules/scheduler/batch-job-template/variables.tf index aa0b053112..19f8426030 100644 --- a/modules/scheduler/batch-job-template/variables.tf +++ b/modules/scheduler/batch-job-template/variables.tf @@ -173,8 +173,8 @@ variable "image" { project = string }) default = { - family = "hpc-centos-7" - project = "cloud-hpc-image-public" + family = "batch-hpc-centos-7-official" + project = "batch-custom-image" } } From fd64928027be642c54d1e7037933723afc8303cb Mon Sep 17 00:00:00 2001 From: Mark Olson <115657904+mark-olson@users.noreply.github.com> Date: Fri, 23 Jun 2023 15:43:37 -0700 Subject: [PATCH 74/92] Update google-cloud-daos version from v0.4.0 to v0.4.1 in community examples google-cloud-daos v0.4.1 was released to fix 2 bugs. See https://github.com/daos-stack/google-cloud-daos/releases/tag/v0.4.1 This change updates the version number referenced in the community/examples/intel/*-daos.yaml blueprints. 
Made a few minor updates in the community/examples/intel/README.md Signed-off-by: Mark Olson <115657904+mark-olson@users.noreply.github.com> --- community/examples/intel/README.md | 73 ++++++++------------ community/examples/intel/hpc-slurm-daos.yaml | 6 +- community/examples/intel/pfs-daos.yaml | 16 ++--- 3 files changed, 38 insertions(+), 57 deletions(-) diff --git a/community/examples/intel/README.md b/community/examples/intel/README.md index 825dba5410..128f797ad3 100644 --- a/community/examples/intel/README.md +++ b/community/examples/intel/README.md @@ -185,42 +185,28 @@ terraform -chdir=hpc-intel-select/primary destroy ## DAOS Cluster The [pfs-daos.yaml](pfs-daos.yaml) blueprint describes an environment with -- A [managed instance group][mig] with four DAOS server instances -- A [managed instance group][mig] with two DAOS client instances +- Two DAOS server instances +- Two DAOS client instances -For more information, please refer to the [Google Cloud DAOS repo on GitHub][google-cloud-daos]. - -> **_NOTE:_** The [pre-deployment steps in the google-cloud-daos/README.md][pre-deployment] must be completed prior to running this HPC Toolkit example. - -[mig]: https://cloud.google.com/compute/docs/instance-groups -[google-cloud-daos]: https://github.com/daos-stack/google-cloud-daos -[pre-deployment]: https://github.com/daos-stack/google-cloud-daos#pre-deployment-steps +The [pfs-daos.yaml](pfs-daos.yaml) blueprint uses a Packer template and Terraform modules from the [Google Cloud DAOS][google-cloud-daos] repository. Identify a project to work in and substitute its unique id wherever you see `<>` in the instructions below. ### Initial Setup for DAOS Cluster -Before provisioning any infrastructure in this project you should follow the -Toolkit guidance to enable [APIs][apis] and establish minimum resource -[quotas][quotas]. 
In particular, the following APIs should be enabled - -- [compute.googleapis.com](https://cloud.google.com/compute/docs/reference/rest/v1#service:-compute.googleapis.com) (Google Compute Engine) -- [secretmanager.googleapis.com](https://cloud.google.com/secret-manager/docs/reference/rest#service:-secretmanager.googleapis.com) (Secret manager, for secure mode) +Before provisioning the DAOS cluster you must follow the steps listed in the [Google Cloud DAOS Pre-deployment Guide][pre-deployment_guide]. -[apis]: ../../../README.md#enable-gcp-apis -[quotas]: ../../../README.md#gcp-quotas +Skip the "Build DAOS Images" step at the end of the [Pre-deployment Guide][pre-deployment_guide]. The [pfs-daos.yaml](pfs-daos.yaml) blueprint will build the images as part of the deployment. -The following available quota is required in the region used by the cluster: +The Pre-deployment Guide provides instructions for enabling service accounts, APIs, establishing minimum resource quotas and other necessary steps to prepare your project. -- C2 CPUs: 32 (16 per client node) -- N2 CPUs: 144 (36 per server node) -- PD-SSD: 120GB (20GB per client and server) -- Local SSD: 4 \* 16 \* 375 = 24,000GB (6TB per server) +[google-cloud-daos]: https://github.com/daos-stack/google-cloud-daos +[pre-deployment_guide]: https://github.com/daos-stack/google-cloud-daos/blob/main/docs/pre-deployment_guide.md ### Deploy the DAOS Cluster -Use `ghpc` to provision the blueprint +After completing the steps in the [Pre-deployment Guide][pre-deployment_guide] use `ghpc` to provision the blueprint ```text ghpc create community/examples/intel/pfs-daos.yaml \ @@ -363,9 +349,8 @@ The `cont1` container is now mounted on `${HOME}/daos/cont1` Create a 20GiB file which will be stored in the DAOS filesystem. 
```bash -pushd ${HOME}/daos/cont1 time LD_PRELOAD=/usr/lib64/libioil.so \ -dd if=/dev/zero of=./test20GiB.img iflag=fullblock bs=1G count=20 +dd if=/dev/zero of="${HOME}/daos/cont1/test20GiB.img" iflag=fullblock bs=1G count=20 ``` See the [File System](https://docs.daos.io/v2.2/user/filesystem/) section of the DAOS User Guide for more information about DFuse. @@ -406,39 +391,35 @@ The blueprint uses modules from - [community/modules/scheduler/SchedMD-slurm-on-gcp-login-node][SchedMD-slurm-on-gcp-login-node] - [community/modules/compute/SchedMD-slurm-on-gcp-partition][SchedMD-slurm-on-gcp-partition] -> **_NOTE:_** The [pre-deployment steps in the google-cloud-daos/README.md][pre-deployment] must be completed prior to running this HPC Toolkit example. - -[mig]: https://cloud.google.com/compute/docs/instance-groups -[google-cloud-daos]: https://github.com/daos-stack/google-cloud-daos -[pre-deployment]: https://github.com/daos-stack/google-cloud-daos#pre-deployment-steps -[apis]: ../../../README.md#enable-gcp-apis -[SchedMD-slurm-on-gcp-controller]: ../../modules/scheduler/SchedMD-slurm-on-gcp-controller -[SchedMD-slurm-on-gcp-login-node]: ../../modules/scheduler/SchedMD-slurm-on-gcp-login-node -[SchedMD-slurm-on-gcp-partition]: ../../modules/compute/SchedMD-slurm-on-gcp-partition +The blueprint also uses a Packer template from the [Google Cloud DAOS][google-cloud-daos] repository. Identify a project to work in and substitute its unique id wherever you see `<>` in the instructions below. ### Initial Setup for the DAOS/Slurm cluster -Before provisioning any infrastructure in this project you should follow the -Toolkit guidance to enable [APIs][apis] and establish minimum resource -[quotas][quotas]. In particular, the following APIs should be enabled +Before provisioning the DAOS cluster you must follow the steps listed in the [Google Cloud DAOS Pre-deployment Guide][pre-deployment_guide]. 
-- [compute.googleapis.com](https://cloud.google.com/compute/docs/reference/rest/v1#service:-compute.googleapis.com) (Google Compute Engine) -- [secretmanager.googleapis.com](https://cloud.google.com/secret-manager/docs/reference/rest#service:-secretmanager.googleapis.com) (Secret manager, for secure mode) +Skip the "Build DAOS Images" step at the end of the [Pre-deployment Guide][pre-deployment_guide]. The [hpc-slurm-daos.yaml](hpc-slurm-daos.yaml) blueprint will build the DAOS server image as part of the deployment. + +The Pre-deployment Guide provides instructions for enabling service accounts, APIs, establishing minimum resource quotas and other necessary steps to prepare your project for DAOS server deployment. + +[google-cloud-daos]: https://github.com/daos-stack/google-cloud-daos +[pre-deployment_guide]: https://github.com/daos-stack/google-cloud-daos/blob/main/docs/pre-deployment_guide.md +[packer-template]: https://github.com/daos-stack/google-cloud-daos/blob/main/images/daos.pkr.hcl [apis]: ../../../README.md#enable-gcp-apis -[quotas]: ../../../README.md#gcp-quotas +[SchedMD-slurm-on-gcp-controller]: ../../modules/scheduler/SchedMD-slurm-on-gcp-controller +[SchedMD-slurm-on-gcp-login-node]: ../../modules/scheduler/SchedMD-slurm-on-gcp-login-node +[SchedMD-slurm-on-gcp-partition]: ../../modules/compute/SchedMD-slurm-on-gcp-partition -And the following available quota is required in the region used by the cluster: +Follow the Toolkit guidance to enable [APIs][apis] and establish minimum resource [quotas][quotas] for Slurm. 
+ +[apis]: ../../../README.md#enable-gcp-apis +[quotas]: ../../../README.md#gcp-quotas -For DAOS: -- N2 CPUs: 64 (16 per server node) -- PD-SSD: 80GB (20GB per server) -- Local SSD: 4 \* 4 \* 375 = 6,000GB (1.5TB per server) +The following available quota is required in the region used by Slurm: -For Slurm: - Filestore: 2560GB - C2 CPUs: 6000 (fully-scaled "compute" partition) - This quota is not necessary at initial deployment, but will be required to diff --git a/community/examples/intel/hpc-slurm-daos.yaml b/community/examples/intel/hpc-slurm-daos.yaml index 413178bb91..46b4f02dd7 100644 --- a/community/examples/intel/hpc-slurm-daos.yaml +++ b/community/examples/intel/hpc-slurm-daos.yaml @@ -43,9 +43,9 @@ deployment_groups: - group: daos-server-image modules: - # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.0/images + # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.1/images - id: daos-server-image - source: github.com/daos-stack/google-cloud-daos//images?ref=v0.4.0&depth=1 + source: github.com/daos-stack/google-cloud-daos//images?ref=v0.4.1&depth=1 kind: packer settings: daos_version: 2.2.0 @@ -69,7 +69,7 @@ deployment_groups: modules: # more info: https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server - id: daos - source: github.com/daos-stack/google-cloud-daos//terraform/modules/daos_server?ref=v0.4.0&depth=1 + source: github.com/daos-stack/google-cloud-daos//terraform/modules/daos_server?ref=v0.4.1&depth=1 use: [network1] settings: labels: {ghpc_role: file-system} diff --git a/community/examples/intel/pfs-daos.yaml b/community/examples/intel/pfs-daos.yaml index 516c8742b8..648aba9403 100644 --- a/community/examples/intel/pfs-daos.yaml +++ b/community/examples/intel/pfs-daos.yaml @@ -38,9 +38,9 @@ deployment_groups: - group: daos-server-image modules: - # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.0/images + # more info: 
https://github.com/daos-stack/google-cloud-daos/tree/v0.4.1/images - id: daos-server-image - source: github.com/daos-stack/google-cloud-daos//images?ref=v0.4.0&depth=1 + source: github.com/daos-stack/google-cloud-daos//images?ref=v0.4.1&depth=1 kind: packer settings: daos_version: 2.2.0 @@ -62,9 +62,9 @@ deployment_groups: - group: daos-client-image modules: - # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.0/images + # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.1/images - id: daos-client-image - source: github.com/daos-stack/google-cloud-daos//images?ref=v0.4.0&depth=1 + source: github.com/daos-stack/google-cloud-daos//images?ref=v0.4.1&depth=1 kind: packer settings: daos_version: 2.2.0 @@ -86,18 +86,18 @@ deployment_groups: - group: daos-cluster modules: - # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.0/terraform/modules/daos_server + # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.1/terraform/modules/daos_server - id: daos-server - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.4.0&depth=1 + source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.4.1&depth=1 use: [network1] settings: number_of_instances: 2 labels: {ghpc_role: file-system} os_family: $(vars.server_image_family) - # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.0/terraform/modules/daos_client + # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.1/terraform/modules/daos_client - id: daos-client - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.4.0&depth=1 + source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.4.1&depth=1 use: [network1, daos-server] settings: number_of_instances: 2 From a813bbd8bb1eec20c261585ac13d17fa633c1319 Mon Sep 17 00:00:00 2001 From: Carlos Boneti Date: Thu, 22 Jun 2023 22:55:25 -0700 
Subject: [PATCH 75/92] VM Instance replace_triggered by instance_image --- modules/compute/vm-instance/README.md | 23 +++++++++- modules/compute/vm-instance/main.tf | 9 ++++ modules/compute/vm-instance/versions.tf | 5 ++- tools/validate_configs/test_configs/vm.yaml | 48 +++++++++++++++++++++ 4 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 tools/validate_configs/test_configs/vm.yaml diff --git a/modules/compute/vm-instance/README.md b/modules/compute/vm-instance/README.md index da47291117..eef1747459 100644 --- a/modules/compute/vm-instance/README.md +++ b/modules/compute/vm-instance/README.md @@ -122,6 +122,24 @@ Use the following settings for spread placement: More information on GPU support in `vm-instance` and other HPC Toolkit modules can be found at [docs/gpu-support.md](../../../docs/gpu-support.md) +## Lifecycle + +The `vm-instance` module will be replaced when the `instance_image` variable is +changed and `terraform apply` is run on the deployment group folder or +`ghpc deploy` is run. However, it will not be automatically replaced if a new +image is created in a family. + +To selectively replace the vm-instance(s), consider running terraform +`apply -replace` such as: + +> See https://developer.hashicorp.com/terraform/cli/commands/plan#replace-address for precise syntax terraform apply -replace=ADDRESS + +```shell +terraform state list +# search for the module ID and resource +terraform apply -replace="address" +``` + ## License @@ -143,9 +161,10 @@ limitations under the License. | Name | Version | |------|---------| -| [terraform](#requirement\_terraform) | >= 0.14.0 | +| [terraform](#requirement\_terraform) | >= 1.2.0 | | [google](#requirement\_google) | >= 4.42 | | [google-beta](#requirement\_google-beta) | >= 4.12 | +| [null](#requirement\_null) | >= 1.0 | ## Providers @@ -153,6 +172,7 @@ limitations under the License. 
|------|---------| | [google](#provider\_google) | >= 4.42 | | [google-beta](#provider\_google-beta) | >= 4.12 | +| [null](#provider\_null) | >= 1.0 | ## Modules @@ -167,6 +187,7 @@ limitations under the License. | [google-beta_google_compute_instance.compute_vm](https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/google_compute_instance) | resource | | [google_compute_disk.boot_disk](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_disk) | resource | | [google_compute_resource_policy.placement_policy](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_resource_policy) | resource | +| [null_resource.image](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [google_compute_image.compute_image](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_image) | data source | ## Inputs diff --git a/modules/compute/vm-instance/main.tf b/modules/compute/vm-instance/main.tf index da72a6f604..50b05817de 100644 --- a/modules/compute/vm-instance/main.tf +++ b/modules/compute/vm-instance/main.tf @@ -85,6 +85,13 @@ data "google_compute_image" "compute_image" { project = var.instance_image.project } +resource "null_resource" "image" { + triggers = { + image = var.instance_image.family, + project = var.instance_image.project + } +} + resource "google_compute_disk" "boot_disk" { project = var.project_id @@ -98,6 +105,8 @@ resource "google_compute_disk" "boot_disk" { zone = var.zone lifecycle { + replace_triggered_by = [null_resource.image] + ignore_changes = [ image ] diff --git a/modules/compute/vm-instance/versions.tf b/modules/compute/vm-instance/versions.tf index 8c5e31c035..9fa052445e 100644 --- a/modules/compute/vm-instance/versions.tf +++ b/modules/compute/vm-instance/versions.tf @@ -25,6 +25,9 @@ terraform { source = "hashicorp/google-beta" version = ">= 4.12" } + null = { + version = 
">= 1.0" + } } provider_meta "google" { module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.19.1" @@ -33,5 +36,5 @@ terraform { module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.19.1" } - required_version = ">= 0.14.0" + required_version = ">= 1.2.0" } diff --git a/tools/validate_configs/test_configs/vm.yaml b/tools/validate_configs/test_configs/vm.yaml new file mode 100644 index 0000000000..b8e194d5c7 --- /dev/null +++ b/tools/validate_configs/test_configs/vm.yaml @@ -0,0 +1,48 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +blueprint_name: simple-vm + +vars: + project_id: ## Set GCP Project ID Here ## + deployment_name: simple-vm + region: us-central1 + zone: us-central1-c + +# Documentation for each of the modules used below can be found at +# https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md + +deployment_groups: +- group: primary + modules: + - id: network1 + source: modules/network/pre-existing-vpc + + - source: ./modules/compute/vm-instance + id: compute_instances + use: [network1] + settings: + name_prefix: client-vm + instance_count: 1 + machine_type: n2-standard-2 + instance_image: + project: ubuntu-os-cloud + family: ubuntu-2004-lts + # The following can be uncommented to test that changing an image definition triggers recreation. 
+ # Create this image by running: + # gcloud compute images create myubuntu-1 --source-image-family ubuntu-2004-lts \ + # --source-image-project=ubuntu-os-cloud --family myubuntu --project + # project: $(vars.project_id) + # family: myubuntu From 07b2b4df3bc914330a38c20965743f43db82528a Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Fri, 23 Jun 2023 22:01:16 -0700 Subject: [PATCH 76/92] Remove "failed tests" check from `enforce_coverage` since it doesn't work (#1499) Rely on `pipefail` instead --- tools/enforce_coverage.pl | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tools/enforce_coverage.pl b/tools/enforce_coverage.pl index 5e1e2eecfe..7a69ed8ce6 100755 --- a/tools/enforce_coverage.pl +++ b/tools/enforce_coverage.pl @@ -21,7 +21,6 @@ my $cmdmin = 40; my $shellmin = 15; my $failed_coverage = 0; -my $failed_tests = 0; while (<>){ print $_; @@ -32,14 +31,8 @@ } elsif ( $_ =~ /coverage: (\d+\.\d)%/ ) { $failed_coverage++ if ($1 < $min); } - if ($_ =~ /\d+ passed, (\d+) FAILED/){ - $failed_tests += $1; - } -} -if ($failed_tests > 0) { - print STDERR "$failed_tests test(s) failed.\n"; - exit 1 } + if ($failed_coverage > 0) { print STDERR "Coverage must be above $cmdmin% for ./cmd and $min% for other packages, $failed_coverage packages were below that.\n"; exit 1 From 0b831aed2a2b74a74de4ce34421226f5f0753fcf Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Mon, 26 Jun 2023 09:57:38 -0500 Subject: [PATCH 77/92] Drop coverage requirement for pkg/shell Nearly all effective unit testing of pkg/shell requires a mock infrastructure we don't have. Current unit tests concentrate on failing effectively when Terraform or Packer are not present. 
--- tools/enforce_coverage.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/enforce_coverage.pl b/tools/enforce_coverage.pl index 7a69ed8ce6..5f78aae300 100755 --- a/tools/enforce_coverage.pl +++ b/tools/enforce_coverage.pl @@ -19,7 +19,7 @@ # TODO: raise ./cmd min coverage to 80% after tests are written my $min = 80; my $cmdmin = 40; -my $shellmin = 15; +my $shellmin = 0; my $failed_coverage = 0; while (<>){ From e4df977a0b60cc8134320b66fdf9454d18ba3b97 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Mon, 26 Jun 2023 11:02:21 -0500 Subject: [PATCH 78/92] Update OFE requirements --- community/front-end/ofe/requirements.txt | 70 ++++++++++++------------ 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/community/front-end/ofe/requirements.txt b/community/front-end/ofe/requirements.txt index 0fe12c2c0e..608a70d6e3 100644 --- a/community/front-end/ofe/requirements.txt +++ b/community/front-end/ofe/requirements.txt @@ -1,64 +1,64 @@ -archspec==0.2.0 -argcomplete==3.0.5 -asgiref==3.6.0 -astroid==2.15.1 +archspec==0.2.1 +argcomplete==3.1.1 +asgiref==3.7.2 +astroid==2.15.5 backports.zoneinfo==0.2.1 cachetools==5.3.1 -certifi==2022.12.7 +certifi==2023.5.7 cffi==1.15.1 cfgv==3.3.1 charset-normalizer==3.1.0 click==8.1.3 -cryptography==41.0.0 +cryptography==41.0.1 decorator==5.1.1 defusedxml==0.7.1 dill==0.3.6 distlib==0.3.6 # django-revproxy==0.11.0 released but not yet in pypi git+https://github.com/jazzband/django-revproxy.git@d2234005135dc0771b7c4e0bb0465664ccfa5787 -Django==4.1.9 +Django==4.2.2 django-allauth==0.54.0 -django-extensions==3.2.1 +django-extensions==3.2.3 djangorestframework==3.14.0 -filelock==3.10.7 -google-api-core==2.11.0 -google-api-python-client==2.83.0 -google-auth==2.17.1 +filelock==3.12.2 +google-api-core==2.11.1 +google-api-python-client==2.90.0 +google-auth==2.20.0 google-auth-httplib2==0.1.0 -google-cloud-billing==1.10.1 +google-cloud-billing==1.11.0 google-cloud-core==2.3.2 -google-cloud-pubsub==2.15.2 
+google-cloud-pubsub==2.17.1 google-cloud-storage==2.9.0 google-crc32c==1.5.0 -google-resumable-media==2.4.1 -googleapis-common-protos==1.59.0 +google-resumable-media==2.5.0 +googleapis-common-protos==1.59.1 grafana-api==1.0.3 grpc-google-iam-v1==0.12.6 -grpcio==1.53.0 -grpcio-status==1.53.0 +grpcio==1.56.0 +grpcio-status==1.56.0 h11==0.14.0 httplib2==0.22.0 -identify==2.5.22 +identify==2.5.24 idna==3.4 isort==5.12.0 lazy-object-proxy==1.9.0 -libcst==0.4.9 +libcst==1.0.1 mccabe==0.7.0 mypy-extensions==1.0.0 -nodeenv==1.7.0 +nodeenv==1.8.0 oauthlib==3.2.2 -platformdirs==3.2.0 -pre-commit==3.2.1 -proto-plus==1.22.2 -protobuf==4.22.3 +platformdirs==3.8.0 +pre-commit==3.3.3 +proto-plus==1.22.3 +protobuf==4.23.3 pyasn1==0.5.0 pyasn1-modules==0.3.0 pycparser==2.21 -PyJWT==2.6.0 -pylint==2.17.1 +PyJWT==2.7.0 +pylint==2.17.4 pylint-django==2.5.3 -pylint-plugin-utils==0.7 -pyparsing==3.0.9 +pylint-plugin-utils==0.8.2 +pyparsing==3.1.0 python3-openid==3.2.0 pytz==2023.3 PyYAML==6.0 @@ -67,18 +67,18 @@ requests-oauthlib==1.3.1 retry==0.9.2 rsa==4.9 semantic-version==2.10.0 -setuptools-rust==1.5.2 +setuptools-rust==1.6.0 six==1.16.0 sqlparse==0.4.4 toml==0.10.2 tomli==2.0.1 tomlkit==0.11.8 typing-inspect==0.9.0 -typing_extensions==4.5.0 +typing_extensions==4.6.3 uritemplate==4.1.1 -urllib3==2.0.2 -uvicorn==0.21.1 -virtualenv==20.21.0 +urllib3==1.26.16 +uvicorn==0.22.0 +virtualenv==20.23.1 wrapt==1.15.0 xmltodict==0.13.0 -yq==3.1.1 +yq==3.2.2 From 2f18a70b60050fd13b9a9ad4ff1a1ba4f74b8936 Mon Sep 17 00:00:00 2001 From: Mark Olson <115657904+mark-olson@users.noreply.github.com> Date: Mon, 26 Jun 2023 10:16:05 -0700 Subject: [PATCH 79/92] Updated Intel-DAOS file-system module README.md Removed examples that show version numbers so that future updates to versions in the community/examples/intel/*.yaml blueprints do not require the community/modules/file-system/Intel-DAOS/README.md file to also be updated. 
Removed information about needing to build images prior to deployment since images are now built by the HPC Toolkit as part of the deployment. Signed-off-by: Mark Olson <115657904+mark-olson@users.noreply.github.com> --- .../modules/file-system/Intel-DAOS/README.md | 111 ++++++------------ 1 file changed, 38 insertions(+), 73 deletions(-) diff --git a/community/modules/file-system/Intel-DAOS/README.md b/community/modules/file-system/Intel-DAOS/README.md index 4875f2c656..f4b5370740 100644 --- a/community/modules/file-system/Intel-DAOS/README.md +++ b/community/modules/file-system/Intel-DAOS/README.md @@ -2,99 +2,64 @@ This module allows creating an instance of Distributed Asynchronous Object Storage ([DAOS](https://docs.daos.io/)) on Google Cloud Platform ([GCP](https://cloud.google.com/)). -For more information, please refer to the [Google Cloud DAOS repo on GitHub](https://github.com/daos-stack/google-cloud-daos). +> **_NOTE:_** +> DAOS on GCP does not require an HPC Toolkit wrapper. +> Terraform modules are sourced directly from GitHub. +> It will not work as a [local or embedded module](../../../../modules/README.md#embedded-modules). -For more information on this and other network storage options in the Cloud HPC -Toolkit, see the extended [Network Storage documentation](../../../../docs/network_storage.md). +Terraform modules for DAOS servers and clients are located in the [Google Cloud DAOS repo on GitHub](https://github.com/daos-stack/google-cloud-daos). -> **_NOTE:_** DAOS on GCP does not require an HPC Toolkit wrapper and, therefore, sources directly from GitHub. It will not work as a [local or embedded module](../../../../modules/README.md#embedded-modules). +DAOS Terraform module parameters can be found in the README.md files in each module directory. -## Examples - -Working examples of a DAOS deployment and how it can be used in conjunction with Slurm [can be found in the community examples folder](../../../examples/intel/). 
- -A full list of server module parameters can be found at [the DAOS Server module README](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server). - -### DAOS Server Images - -In order to use the DAOS server terraform module a DAOS server image must be created as instructed in the *images* directory [here](https://github.com/daos-stack/google-cloud-daos/tree/main/images). - -DAOS server images must be built from the same tagged version of the [google-cloud-daos](https://github.com/daos-stack/google-cloud-daos) repository that is specified in the `source:` attribute for modules used in the [community examples](../../../examples/intel/). - -For example, in the following snippet taken from the [community/example/intel/pfs-daos.yml](../../../examples/intel/pfs-daos.yaml) the `source:` attribute specifies v0.3.0 of the daos_server terraform module +- [DAOS Server module](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_server#readme) +- [DAOS Client module](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/modules/daos_client#readme) -```yaml - - id: daos-server - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.3.0 - use: [network1] - settings: - number_of_instances: 2 - labels: {ghpc_role: file-system} -``` - -In order to use the daos_server module v0.3.0 , you need to - -1. Clone the [google-cloud-daos](https://github.com/daos-stack/google-cloud-daos) repo and check out v0.3.0 -2. Follow the instructions in the images/README.md directory to build a DAOS server image - -## Recommended settings - -By default, the DAOS system is created with 4 servers will be configured for best cost per GB (TCO, see below), the system will be formated at the server side using [`dmg format`](https://github.com/daos-stack/google-cloud-daos/tree/develop/terraform/examples/daos_cluster#format-storage) but no pool or containers will be created. 
+For more information on this and other network storage options in the Cloud HPC Toolkit, see the extended [Network Storage documentation](../../../../docs/network_storage.md). +## Examples -The following settings will configure this [system for TCO](https://github.com/daos-stack/google-cloud-daos/tree/main/terraform/examples/daos_cluster#the-terraformtfvarstcoexample-file) (default): +The [community examples folder](../../../examples/intel/) contains two example blueprints for deploying DAOS. -```yaml - - id: daos-server - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.3.0 - use: [network1] - settings: - labels: {ghpc_role: file-system} - number_of_instances : 4 # number of DAOS server instances - machine_type : "n2-custom-36-215040" - os_disk_size_gb : 20 - daos_disk_count : 16 - daos_scm_size : 180 -``` +- [community/examples/intel/pfs-daos.yml](../../../examples/intel/pfs-daos.yml) + Blueprint for deploying a DAOS cluster consisting of servers and clients. + After deploying this example the DAOS storage system will be formatted but no pools or containers will exist. 
+ The instructions in the [community/examples/intel/README.md](../../../examples/intel/README.md#create-a-daos-pool-and-container) describe how to -The following settings will configure this system for [best performance](https://github.com/daos-stack/google-cloud-daos/tree/develop/terraform/examples/daos_cluster#the-terraformtfvarsperfexample-file): + - Deploy a DAOS cluster + - Manage storage (create a [pool](https://docs.daos.io/v2.2/overview/storage/?h=container#daos-pool) and a [container](https://docs.daos.io/v2.2/overview/storage/?h=container#daos-container)) + - Mount a container on a client + - Store a large file in a DAOS container -```yaml - - id: daos-server - source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.3.0 - use: [network1] - settings: - labels: {ghpc_role: file-system} - # The default DAOS settings are optimized for TCO - # The following will tune this system for best perf - machine_type : "n2-standard-16" - os_disk_size_gb : 20 - daos_disk_count : 4 - daos_scm_size : 45 -``` +- [community/examples/intel/hpc-slurm-daos.yaml](../../../examples/intel/hpc-slurm-daos.yaml) + Blueprint for deploying a Slurm cluster and DAOS storage with 4 servers. + The Slurm compute nodes are configured as DAOS clients and have the ability to use the DAOS filesystem. + The instructions in the [community/examples/intel/README.md](../../../examples/intel/README.md#deploy-the-daosslurm-cluster) describe how to deploy the Slurm cluster and run a job which uses the DAOS file system. ## Support Content in the [google-cloud-daos](https://github.com/daos-stack/google-cloud-daos) repository is licensed under the [Apache License Version 2.0](https://github.com/daos-stack/google-cloud-daos/blob/main/LICENSE) open-source license. -[DAOS](https://github.com/daos-stack/daos) is being distributed under the BSD-2-Clause-Patent open-source license. 
+[DAOS](https://github.com/daos-stack/daos) is distributed under the BSD-2-Clause-Patent open-source license. + +Intel Corporation provides two options for technical support: -Intel Corporation provides several ways for the users to get technical support: +1. Community Support -1. Community support is available to everybody through Jira and via the DAOS channel for the Google Cloud users on Slack. + Community support is available to anyone through Jira and via the DAOS channel for Google Cloud users on Slack. - To access Jira, please follow these steps: + JIRA: https://daosio.atlassian.net/jira/software/c/projects/DAOS/issues/ - - Navigate to https://daosio.atlassian.net/jira/software/c/projects/DAOS/issues/ + - An Atlassian account is not needed for read only access to Jira. + - An Atlassian account is required to create and update tickets. + To create an account follow the steps at https://support.atlassian.com/atlassian-account/docs/create-an-atlassian-account. - - You will need to request access to DAOS Jira to be able to create and update tickets. An Atlassian account is required for this type of access. Read-only access is available without an account. - - If you do not have an Atlassian account, follow the steps at https://support.atlassian.com/atlassian-account/docs/create-an-atlassian-account/ to create one. + Slack: https://daos-stack.slack.com/archives/C03GLTLHA59 - To access the Slack channel for DAOS on Google Cloud, please follow this link https://daos-stack.slack.com/archives/C03GLTLHA59 + Community support is provided on a best-effort basis. - > This type of support is provided on a best-effort basis, and it does not have any SLA attached. +2. Commercial L3 Support -2. Commercial L3 support is available on an on-demand basis. Please get in touch with Intel Corporation to obtain more information. + Commercial L3 support is available on an on-demand basis. 
- - You may inquire about the L3 support via the Slack channel (https://daos-stack.slack.com/archives/C03GLTLHA59) + Contact Intel Corporation to obtain more information about Commercial L3 support. -[here](https://github.com/daos-stack/google-cloud-daos/tree/main/images) + You may inquire about L3 support via the [Slack channel](https://daos-stack.slack.com/archives/C03GLTLHA59). From 86bd567878d0bea42d086ec8ce510be1f6383568 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Mon, 26 Jun 2023 14:21:26 -0700 Subject: [PATCH 80/92] Remove `getAbsSourcePath` as it re-implements `filepath.Abs` (#1505) --- pkg/deploymentio/local.go | 21 +++++---------------- pkg/deploymentio/local_test.go | 13 ------------- 2 files changed, 5 insertions(+), 29 deletions(-) diff --git a/pkg/deploymentio/local.go b/pkg/deploymentio/local.go index 573b3c801e..6366f37f9a 100644 --- a/pkg/deploymentio/local.go +++ b/pkg/deploymentio/local.go @@ -16,10 +16,8 @@ package deploymentio import ( "fmt" - "log" "os" "path/filepath" - "strings" "github.com/otiai10/copy" ) @@ -35,18 +33,6 @@ func mkdirWrapper(directory string) error { return nil } -func getAbsSourcePath(sourcePath string) string { - if strings.HasPrefix(sourcePath, "/") { // Absolute Path Already - return sourcePath - } - // Otherwise base it off of the CWD - cwd, err := os.Getwd() - if err != nil { - log.Fatalf("deploymentio: %v", err) - } - return filepath.Join(cwd, sourcePath) -} - // CreateDirectory creates the directory func (b *Local) CreateDirectory(directory string) error { if _, err := os.Stat(directory); !os.IsNotExist(err) { @@ -58,9 +44,12 @@ func (b *Local) CreateDirectory(directory string) error { return mkdirWrapper(directory) } -// CopyFromPath copyes the source file to the destination file +// CopyFromPath copies the source file to the destination file func (b *Local) CopyFromPath(src string, dst string) error { - absPath := getAbsSourcePath(src) + absPath, err := filepath.Abs(src) + if err != nil { + return err + } 
return copy.Copy(absPath, dst) } diff --git a/pkg/deploymentio/local_test.go b/pkg/deploymentio/local_test.go index a58e384815..7b865b423c 100644 --- a/pkg/deploymentio/local_test.go +++ b/pkg/deploymentio/local_test.go @@ -40,19 +40,6 @@ func (s *MySuite) TestCreateDirectoryLocal(c *C) { c.Assert(err, IsNil) } -func (s *MySuite) TestGetAbsSourcePath(c *C) { - // Already abs path - gotPath := getAbsSourcePath(testDir) - c.Assert(gotPath, Equals, testDir) - - // Relative path - relPath := "relative/path" - cwd, err := os.Getwd() - c.Assert(err, IsNil) - gotPath = getAbsSourcePath(relPath) - c.Assert(gotPath, Equals, filepath.Join(cwd, relPath)) -} - func (s *MySuite) TestCopyFromPathLocal(c *C) { deploymentio := GetDeploymentioLocal() testSrcFilename := filepath.Join(testDir, "testSrc") From eb32fe0ef27d1bf84b48af2f5e19baf26e0045a7 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 27 Jun 2023 10:14:43 -0700 Subject: [PATCH 81/92] Fix broken link (#1511) --- modules/file-system/pre-existing-network-storage/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/file-system/pre-existing-network-storage/README.md b/modules/file-system/pre-existing-network-storage/README.md index f2d9b8031c..2580169785 100644 --- a/modules/file-system/pre-existing-network-storage/README.md +++ b/modules/file-system/pre-existing-network-storage/README.md @@ -81,7 +81,7 @@ Both of these steps are automatically handled with the use of the `use` command in a selection of HPC Toolkit modules. See the [compatibility matrix][matrix] in the network storage doc for a complete list of supported modules. 
-[matrix]: ../../../../docs/network_storage.md#compatibility-matrix +[matrix]: ../../../docs/network_storage.md#compatibility-matrix ## License From 99fbd73657fea997e390d62b24072bfa3d616f1b Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 27 Jun 2023 13:08:19 -0500 Subject: [PATCH 82/92] Document scope of Windows support in HPC Toolkit --- docs/vm-images.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/vm-images.md b/docs/vm-images.md index 048f6e8a38..4cfca0b1cb 100644 --- a/docs/vm-images.md +++ b/docs/vm-images.md @@ -2,6 +2,7 @@ * [HPC CentOS 7 VM Image](#hpc-centos-7-vm-image) * [Ubuntu](#ubuntu) +* [Windows](#windows) * [Other Images](#other-images) ## HPC CentOS 7 VM Image @@ -57,6 +58,20 @@ settings: [omnia-install]: ../community/modules/scripts/omnia-install/README.md [hpc-slurm-ubuntu2004.yaml]: ../community/examples/hpc-slurm-ubuntu2004.yaml +## Windows + +The HPC Toolkit provides limited support for building custom VM images based on +the [Windows images][windows-images] published by Google. The custom VM images +can be used in blueprints so long as the underlying scheduler and workload +supports Windows. Windows solutions do not receive the same level of testing as +Linux solutions so you should anticipate that there will not be functionality +parity. Please file [issues] when encountering specific problems and [feature +requests][features] when requesting new functionality. + +[windows-images]: https://cloud.google.com/compute/docs/images/os-details#windows_server +[issues]: https://github.com/GoogleCloudPlatform/hpc-toolkit/issues +[features]: https://github.com/GoogleCloudPlatform/hpc-toolkit/discussions/categories/ideas-and-feature-requests + ## Other Images The HPC Toolkit strives to provide flexibility wherever possible. 
It is possible From 1dc82d75ac9a123b9db7c2db26f7dee82e8a3e22 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 27 Jun 2023 12:23:58 -0700 Subject: [PATCH 83/92] Enable `flatten` and `merge` functions in `Eval`. (#1513) To enable `use` on lists for packer modules * Enable `flatten` and `merge` functions in `Eval`; * Remove special handling of packer module `labels`. --- pkg/config/expand.go | 59 ++++--------------- pkg/config/expand_test.go | 26 +------- pkg/config/expression.go | 13 ++++ pkg/config/expression_test.go | 52 ++++++++++++++++ .../.ghpc/artifacts/expanded_blueprint.yaml | 8 +-- .../.ghpc/artifacts/expanded_blueprint.yaml | 12 ++-- 6 files changed, 91 insertions(+), 79 deletions(-) diff --git a/pkg/config/expand.go b/pkg/config/expand.go index 253c17108f..b8236d843e 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -47,10 +47,7 @@ var ( func (dc *DeploymentConfig) expand() error { dc.expandBackends() dc.addDefaultValidators() - - if err := dc.combineLabels(); err != nil { - return err - } + dc.combineLabels() if err := dc.applyUseModules(); err != nil { return err @@ -210,7 +207,7 @@ func getRole(source string) string { // combineLabels sets defaults for labels based on other variables and merges // the global labels defined in Vars with module setting labels. It also // determines the role and sets it for each module independently. 
-func (dc *DeploymentConfig) combineLabels() error { +func (dc *DeploymentConfig) combineLabels() { vars := &dc.Config.Vars defaults := map[string]cty.Value{ blueprintLabel: cty.StringVal(dc.Config.BlueprintName), @@ -223,59 +220,29 @@ func (dc *DeploymentConfig) combineLabels() error { gl := mergeMaps(defaults, vars.Get(labels).AsValueMap()) vars.Set(labels, cty.ObjectVal(gl)) - return dc.Config.WalkModules(func(mod *Module) error { - return combineModuleLabels(mod, *dc) + dc.Config.WalkModules(func(mod *Module) error { + combineModuleLabels(mod, *dc) + return nil }) } -func combineModuleLabels(mod *Module, dc DeploymentConfig) error { +func combineModuleLabels(mod *Module, dc DeploymentConfig) { labels := "labels" - if !moduleHasInput(*mod, labels) { - return nil // no op + return // no op } - cur := mod.Settings.Get(labels) - extra := map[string]cty.Value{roleLabel: cty.StringVal(getRole(mod.Source))} - - if mod.Kind == TerraformKind { - mod.Settings.Set(labels, mergeLabelsTf(extra, cur)) - } else if mod.Kind == PackerKind { - gl := dc.Config.Vars.Get(labels).AsValueMap() - merged, err := mergeLabelsPkr(gl, extra, cur) - if err != nil { - return err - } - mod.Settings.Set(labels, merged) - } - return nil -} - -// Terraform labels are `merge(var.labels, {ghpc_role="foo"}, [module labels])` -func mergeLabelsTf(extra map[string]cty.Value, cur cty.Value) cty.Value { + extra := map[string]cty.Value{ + roleLabel: cty.StringVal(getRole(mod.Source))} args := []cty.Value{ - GlobalRef("labels").AsExpression().AsValue(), + GlobalRef(labels).AsExpression().AsValue(), cty.ObjectVal(extra), } - if !cur.IsNull() { - args = append(args, cur) + if !mod.Settings.Get(labels).IsNull() { + args = append(args, mod.Settings.Get(labels)) } - return FunctionCallExpression("merge", args...).AsValue() -} -// Packer doesn't support `merge`, so merge it here. 
-func mergeLabelsPkr(global map[string]cty.Value, extra map[string]cty.Value, cur cty.Value) (cty.Value, error) { - modLabels := map[string]cty.Value{} - if !cur.IsNull() { - ty := cur.Type() - if !ty.IsObjectType() && !ty.IsMapType() { - return cty.NilVal, fmt.Errorf("%s,labels type: %s", errorMessages["settingsLabelType"], ty.FriendlyName()) - } - if cur.AsValueMap() != nil { - modLabels = cur.AsValueMap() - } - } - return cty.ObjectVal(mergeMaps(global, extra, modLabels)), nil + mod.Settings.Set(labels, FunctionCallExpression("merge", args...).AsValue()) } // mergeMaps takes an arbitrary number of maps, and returns a single map that contains diff --git a/pkg/config/expand_test.go b/pkg/config/expand_test.go index 09f908130f..12e96ed563 100644 --- a/pkg/config/expand_test.go +++ b/pkg/config/expand_test.go @@ -261,7 +261,6 @@ func (s *MySuite) TestCombineLabels(c *C) { coral := Module{ Source: "blue/salmon", - Kind: TerraformKind, ID: "coral", Settings: NewDict(map[string]cty.Value{ "labels": cty.ObjectVal(map[string]cty.Value{ @@ -273,21 +272,13 @@ func (s *MySuite) TestCombineLabels(c *C) { setTestModuleInfo(coral, infoWithLabels) // has no labels set - khaki := Module{Source: "brown/oak", Kind: TerraformKind, ID: "khaki"} + khaki := Module{Source: "brown/oak", ID: "khaki"} setTestModuleInfo(khaki, infoWithLabels) // has no labels set, also module has no labels input - silver := Module{Source: "ivory/black", Kind: TerraformKind, ID: "silver"} + silver := Module{Source: "ivory/black", ID: "silver"} setTestModuleInfo(silver, modulereader.ModuleInfo{Inputs: []modulereader.VarInfo{}}) - orange := Module{Source: "red/velvet", Kind: PackerKind, ID: "orange", Settings: NewDict(map[string]cty.Value{ - "labels": cty.ObjectVal(map[string]cty.Value{ - "olive": cty.StringVal("teal"), - "ghpc_deployment": cty.StringVal("navy"), - }), - })} - setTestModuleInfo(orange, infoWithLabels) - dc := DeploymentConfig{ Config: Blueprint{ BlueprintName: "simple", @@ -296,11 +287,10 @@ 
func (s *MySuite) TestCombineLabels(c *C) { }), DeploymentGroups: []DeploymentGroup{ {Name: "lime", Modules: []Module{coral, khaki, silver}}, - {Name: "pink", Modules: []Module{orange}}, }, }, } - c.Check(dc.combineLabels(), IsNil) + dc.combineLabels() // Were global labels created? c.Check(dc.Config.Vars.Get("labels"), DeepEquals, cty.ObjectVal(map[string]cty.Value{ @@ -333,16 +323,6 @@ func (s *MySuite) TestCombineLabels(c *C) { // No labels input silver = lime.Modules[2] c.Check(silver.Settings.Get("labels"), DeepEquals, cty.NilVal) - - // Packer, include global include explicitly - // Keep overridden ghpc_deployment=navy - orange = dc.Config.DeploymentGroups[1].Modules[0] - c.Check(orange.Settings.Get("labels"), DeepEquals, cty.ObjectVal(map[string]cty.Value{ - "ghpc_blueprint": cty.StringVal("simple"), - "ghpc_deployment": cty.StringVal("navy"), - "ghpc_role": cty.StringVal("red"), - "olive": cty.StringVal("teal"), - })) } func (s *MySuite) TestApplyGlobalVariables(c *C) { diff --git a/pkg/config/expression.go b/pkg/config/expression.go index f54f790f88..a46ce9379f 100644 --- a/pkg/config/expression.go +++ b/pkg/config/expression.go @@ -23,6 +23,8 @@ import ( "github.com/hashicorp/hcl/v2/hclsyntax" "github.com/hashicorp/hcl/v2/hclwrite" "github.com/zclconf/go-cty/cty" + "github.com/zclconf/go-cty/cty/function" + "github.com/zclconf/go-cty/cty/function/stdlib" ) // Reference is data struct that represents a reference to a variable. @@ -254,6 +256,7 @@ type BaseExpression struct { func (e BaseExpression) Eval(bp Blueprint) (cty.Value, error) { ctx := hcl.EvalContext{ Variables: map[string]cty.Value{"var": bp.Vars.AsObject()}, + Functions: functions(), } v, diag := e.e.Value(&ctx) if diag.HasErrors() { @@ -414,6 +417,9 @@ func TokensForValue(val cty.Value) hclwrite.Tokens { // FunctionCallExpression is a helper to build function call expression. 
func FunctionCallExpression(n string, args ...cty.Value) Expression { + if _, ok := functions()[n]; !ok { + panic("unknown function " + n) + } ta := make([]hclwrite.Tokens, len(args)) for i, a := range args { ta[i] = TokensForValue(a) @@ -421,3 +427,10 @@ func FunctionCallExpression(n string, args ...cty.Value) Expression { toks := hclwrite.TokensForFunctionCall(n, ta...) return MustParseExpression(string(toks.Bytes())) } + +func functions() map[string]function.Function { + return map[string]function.Function{ + "flatten": stdlib.FlattenFunc, + "merge": stdlib.MergeFunc, + } +} diff --git a/pkg/config/expression_test.go b/pkg/config/expression_test.go index 9205568ebc..be6206a1ee 100644 --- a/pkg/config/expression_test.go +++ b/pkg/config/expression_test.go @@ -21,6 +21,7 @@ import ( "github.com/hashicorp/hcl/v2" "github.com/hashicorp/hcl/v2/hclsyntax" "github.com/hashicorp/hcl/v2/hclwrite" + "github.com/zclconf/go-cty-debug/ctydebug" "github.com/zclconf/go-cty/cty" ) @@ -184,3 +185,54 @@ func TestTokensForValueWithLiteral(t *testing.T) { t.Errorf("diff (-want +got):\n%s", diff) } } + +func TestFlattenFunctionCallExpression(t *testing.T) { + bp := Blueprint{Vars: NewDict(map[string]cty.Value{ + "three": cty.NumberIntVal(3), + })} + expr := FunctionCallExpression("flatten", cty.TupleVal([]cty.Value{ + cty.TupleVal([]cty.Value{cty.NumberIntVal(1), cty.NumberIntVal(2)}), + GlobalRef("three").AsExpression().AsValue(), + })) + + want := cty.TupleVal([]cty.Value{ + cty.NumberIntVal(1), + cty.NumberIntVal(2), + cty.NumberIntVal(3)}) + + got, err := expr.Eval(bp) + if err != nil { + t.Errorf("got unexpected error: %s", err) + } + if diff := cmp.Diff(want, got, ctydebug.CmpOptions); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} + +func TestMergeFunctionCallExpression(t *testing.T) { + bp := Blueprint{Vars: NewDict(map[string]cty.Value{ + "fix": cty.ObjectVal(map[string]cty.Value{ + "two": cty.NumberIntVal(2), + }), + })} + expr := 
FunctionCallExpression("merge", + cty.ObjectVal(map[string]cty.Value{ + "one": cty.NumberIntVal(1), + "two": cty.NumberIntVal(3), + }), + GlobalRef("fix").AsExpression().AsValue(), + ) + + want := cty.ObjectVal(map[string]cty.Value{ + "one": cty.NumberIntVal(1), + "two": cty.NumberIntVal(2), + }) + + got, err := expr.Eval(bp) + if err != nil { + t.Errorf("got unexpected error: %s", err) + } + if diff := cmp.Diff(want, got, ctydebug.CmpOptions); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } +} diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml index 0e64f594dc..378b866568 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml @@ -113,10 +113,10 @@ deployment_groups: - script settings: deployment_name: ((var.deployment_name )) - labels: - ghpc_blueprint: igc - ghpc_deployment: golden_copy_deployment - ghpc_role: packer + labels: |- + ((merge(var.labels, { + ghpc_role = "packer" + }))) project_id: ((var.project_id )) startup_script: ((module.script.startup_script)) subnetwork_name: ((module.network0.subnetwork_name)) diff --git a/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml index 969ebc56fe..1b6f913cd5 100644 --- a/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/text_escape/.ghpc/artifacts/expanded_blueprint.yaml @@ -45,12 +45,12 @@ deployment_groups: deployment_name: ((var.deployment_name)) image_family: \$(zebra/to(ad image_name: \((cat /dog)) - labels: - brown: \$(fox) - 
ghpc_blueprint: text_escape - ghpc_deployment: golden_copy_deployment - ghpc_role: packer - ñred: ñblue + labels: |- + ((merge(var.labels, { + ghpc_role = "packer" + }, { + brown = "$(fox)" + }))) project_id: ((var.project_id)) subnetwork_name: \$(purple zone: ((var.zone)) From 5989d37dc23ba2b6a406f0754327107f6105861a Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Tue, 27 Jun 2023 21:15:24 +0000 Subject: [PATCH 84/92] Reduce size of Spack builder as max default is to use 16 cores --- examples/serverless-batch-mpi.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/serverless-batch-mpi.yaml b/examples/serverless-batch-mpi.yaml index 660d6c7aa7..ba3abda1c0 100644 --- a/examples/serverless-batch-mpi.yaml +++ b/examples/serverless-batch-mpi.yaml @@ -134,7 +134,7 @@ deployment_groups: settings: name_prefix: spack-builder add_deployment_name_before_prefix: true - machine_type: c2-standard-30 + machine_type: c2-standard-16 ### Batch Modules ### - id: batch-job From 6ab312135646d3862c28c8a0818a4de0d7bf3783 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 27 Jun 2023 14:54:32 -0700 Subject: [PATCH 85/92] Stop using deprecated variables in `vpc` and `batch-login-node` (#1514) --- modules/network/vpc/main.tf | 25 +++++++--------------- modules/scheduler/batch-login-node/main.tf | 14 ++---------- 2 files changed, 10 insertions(+), 29 deletions(-) diff --git a/modules/network/vpc/main.tf b/modules/network/vpc/main.tf index 657816b507..c330c35f99 100644 --- a/modules/network/vpc/main.tf +++ b/modules/network/vpc/main.tf @@ -19,9 +19,8 @@ locals { subnetwork_name = var.subnetwork_name == null ? 
"${var.deployment_name}-primary-subnet" : var.subnetwork_name # define a default subnetwork for cases in which no explicit subnetworks are - # defined in var.primary_subnetwork or var.subnetworks - default_primary_subnetwork_new_bits = coalesce(try(var.primary_subnetwork.new_bits, var.subnetwork_size), var.default_primary_subnetwork_size) - default_primary_subnetwork_cidr_block = cidrsubnet(var.network_address_range, local.default_primary_subnetwork_new_bits, 0) + # defined in var.subnetworks + default_primary_subnetwork_cidr_block = cidrsubnet(var.network_address_range, var.default_primary_subnetwork_size, 0) default_primary_subnetwork = { subnet_name = local.subnetwork_name subnet_ip = local.default_primary_subnetwork_cidr_block @@ -34,22 +33,14 @@ locals { } # Identify user-supplied primary subnetwork - # (1) explicit var.primary_subnetwork - # (2) explicit var.subnetworks[0] - # (3) implicit local default subnetwork - input_primary_subnetwork = try(coalesce( - var.primary_subnetwork, - try(var.subnetworks[0], null) - ), local.default_primary_subnetwork) + # (1) explicit var.subnetworks[0] + # (2) implicit local default subnetwork + input_primary_subnetwork = coalesce(try(var.subnetworks[0], null), local.default_primary_subnetwork) # Identify user-supplied additional subnetworks - # (1) explicit var.additional_subnetworks - # (2) explicit var.subnetworks[1:end] - # (3) empty list - input_additional_subnetworks = try(coalescelist( - var.additional_subnetworks, - try(slice(var.subnetworks, 1, length(var.subnetworks)), []), - ), []) + # (1) explicit var.subnetworks[1:end] + # (2) empty list + input_additional_subnetworks = try(slice(var.subnetworks, 1, length(var.subnetworks)), []) # at this point we have constructed a list of subnetworks but need to extract # user-provided CIDR blocks or calculate them from user-provided new_bits diff --git a/modules/scheduler/batch-login-node/main.tf b/modules/scheduler/batch-login-node/main.tf index 8cd6840d14..c3fe8d5326 100644 
--- a/modules/scheduler/batch-login-node/main.tf +++ b/modules/scheduler/batch-login-node/main.tf @@ -24,17 +24,7 @@ data "google_compute_instance_template" "batch_instance_template" { } locals { - - # Handle directly created job data (deprecated). All of job_id, job_template_contents and job_filename must be set. - default_job_data = var.job_template_contents == null || var.job_id == null || var.job_filename == null ? [] : [{ - id = var.job_id - filename = var.job_filename - template_contents = var.job_template_contents - }] - - job_data = concat(local.default_job_data, var.job_data) - - job_template_runners = [for job in local.job_data : { + job_template_runners = [for job in var.job_data : { content = job.template_contents destination = "${var.batch_job_directory}/${job.filename}" type = "data" @@ -51,7 +41,7 @@ locals { login_metadata = merge(local.instance_template_metadata, local.startup_metadata, local.oslogin_metadata) - batch_command_instructions = join("\n", [for job in local.job_data : <<-EOT + batch_command_instructions = join("\n", [for job in var.job_data : <<-EOT ## For job: ${job.id} ## Submit your job from login node: From 34004ac79a947f64f84e0c6cd746121492413398 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 27 Jun 2023 18:09:31 -0500 Subject: [PATCH 86/92] Add simple windows-startup-script module A simple module that will curate scripts for customizing Windows VMs. Because the Packer module presently only supports PowerShell (PS1) scripts, this implementation starts with only them. Future work might consider a more general solution such as the (Linux) startup-script module's solution to iterate over scripts stored in Cloud Storage. 
--- .../scripts/windows-startup-script/README.md | 104 ++++++++++++++++ .../files/install_gpu_driver.ps1 | 111 ++++++++++++++++++ .../scripts/windows-startup-script/main.tf | 23 ++++ .../scripts/windows-startup-script/outputs.tf | 20 ++++ .../windows-startup-script/variables.tf | 21 ++++ .../windows-startup-script/versions.tf | 23 ++++ modules/README.md | 4 + .../golden_copies/configs/igc_pkr.yaml | 8 ++ .../.ghpc/artifacts/expanded_blueprint.yaml | 13 ++ .../expectations/igc_pkr/zero/main.tf | 15 ++- .../expectations/igc_pkr/zero/outputs.tf | 6 + 11 files changed, 344 insertions(+), 4 deletions(-) create mode 100644 community/modules/scripts/windows-startup-script/README.md create mode 100644 community/modules/scripts/windows-startup-script/files/install_gpu_driver.ps1 create mode 100644 community/modules/scripts/windows-startup-script/main.tf create mode 100644 community/modules/scripts/windows-startup-script/outputs.tf create mode 100644 community/modules/scripts/windows-startup-script/variables.tf create mode 100644 community/modules/scripts/windows-startup-script/versions.tf diff --git a/community/modules/scripts/windows-startup-script/README.md b/community/modules/scripts/windows-startup-script/README.md new file mode 100644 index 0000000000..d5e4585bd6 --- /dev/null +++ b/community/modules/scripts/windows-startup-script/README.md @@ -0,0 +1,104 @@ +## Description + +This module contains a set of scripts to be used in customizing Windows VMs at +boot or during image building. Please note that the installation of NVIDIA GPU +drivers takes, at minimum, 30-60 minutes. It is therefore recommended to build +a custom image and reuse it as shown below, rather than install GPU drivers at +boot time. + +> NOTE: the output `windows_startup_ps1` must be passed explicitly as shown +> below when used with Packer modules. 
This is due to a limitation in the `use` +> keyword and inputs of type `list` in Packer modules; this does not impact +> Terraform modules + +### NVIDIA Drivers and CUDA Toolkit + +Many Google Cloud VM families include or can have NVIDIA GPUs attached to them. +This module supports GPU applications by enabling you to easily install +a compatible release of NVIDIA drivers and of the CUDA Toolkit. The script is +the [solution recommended by our documentation][docs] and is [directly sourced +from GitHub][script-src]. + +[docs]: https://cloud.google.com/compute/docs/gpus/install-drivers-gpu#windows +[script-src]: https://github.com/GoogleCloudPlatform/compute-gpu-installation/blob/24dac3004360e0696c49560f2da2cd60fcb80107/windows/install_gpu_driver.ps1 + +```yaml +- group: primary + modules: + - id: network1 + source: modules/network/vpc + settings: + enable_iap_rdp_ingress: true + enable_iap_winrm_ingress: true + + - id: windows_startup + source: community/modules/scripts/windows-startup-script + settings: + install_nvidia_driver: true + +- group: packer + modules: + - id: image + source: modules/packer/custom-image + kind: packer + use: + - network1 + - windows_startup + settings: + source_image_family: windows-2016 + machine_type: n1-standard-8 + accelerator_count: 1 + accelerator_type: nvidia-tesla-t4 + disk_size: 75 + disk_type: pd-ssd + omit_external_ip: false + state_timeout: 15m +``` + +## License + + +Copyright 2023 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.14.0 | + +## Providers + +No providers. + +## Modules + +No modules. + +## Resources + +No resources. + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [install\_nvidia\_driver](#input\_install\_nvidia\_driver) | Generate a PowerShell script that installs NVIDIA GPU drivers and the CUDA Toolkit | `bool` | `false` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [windows\_startup\_ps1](#output\_windows\_startup\_ps1) | A string list of scripts selected by this module | + diff --git a/community/modules/scripts/windows-startup-script/files/install_gpu_driver.ps1 b/community/modules/scripts/windows-startup-script/files/install_gpu_driver.ps1 new file mode 100644 index 0000000000..89d4da0644 --- /dev/null +++ b/community/modules/scripts/windows-startup-script/files/install_gpu_driver.ps1 @@ -0,0 +1,111 @@ +#Requires -RunAsAdministrator + +<# + # Copyright 2021 Google Inc. + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+#>
+
+# Determine which management interface to use
+#
+# Get-WmiObject is deprecated and removed in Powershell 6.0+
+# https://learn.microsoft.com/en-us/powershell/scripting/whats-new/differences-from-windows-powershell?view=powershell-7#cmdlets-removed-from-powershell
+#
+# We maintain backwards compatibility with older versions of Powershell by using Get-WmiObject if available
+function Get-Mgmt-Command {
+    $Command = 'Get-CimInstance'
+    if (Get-Command Get-WmiObject 2>&1>$null) {
+        $Command = 'Get-WmiObject'
+    }
+    return $Command
+}
+
+# Check if the GPU exists with Windows Management Instrumentation, returning the device ID if it exists
+function Find-GPU {
+    $MgmtCommand = Get-Mgmt-Command
+    try {
+        $Command = "(${MgmtCommand} -query ""select DeviceID from Win32_PNPEntity Where (deviceid Like '%PCI\\VEN_10DE%') and (PNPClass = 'Display' or Name = '3D Video Controller')"" | Select-Object DeviceID -ExpandProperty DeviceID).substring(13,8)"
+        $dev_id = Invoke-Expression -Command $Command
+        return $dev_id
+    }
+    catch {
+        Write-Output "There doesn't seem to be a GPU unit connected to your system."
+        return ""
+    }
+}
+
+# Check if the Driver is already installed
+function Check-Driver {
+    try {
+        &'nvidia-smi.exe'
+        Write-Output 'Driver is already installed.'
+ Exit + } + catch { + Write-Output 'Driver is not installed, proceeding with installation' + } +} + +# Install the driver +function Install-Driver { + + # Check if the GPU exists and if the driver is already installed + $gpu_dev_id = Find-GPU + + # Set the correct URL, filename, and arguments to the installer + $url = 'https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_531.14_windows.exe'; + $file_dir = 'C:\NVIDIA-Driver\cuda_12.1.1_531.14_windows.exe'; + $install_args = '/s /n'; + $os_name = Invoke-Expression -Command 'systeminfo | findstr /B /C:"OS Name"' + if ($os_name.Contains("Microsoft Windows Server 2016 Datacenter")) { + # Windows Server 2016 needs an older version of the installer to work properly + $url = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_522.06_windows.exe" + $file_dir = "C:\NVIDIA-Driver\cuda_11.8.0_522.06_windows.exe" + # Windows 2016 also requires manual setting of TLS version + [Net.ServicePointManager]::SecurityProtocol = 'Tls12' + } + if ("DEV_102D".Equals($gpu_dev_id)) { + # K80 GPUs must use an older driver/CUDA version + $url = 'https://developer.download.nvidia.com/compute/cuda/11.4.0/network_installers/cuda_11.4.0_win10_network.exe'; + $file_dir = 'C:\NVIDIA-Driver\cuda_11.4.0_win10_network.exe'; + } + if ("DEV_27B8".Equals($gpu_dev_id)) { + # The latest CUDA bundle (12.1.1) does not support L4 GPUs, so this script + # only installs the driver (version 528.89). There is a different installer + # for Windows server 2016/2019/2022 and Windows 10/11, so use systeminfo + # to determine which installer to use. 
+        $install_args = '/s /noeula /noreboot';
+        if ($os_name.Contains("Server")) {
+            $url = 'https://us.download.nvidia.com/tesla/528.89/528.89-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe';
+            $file_dir = 'C:\NVIDIA-Driver\528.89-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe';
+        } else {
+            $url = 'https://us.download.nvidia.com/tesla/528.89/528.89-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe';
+            $file_dir = 'C:\NVIDIA-Driver\528.89-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe';
+        }
+    }
+    Check-Driver
+
+    # Create the folder for the driver download
+    if (!(Test-Path -Path 'C:\NVIDIA-Driver')) {
+        New-Item -Path 'C:\' -Name 'NVIDIA-Driver' -ItemType 'directory' | Out-Null
+    }
+
+    # Download the file to a specified directory
+    Invoke-WebRequest $url -OutFile $file_dir
+
+    # Install the file with the specified path from earlier as well as the RunAs admin option
+    Start-Process -FilePath $file_dir -ArgumentList $install_args -Wait
+}
+
+# Run the functions
+Install-Driver
diff --git a/community/modules/scripts/windows-startup-script/main.tf b/community/modules/scripts/windows-startup-script/main.tf
new file mode 100644
index 0000000000..7974559ee6
--- /dev/null
+++ b/community/modules/scripts/windows-startup-script/main.tf
@@ -0,0 +1,23 @@
+/**
+ * Copyright 2023 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + + +locals { + nvidia_ps1 = var.install_nvidia_driver ? [file("${path.module}/files/install_gpu_driver.ps1")] : [] + + # anticipate concat multiple solutions over time + startup_ps1 = local.nvidia_ps1 +} diff --git a/community/modules/scripts/windows-startup-script/outputs.tf b/community/modules/scripts/windows-startup-script/outputs.tf new file mode 100644 index 0000000000..006ea312ad --- /dev/null +++ b/community/modules/scripts/windows-startup-script/outputs.tf @@ -0,0 +1,20 @@ +/** + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +output "windows_startup_ps1" { + description = "A string list of scripts selected by this module" + value = local.startup_ps1 +} diff --git a/community/modules/scripts/windows-startup-script/variables.tf b/community/modules/scripts/windows-startup-script/variables.tf new file mode 100644 index 0000000000..53b3b21045 --- /dev/null +++ b/community/modules/scripts/windows-startup-script/variables.tf @@ -0,0 +1,21 @@ +/** + * Copyright 2023 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +variable "install_nvidia_driver" { + description = "Generate a PowerShell script that installs NVIDIA GPU drivers and the CUDA Toolkit" + type = bool + default = false +} diff --git a/community/modules/scripts/windows-startup-script/versions.tf b/community/modules/scripts/windows-startup-script/versions.tf new file mode 100644 index 0000000000..220da92e04 --- /dev/null +++ b/community/modules/scripts/windows-startup-script/versions.tf @@ -0,0 +1,23 @@ +/** + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +terraform { + provider_meta "google" { + module_name = "blueprints/terraform/hpc-toolkit:windows-startup-script/v1.19.1" + } + + required_version = ">= 0.14.0" +} diff --git a/modules/README.md b/modules/README.md index d040301e39..99c404f242 100644 --- a/modules/README.md +++ b/modules/README.md @@ -182,6 +182,9 @@ Modules that are still in development and less stable are labeled with the * **[startup-script]** ![core-badge] : Creates a customizable startup script that can be fed into compute VMs. +* **[windows-startup-script]** ![community-badge] ![experimental-badge]: Creates + Windows PowerShell (PS1) scripts that can be used to customize Windows VMs + and VM images. * **[htcondor-install]** ![community-badge] ![experimental-badge] : Creates a startup script to install HTCondor and exports a list of required APIs * **[kubernetes-operations]** ![community-badge] ![experimental-badge] : @@ -210,6 +213,7 @@ Modules that are still in development and less stable are labeled with the successful completion of a startup script on a compute VM. 
[startup-script]: scripts/startup-script/README.md +[windows-startup-script]: ../community/modules/scripts/windows-startup-script/README.md [htcondor-install]: ../community/modules/scripts/htcondor-install/README.md [kubernetes-operations]: ../community/modules/scripts/kubernetes-operations/README.md [omnia-install]: ../community/modules/scripts/omnia-install/README.md diff --git a/tools/validate_configs/golden_copies/configs/igc_pkr.yaml b/tools/validate_configs/golden_copies/configs/igc_pkr.yaml index fc119d2085..07963b6413 100644 --- a/tools/validate_configs/golden_copies/configs/igc_pkr.yaml +++ b/tools/validate_configs/golden_copies/configs/igc_pkr.yaml @@ -26,6 +26,9 @@ deployment_groups: modules: - id: network0 source: modules/network/vpc + settings: + enable_iap_rdp_ingress: true + enable_iap_winrm_ingress: true - id: homefs source: modules/file-system/filestore use: [network0] @@ -45,6 +48,10 @@ deployment_groups: content: | #!/bin/bash echo "Hello, World!" + - id: windows_startup + source: community/modules/scripts/windows-startup-script + settings: + install_nvidia_driver: true - group: one modules: @@ -54,3 +61,4 @@ deployment_groups: use: - network0 - script + - windows_startup diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml index 378b866568..f606099f79 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml @@ -47,6 +47,8 @@ deployment_groups: sensitive: true settings: deployment_name: ((var.deployment_name )) + enable_iap_rdp_ingress: true + enable_iap_winrm_ingress: true project_id: ((var.project_id )) region: ((var.region )) - source: modules/file-system/filestore @@ -102,6 +104,15 @@ deployment_groups: echo "Hello, World!" 
destination: hello.sh type: shell + - source: community/modules/scripts/windows-startup-script + kind: terraform + id: windows_startup + outputs: + - name: windows_startup_ps1 + description: Automatically-generated output exported for use by later deployment groups + sensitive: true + settings: + install_nvidia_driver: true kind: terraform - group: one modules: @@ -111,6 +122,7 @@ deployment_groups: use: - network0 - script + - windows_startup settings: deployment_name: ((var.deployment_name )) labels: |- @@ -120,5 +132,6 @@ deployment_groups: project_id: ((var.project_id )) startup_script: ((module.script.startup_script)) subnetwork_name: ((module.network0.subnetwork_name)) + windows_startup_ps1: ((flatten([module.windows_startup.windows_startup_ps1]))) zone: ((var.zone )) kind: packer diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/main.tf b/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/main.tf index d8ab74c242..d44735f911 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/main.tf +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/main.tf @@ -15,10 +15,12 @@ */ module "network0" { - source = "./modules/embedded/modules/network/vpc" - deployment_name = var.deployment_name - project_id = var.project_id - region = var.region + source = "./modules/embedded/modules/network/vpc" + deployment_name = var.deployment_name + enable_iap_rdp_ingress = true + enable_iap_winrm_ingress = true + project_id = var.project_id + region = var.region } module "homefs" { @@ -61,3 +63,8 @@ module "script" { type = "shell" }] } + +module "windows_startup" { + source = "./modules/embedded/community/modules/scripts/windows-startup-script" + install_nvidia_driver = true +} diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/outputs.tf b/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/outputs.tf index 2ea9eaf0af..04bef33e47 100644 --- 
a/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/outputs.tf +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/outputs.tf @@ -25,3 +25,9 @@ output "startup_script_script" { value = module.script.startup_script sensitive = true } + +output "windows_startup_ps1_windows_startup" { + description = "Automatically-generated output exported for use by later deployment groups" + value = module.windows_startup.windows_startup_ps1 + sensitive = true +} From ac508b88c6d5f6685fd3a0ab6c909f4085d4778c Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 27 Jun 2023 18:09:31 -0500 Subject: [PATCH 87/92] Refactor Packer PowerShell implementation Accept a list of string scripts rather than paths to files, due to limitation in Toolkit that makes file paths useful primary only if they are absolute, which makes solutions less portable. --- modules/packer/custom-image/README.md | 2 +- modules/packer/custom-image/image.pkr.hcl | 6 +++--- modules/packer/custom-image/variables.pkr.hcl | 4 ++-- .../expectations/igc_pkr/one/image/image.pkr.hcl | 6 +++--- .../expectations/igc_pkr/one/image/variables.pkr.hcl | 4 ++-- .../expectations/text_escape/zero/lime/image.pkr.hcl | 6 +++--- .../expectations/text_escape/zero/lime/variables.pkr.hcl | 4 ++-- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/modules/packer/custom-image/README.md b/modules/packer/custom-image/README.md index 827f42ac9b..731b463e10 100644 --- a/modules/packer/custom-image/README.md +++ b/modules/packer/custom-image/README.md @@ -252,7 +252,6 @@ No resources. | [network\_project\_id](#input\_network\_project\_id) | Project ID of Shared VPC network | `string` | `null` | no | | [omit\_external\_ip](#input\_omit\_external\_ip) | Provision the image building VM without a public IP address | `bool` | `true` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Describes maintenance behavior for the instance. 
If left blank this will default to `MIGRATE` except the use of GPUs requires it to be `TERMINATE` | `string` | `null` | no | -| [powershell\_scripts](#input\_powershell\_scripts) | A list of paths to local powershell scripts which will be uploaded to customize the Windows VM image (requires WinRM communicator) | `list(string)` | `[]` | no | | [project\_id](#input\_project\_id) | Project in which to create VM and image | `string` | n/a | yes | | [scopes](#input\_scopes) | Service account scopes to attach to the instance. See
https://cloud.google.com/compute/docs/access/service-accounts. | `list(string)` |
[
"https://www.googleapis.com/auth/userinfo.email",
"https://www.googleapis.com/auth/compute",
"https://www.googleapis.com/auth/devstorage.full_control",
"https://www.googleapis.com/auth/logging.write"
]
| no | | [service\_account\_email](#input\_service\_account\_email) | The service account email to use. If null or 'default', then the default Compute Engine service account will be used. | `string` | `null` | no | @@ -268,6 +267,7 @@ No resources. | [tags](#input\_tags) | Assign network tags to apply firewall rules to VM instance | `list(string)` | `null` | no | | [use\_iap](#input\_use\_iap) | Use IAP proxy when connecting by SSH | `bool` | `true` | no | | [use\_os\_login](#input\_use\_os\_login) | Use OS Login when connecting by SSH | `bool` | `false` | no | +| [windows\_startup\_ps1](#input\_windows\_startup\_ps1) | A list of strings containing PowerShell scripts which will customize a Windows VM image (requires WinRM communicator) | `list(string)` | `[]` | no | | [wrap\_startup\_script](#input\_wrap\_startup\_script) | Wrap startup script with Packer-generated wrapper | `bool` | `true` | no | | [zone](#input\_zone) | Cloud zone in which to provision image building VM | `string` | n/a | yes | diff --git a/modules/packer/custom-image/image.pkr.hcl b/modules/packer/custom-image/image.pkr.hcl index 2ddd6bad1d..fa9a372cc5 100644 --- a/modules/packer/custom-image/image.pkr.hcl +++ b/modules/packer/custom-image/image.pkr.hcl @@ -21,7 +21,7 @@ locals { # default to explicit var.communicator, otherwise in-order: ssh/winrm/none shell_script_communicator = length(var.shell_scripts) > 0 ? "ssh" : "" ansible_playbook_communicator = length(var.ansible_playbooks) > 0 ? "ssh" : "" - powershell_script_communicator = length(var.powershell_scripts) > 0 ? "winrm" : "" + powershell_script_communicator = length(var.windows_startup_ps1) > 0 ? 
"winrm" : "" communicator = coalesce( var.communicator, local.shell_script_communicator, @@ -134,9 +134,9 @@ build { # provisioner "powershell" blocks dynamic "provisioner" { labels = ["powershell"] - for_each = var.powershell_scripts + for_each = var.windows_startup_ps1 content { - script = provisioner.value + inline = split("\n", provisioner.value) } } diff --git a/modules/packer/custom-image/variables.pkr.hcl b/modules/packer/custom-image/variables.pkr.hcl index 49c31edc78..9589669153 100644 --- a/modules/packer/custom-image/variables.pkr.hcl +++ b/modules/packer/custom-image/variables.pkr.hcl @@ -156,8 +156,8 @@ variable "shell_scripts" { default = [] } -variable "powershell_scripts" { - description = "A list of paths to local powershell scripts which will be uploaded to customize the Windows VM image (requires WinRM communicator)" +variable "windows_startup_ps1" { + description = "A list of strings containing PowerShell scripts which will customize a Windows VM image (requires WinRM communicator)" type = list(string) default = [] } diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl index 2ddd6bad1d..fa9a372cc5 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/image.pkr.hcl @@ -21,7 +21,7 @@ locals { # default to explicit var.communicator, otherwise in-order: ssh/winrm/none shell_script_communicator = length(var.shell_scripts) > 0 ? "ssh" : "" ansible_playbook_communicator = length(var.ansible_playbooks) > 0 ? "ssh" : "" - powershell_script_communicator = length(var.powershell_scripts) > 0 ? "winrm" : "" + powershell_script_communicator = length(var.windows_startup_ps1) > 0 ? 
"winrm" : "" communicator = coalesce( var.communicator, local.shell_script_communicator, @@ -134,9 +134,9 @@ build { # provisioner "powershell" blocks dynamic "provisioner" { labels = ["powershell"] - for_each = var.powershell_scripts + for_each = var.windows_startup_ps1 content { - script = provisioner.value + inline = split("\n", provisioner.value) } } diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/variables.pkr.hcl b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/variables.pkr.hcl index 49c31edc78..9589669153 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/variables.pkr.hcl +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/one/image/variables.pkr.hcl @@ -156,8 +156,8 @@ variable "shell_scripts" { default = [] } -variable "powershell_scripts" { - description = "A list of paths to local powershell scripts which will be uploaded to customize the Windows VM image (requires WinRM communicator)" +variable "windows_startup_ps1" { + description = "A list of strings containing PowerShell scripts which will customize a Windows VM image (requires WinRM communicator)" type = list(string) default = [] } diff --git a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl index 2ddd6bad1d..fa9a372cc5 100644 --- a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl +++ b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/image.pkr.hcl @@ -21,7 +21,7 @@ locals { # default to explicit var.communicator, otherwise in-order: ssh/winrm/none shell_script_communicator = length(var.shell_scripts) > 0 ? "ssh" : "" ansible_playbook_communicator = length(var.ansible_playbooks) > 0 ? "ssh" : "" - powershell_script_communicator = length(var.powershell_scripts) > 0 ? 
"winrm" : "" + powershell_script_communicator = length(var.windows_startup_ps1) > 0 ? "winrm" : "" communicator = coalesce( var.communicator, local.shell_script_communicator, @@ -134,9 +134,9 @@ build { # provisioner "powershell" blocks dynamic "provisioner" { labels = ["powershell"] - for_each = var.powershell_scripts + for_each = var.windows_startup_ps1 content { - script = provisioner.value + inline = split("\n", provisioner.value) } } diff --git a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/variables.pkr.hcl b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/variables.pkr.hcl index 49c31edc78..9589669153 100644 --- a/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/variables.pkr.hcl +++ b/tools/validate_configs/golden_copies/expectations/text_escape/zero/lime/variables.pkr.hcl @@ -156,8 +156,8 @@ variable "shell_scripts" { default = [] } -variable "powershell_scripts" { - description = "A list of paths to local powershell scripts which will be uploaded to customize the Windows VM image (requires WinRM communicator)" +variable "windows_startup_ps1" { + description = "A list of strings containing PowerShell scripts which will customize a Windows VM image (requires WinRM communicator)" type = list(string) default = [] } From 79ea778b3f62a7d23736607651a23a7b651aae89 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 28 Jun 2023 11:29:52 -0500 Subject: [PATCH 88/92] Update OFE Terraform module - Use pubsub module that is compatible with 4.x google provider - Satisfy linter by providing a source for null provider; add contemporary version constraint while doing so --- community/front-end/ofe/tf/README.md | 6 +++--- community/front-end/ofe/tf/main.tf | 2 +- community/front-end/ofe/tf/versions.tf | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/community/front-end/ofe/tf/README.md b/community/front-end/ofe/tf/README.md index 8c92f61916..faa58140ae 100644 --- 
a/community/front-end/ofe/tf/README.md +++ b/community/front-end/ofe/tf/README.md @@ -20,14 +20,14 @@ limitations under the License. | [terraform](#requirement\_terraform) | >= 0.13 | | [google](#requirement\_google) | ~> 4.0 | | [google-beta](#requirement\_google-beta) | ~> 4.0 | -| [null](#requirement\_null) | >= 1.0 | +| [null](#requirement\_null) | ~> 3.0 | ## Providers | Name | Version | |------|---------| | [google](#provider\_google) | ~> 4.0 | -| [null](#provider\_null) | >= 1.0 | +| [null](#provider\_null) | ~> 3.0 | ## Modules @@ -35,7 +35,7 @@ limitations under the License. |------|--------|---------| | [control\_bucket](#module\_control\_bucket) | terraform-google-modules/cloud-storage/google | ~> 4.0 | | [network](#module\_network) | ./network | n/a | -| [pubsub](#module\_pubsub) | terraform-google-modules/pubsub/google | ~> 1.8 | +| [pubsub](#module\_pubsub) | terraform-google-modules/pubsub/google | ~> 5.0 | | [service\_account](#module\_service\_account) | terraform-google-modules/service-accounts/google | ~> 4.1 | ## Resources diff --git a/community/front-end/ofe/tf/main.tf b/community/front-end/ofe/tf/main.tf index 89711f2b65..5954117432 100644 --- a/community/front-end/ofe/tf/main.tf +++ b/community/front-end/ofe/tf/main.tf @@ -109,7 +109,7 @@ resource "google_storage_bucket_object" "config_file" { module "pubsub" { source = "terraform-google-modules/pubsub/google" - version = "~> 1.8" + version = "~> 5.0" topic = var.deployment_name project_id = var.project_id diff --git a/community/front-end/ofe/tf/versions.tf b/community/front-end/ofe/tf/versions.tf index c067e0ccc1..e2b36c9849 100644 --- a/community/front-end/ofe/tf/versions.tf +++ b/community/front-end/ofe/tf/versions.tf @@ -27,7 +27,8 @@ terraform { version = "~> 4.0" } null = { - version = ">= 1.0" + source = "hashicorp/null" + version = "~> 3.0" } } } From cee21644e6f4512c1973ca0355e03af67b7d3c83 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 29 Jun 2023 14:40:40 -0500 Subject: 
[PATCH 89/92] Document known incompatibility of Slurm module with local-ssds --- .../modules/compute/schedmd-slurm-gcp-v5-node-group/README.md | 2 +- .../compute/schedmd-slurm-gcp-v5-node-group/variables.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/README.md b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/README.md index 2ad32a6f22..e6f582ba70 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/README.md @@ -119,7 +119,7 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [access\_config](#input\_access\_config) | Access configurations, i.e. IPs via which the node group instances can be accessed via the internet. |
list(object({
network_tier = string
}))
| `[]` | no | -| [additional\_disks](#input\_additional\_disks) | Configurations of additional disks to be included on the partition nodes. |
list(object({
disk_name = string
device_name = string
disk_size_gb = number
disk_type = string
disk_labels = map(string)
auto_delete = bool
boot = bool
}))
| `[]` | no | +| [additional\_disks](#input\_additional\_disks) | Configurations of additional disks to be included on the partition nodes. (do not use "disk\_type: local-ssd"; known issue being addressed) |
list(object({
disk_name = string
device_name = string
disk_size_gb = number
disk_type = string
disk_labels = map(string)
auto_delete = bool
boot = bool
}))
| `[]` | no | | [bandwidth\_tier](#input\_bandwidth\_tier) | Configures the network interface card and the maximum egress bandwidth for VMs.
- Setting `platform_default` respects the Google Cloud Platform API default values for networking.
- Setting `virtio_enabled` explicitly selects the VirtioNet network adapter.
- Setting `gvnic_enabled` selects the gVNIC network adapter (without Tier 1 high bandwidth).
- Setting `tier_1_enabled` selects both the gVNIC adapter and Tier 1 high bandwidth networking.
- Note: both gVNIC and Tier 1 networking require a VM image with gVNIC support as well as specific VM families and shapes.
- See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"platform_default"` | no | | [can\_ip\_forward](#input\_can\_ip\_forward) | Enable IP forwarding, for NAT instances for example. | `bool` | `false` | no | | [disable\_public\_ips](#input\_disable\_public\_ips) | If set to false. The node group VMs will have a random public IP assigned to it. Ignored if access\_config is set. | `bool` | `true` | no | diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf index b77caa08a3..64703bb0dc 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/variables.tf @@ -166,7 +166,7 @@ variable "disk_labels" { } variable "additional_disks" { - description = "Configurations of additional disks to be included on the partition nodes." + description = "Configurations of additional disks to be included on the partition nodes. (do not use \"disk_type: local-ssd\"; known issue being addressed)" type = list(object({ disk_name = string device_name = string From af6bf86dfa95c0990d81b857ba68bd7fd6e1ffbe Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 6 Jul 2023 10:30:16 -0500 Subject: [PATCH 90/92] Update HTCondor tutorial --- docs/tutorials/htcondor.md | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/docs/tutorials/htcondor.md b/docs/tutorials/htcondor.md index 27f8168522..1f7c7b9932 100644 --- a/docs/tutorials/htcondor.md +++ b/docs/tutorials/htcondor.md @@ -20,7 +20,7 @@ Talk with your tutorial leader to see if Google Cloud credits are available. 
## Enable APIs & Permissions -In a new Google Cloud project there are several apis that must be enabled to +In a new Google Cloud project there are several APIs that must be enabled to deploy your HPC cluster. These will be caught when you perform `terraform apply` but you can save time by enabling them now by running: @@ -63,8 +63,8 @@ a basic auto-scaling HTCondor pool. * a new VPC network secured from the public internet * an HTCondor Access Point for users to submit jobs * an HTCondor Central Manager that will operate the pool -* a Managed Instance Group to scale a pool of HTCondor Execute Points to serve - new jobs as they are submitted +* 2 Managed Instance Groups for HTCondor Execute Points: 1 is configured with + Spot pricing and the other with On-Demand pricing The blueprint `community/examples/htc-htcondor.yaml` should be open in the Cloud Shell Editor (on the left). @@ -89,15 +89,13 @@ contains the terraform needed to deploy your cluster. Use the following commands to run terraform and deploy your cluster. ```bash -terraform -chdir=htcondor-001/htcondor init -terraform -chdir=htcondor-001/htcondor validate -terraform -chdir=htcondor-001/htcondor apply -auto-approve +./ghpc deploy htcondor-pool --auto-approve ``` -If you receive any errors during `apply`, you may re-run it to resolve them. -The deployment will take about 3 minutes. There should be regular status updates -in the terminal. If the `apply` is successful, a message similar to the -following will be displayed: +The Toolkit will automatically approve provisioning a network, building a VM +image with HTCondor and, finally, the HTCondor pool itself. There will be +regular status updates in the terminal. At the conclusion, a message similar to +the following will be displayed: @@ -111,10 +109,10 @@ Apply complete! Resources: xx added, 0 changed, 0 destroyed. 
Once terraform has finished, you may SSH to the HTCondor Access Point: ```bash -gcloud compute ssh htcondor001-ap-0 --tunnel-through-iap --project --zone us-central1-c +gcloud compute ssh htcondor-pool-ap-0 --tunnel-through-iap --project --zone us-central1-c ``` -Alternatively, you may browse to the `htcondor001-ap-0` VM and click on "SSH" in +Alternatively, you may browse to the `htcondor-pool-ap-0` VM and click on "SSH" in the Cloud Console at this address: ```text @@ -142,7 +140,7 @@ connect"). Installation may take 5 minutes or more. When it succeeds, you will observe output similar to ```text -htcondor001-ap-0.us-central1-c.c..internal +htcondor-pool-ap-0.us-central1-c.c..internal ``` ## Submit an example job @@ -224,7 +222,7 @@ You should be returned to the Cloud Shell console. You may then destroy your HTCondor pool: ```bash -terraform -chdir=htcondor-001/htcondor destroy -auto-approve +./ghpc destroy htcondor-pool --auto-approve ``` When complete you should see output similar to: From 8dfbecd552cff36a738f2966758e5af06f0c947d Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Thu, 6 Jul 2023 13:12:54 -0700 Subject: [PATCH 91/92] Version update 1.19.1 -> 1.20.0 (#1550) --- cmd/root.go | 2 +- community/modules/compute/gke-node-pool/versions.tf | 2 +- .../compute/schedmd-slurm-gcp-v5-node-group/versions.tf | 2 +- .../compute/schedmd-slurm-gcp-v5-partition/versions.tf | 2 +- .../modules/database/slurm-cloudsql-federation/versions.tf | 4 ++-- .../modules/file-system/cloud-storage-bucket/versions.tf | 2 +- .../modules/file-system/gke-persistent-volume/versions.tf | 2 +- community/modules/file-system/nfs-server/versions.tf | 2 +- community/modules/project/service-enablement/versions.tf | 2 +- .../scheduler/SchedMD-slurm-on-gcp-controller/versions.tf | 2 +- .../scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf | 2 +- community/modules/scheduler/gke-cluster/versions.tf | 2 +- community/modules/scheduler/htcondor-configure/versions.tf | 2 +- 
.../scheduler/schedmd-slurm-gcp-v5-controller/versions.tf | 2 +- .../modules/scheduler/schedmd-slurm-gcp-v5-login/versions.tf | 2 +- community/modules/scripts/wait-for-startup/versions.tf | 2 +- community/modules/scripts/windows-startup-script/versions.tf | 2 +- modules/compute/vm-instance/versions.tf | 4 ++-- modules/file-system/filestore/versions.tf | 4 ++-- modules/monitoring/dashboard/versions.tf | 2 +- modules/network/pre-existing-vpc/versions.tf | 2 +- modules/scheduler/batch-login-node/versions.tf | 2 +- modules/scripts/startup-script/versions.tf | 2 +- 23 files changed, 26 insertions(+), 26 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 25b81ac0e3..1fd2ffd9ad 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -50,7 +50,7 @@ HPC deployments on the Google Cloud Platform.`, log.Fatalf("cmd.Help function failed: %s", err) } }, - Version: "v1.19.1", + Version: "v1.20.0", Annotations: annotation, } ) diff --git a/community/modules/compute/gke-node-pool/versions.tf b/community/modules/compute/gke-node-pool/versions.tf index 045878e228..b6d8925661 100644 --- a/community/modules/compute/gke-node-pool/versions.tf +++ b/community/modules/compute/gke-node-pool/versions.tf @@ -26,6 +26,6 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:gke-node-pool/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:gke-node-pool/v1.20.0" } } diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/versions.tf b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/versions.tf index 1a49d06801..ad2ac86f04 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/versions.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-node-group/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-node-group/v1.20.0" } 
required_version = ">= 0.13.0" } diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-partition/versions.tf b/community/modules/compute/schedmd-slurm-gcp-v5-partition/versions.tf index 319382700d..7905f526f5 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-partition/versions.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-partition/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-partition/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-partition/v1.20.0" } required_version = ">= 0.13.0" } diff --git a/community/modules/database/slurm-cloudsql-federation/versions.tf b/community/modules/database/slurm-cloudsql-federation/versions.tf index 2a247f4e0b..25f1c7a332 100644 --- a/community/modules/database/slurm-cloudsql-federation/versions.tf +++ b/community/modules/database/slurm-cloudsql-federation/versions.tf @@ -30,10 +30,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.20.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:slurm-cloudsql-federation/v1.20.0" } required_version = ">= 0.13.0" diff --git a/community/modules/file-system/cloud-storage-bucket/versions.tf b/community/modules/file-system/cloud-storage-bucket/versions.tf index c502ae4337..72f75aa07f 100644 --- a/community/modules/file-system/cloud-storage-bucket/versions.tf +++ b/community/modules/file-system/cloud-storage-bucket/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:cloud-storage-bucket/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:cloud-storage-bucket/v1.20.0" } required_version = ">= 
0.14.0" } diff --git a/community/modules/file-system/gke-persistent-volume/versions.tf b/community/modules/file-system/gke-persistent-volume/versions.tf index 56c950669f..c3c0003c5c 100644 --- a/community/modules/file-system/gke-persistent-volume/versions.tf +++ b/community/modules/file-system/gke-persistent-volume/versions.tf @@ -29,6 +29,6 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:gke-persistent-volume/v1.18.0" + module_name = "blueprints/terraform/hpc-toolkit:gke-persistent-volume/v1.20.0" } } diff --git a/community/modules/file-system/nfs-server/versions.tf b/community/modules/file-system/nfs-server/versions.tf index 645d41dc84..9230a6d48c 100644 --- a/community/modules/file-system/nfs-server/versions.tf +++ b/community/modules/file-system/nfs-server/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:nfs-server/v1.20.0" } required_version = ">= 0.14.0" diff --git a/community/modules/project/service-enablement/versions.tf b/community/modules/project/service-enablement/versions.tf index 2d01317135..0b9170fe1a 100644 --- a/community/modules/project/service-enablement/versions.tf +++ b/community/modules/project/service-enablement/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:service-enablement/v1.20.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf index 02d5ebd34b..ec48f280bb 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf @@ -16,7 +16,7 @@ terraform { provider_meta 
"google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-controller/v1.20.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf index c2d62da384..50fe9e61e3 100644 --- a/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf +++ b/community/modules/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf @@ -16,7 +16,7 @@ terraform { provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:SchedMD-slurm-on-gcp-login-node/v1.20.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scheduler/gke-cluster/versions.tf b/community/modules/scheduler/gke-cluster/versions.tf index a763888558..fb7b6bf7b6 100644 --- a/community/modules/scheduler/gke-cluster/versions.tf +++ b/community/modules/scheduler/gke-cluster/versions.tf @@ -26,6 +26,6 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:gke-cluster/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:gke-cluster/v1.20.0" } } diff --git a/community/modules/scheduler/htcondor-configure/versions.tf b/community/modules/scheduler/htcondor-configure/versions.tf index 0f755aeeb1..6827118448 100644 --- a/community/modules/scheduler/htcondor-configure/versions.tf +++ b/community/modules/scheduler/htcondor-configure/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:htcondor-configure/v1.20.0" } required_version = ">= 0.13.0" diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/versions.tf 
b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/versions.tf index b9ddedc830..5c2986b93f 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/versions.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-controller/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-controller/v1.20.0" } required_version = ">= 0.14.0" } diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/versions.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/versions.tf index afdf62ab5c..d195c77a28 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/versions.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-login/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v5-login/v1.20.0" } required_version = ">= 0.14.0" } diff --git a/community/modules/scripts/wait-for-startup/versions.tf b/community/modules/scripts/wait-for-startup/versions.tf index be1799aca6..3b8ac805b8 100644 --- a/community/modules/scripts/wait-for-startup/versions.tf +++ b/community/modules/scripts/wait-for-startup/versions.tf @@ -26,7 +26,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:wait-for-startup/v1.20.0" } required_version = ">= 0.14.0" diff --git a/community/modules/scripts/windows-startup-script/versions.tf b/community/modules/scripts/windows-startup-script/versions.tf index 220da92e04..145b1fbfda 100644 --- a/community/modules/scripts/windows-startup-script/versions.tf +++ b/community/modules/scripts/windows-startup-script/versions.tf @@ -16,7 +16,7 @@ 
terraform { provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:windows-startup-script/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:windows-startup-script/v1.20.0" } required_version = ">= 0.14.0" diff --git a/modules/compute/vm-instance/versions.tf b/modules/compute/vm-instance/versions.tf index 9fa052445e..9adc408492 100644 --- a/modules/compute/vm-instance/versions.tf +++ b/modules/compute/vm-instance/versions.tf @@ -30,10 +30,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.20.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.20.0" } required_version = ">= 1.2.0" diff --git a/modules/file-system/filestore/versions.tf b/modules/file-system/filestore/versions.tf index 2ad4eee7d1..779bfb96cf 100644 --- a/modules/file-system/filestore/versions.tf +++ b/modules/file-system/filestore/versions.tf @@ -26,10 +26,10 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.20.0" } provider_meta "google-beta" { - module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:filestore/v1.20.0" } required_version = ">= 0.14.0" diff --git a/modules/monitoring/dashboard/versions.tf b/modules/monitoring/dashboard/versions.tf index 17a7c1a0b2..2341c79401 100644 --- a/modules/monitoring/dashboard/versions.tf +++ b/modules/monitoring/dashboard/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:dashboard/v1.20.0" } required_version = ">= 0.14.0" diff --git 
a/modules/network/pre-existing-vpc/versions.tf b/modules/network/pre-existing-vpc/versions.tf index d952b61492..b763751779 100644 --- a/modules/network/pre-existing-vpc/versions.tf +++ b/modules/network/pre-existing-vpc/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:pre-existing-vpc/v1.20.0" } required_version = ">= 0.14.0" diff --git a/modules/scheduler/batch-login-node/versions.tf b/modules/scheduler/batch-login-node/versions.tf index 565be47f1f..b1927fdf65 100644 --- a/modules/scheduler/batch-login-node/versions.tf +++ b/modules/scheduler/batch-login-node/versions.tf @@ -22,7 +22,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:batch-login-node/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:batch-login-node/v1.20.0" } required_version = ">= 0.14.0" diff --git a/modules/scripts/startup-script/versions.tf b/modules/scripts/startup-script/versions.tf index e48ca9e18c..38824dcba8 100644 --- a/modules/scripts/startup-script/versions.tf +++ b/modules/scripts/startup-script/versions.tf @@ -30,7 +30,7 @@ terraform { } } provider_meta "google" { - module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.19.1" + module_name = "blueprints/terraform/hpc-toolkit:startup-script/v1.20.0" } required_version = ">= 0.14.0" From 6227b7ca276ffaf86acd1b8fc8121efbc10b1a50 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Thu, 6 Jul 2023 14:40:07 -0700 Subject: [PATCH 92/92] Update Django 4.2.2 -> 4.2.3 (#1552) See https://github.com/advisories/GHSA-jh3w-4vvf-mjgr --- community/front-end/ofe/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/community/front-end/ofe/requirements.txt b/community/front-end/ofe/requirements.txt index 608a70d6e3..ba99fa8c1e 100644 --- a/community/front-end/ofe/requirements.txt +++ 
b/community/front-end/ofe/requirements.txt @@ -16,7 +16,7 @@ dill==0.3.6 distlib==0.3.6 # django-revproxy==0.11.0 released but not yet in pypi git+https://github.com/jazzband/django-revproxy.git@d2234005135dc0771b7c4e0bb0465664ccfa5787 -Django==4.2.2 +Django==4.2.3 django-allauth==0.54.0 django-extensions==3.2.3 djangorestframework==3.14.0