From 83dc14618a6b4f21aff1d956608a0412536fcffb Mon Sep 17 00:00:00 2001 From: Carson Dunbar Date: Tue, 6 Aug 2024 18:32:15 +0000 Subject: [PATCH 01/26] Updating vm instance to allow for RDMA nic-types (private preview only --- community/modules/compute/pbspro-execution/README.md | 2 +- community/modules/compute/pbspro-execution/variables.tf | 2 +- .../modules/remote-desktop/chrome-remote-desktop/README.md | 2 +- .../modules/remote-desktop/chrome-remote-desktop/variables.tf | 2 +- modules/compute/vm-instance/variables.tf | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/community/modules/compute/pbspro-execution/README.md b/community/modules/compute/pbspro-execution/README.md index 0dd9b64775..c1460c5e4e 100644 --- a/community/modules/compute/pbspro-execution/README.md +++ b/community/modules/compute/pbspro-execution/README.md @@ -102,7 +102,7 @@ No resources. | [machine\_type](#input\_machine\_type) | Machine type to use for the instance creation | `string` | `"c2-standard-60"` | no | | [metadata](#input\_metadata) | Metadata, provided as a map | `map(string)` | `{}` | no | | [name\_prefix](#input\_name\_prefix) | Name prefix for PBS execution hostnames | `string` | `null` | no | -| [network\_interfaces](#input\_network\_interfaces) | A list of network interfaces. The options match that of the terraform
network\_interface block of google\_compute\_instance. For descriptions of the
subfields or more information see the documentation:
https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance#nested_network_interface

**\_NOTE:\_** If `network_interfaces` are set, `network_self_link` and
`subnetwork_self_link` will be ignored, even if they are provided through
the `use` field. `bandwidth_tier` and `enable_public_ips` also do not apply
to network interfaces defined in this variable.

Subfields:
network (string, required if subnetwork is not supplied)
subnetwork (string, required if network is not supplied)
subnetwork\_project (string, optional)
network\_ip (string, optional)
nic\_type (string, optional, choose from ["GVNIC", "VIRTIO\_NET"])
stack\_type (string, optional, choose from ["IPV4\_ONLY", "IPV4\_IPV6"])
queue\_count (number, optional)
access\_config (object, optional)
ipv6\_access\_config (object, optional)
alias\_ip\_range (list(object), optional) |
list(object({
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
| `[]` | no | +| [network\_interfaces](#input\_network\_interfaces) | A list of network interfaces. The options match that of the terraform
network\_interface block of google\_compute\_instance. For descriptions of the
subfields or more information see the documentation:
https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance#nested_network_interface

**\_NOTE:\_** If `network_interfaces` are set, `network_self_link` and
`subnetwork_self_link` will be ignored, even if they are provided through
the `use` field. `bandwidth_tier` and `enable_public_ips` also do not apply
to network interfaces defined in this variable.

Subfields:
network (string, required if subnetwork is not supplied)
subnetwork (string, required if network is not supplied)
subnetwork\_project (string, optional)
network\_ip (string, optional)
nic\_type (string, optional, choose from ["GVNIC", "VIRTIO\_NET", "RDMA", "IRDMA", "MRDMA"])
stack\_type (string, optional, choose from ["IPV4\_ONLY", "IPV4\_IPV6"])
queue\_count (number, optional)
access\_config (object, optional)
ipv6\_access\_config (object, optional)
alias\_ip\_range (list(object), optional) |
list(object({
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
| `[]` | no | | [network\_self\_link](#input\_network\_self\_link) | The self link of the network to attach the VM. | `string` | `"default"` | no | | [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Describes maintenance behavior for the instance. If left blank this will default to `MIGRATE` except for when `placement_policy`, spot provisioning, or GPUs require it to be `TERMINATE` | `string` | `null` | no | diff --git a/community/modules/compute/pbspro-execution/variables.tf b/community/modules/compute/pbspro-execution/variables.tf index a67df059d9..4f45d8b3ab 100644 --- a/community/modules/compute/pbspro-execution/variables.tf +++ b/community/modules/compute/pbspro-execution/variables.tf @@ -203,7 +203,7 @@ variable "network_interfaces" { subnetwork (string, required if network is not supplied) subnetwork_project (string, optional) network_ip (string, optional) - nic_type (string, optional, choose from ["GVNIC", "VIRTIO_NET"]) + nic_type (string, optional, choose from ["GVNIC", "VIRTIO_NET", "RDMA", "IRDMA", "MRDMA"]) stack_type (string, optional, choose from ["IPV4_ONLY", "IPV4_IPV6"]) queue_count (number, optional) access_config (object, optional) diff --git a/community/modules/remote-desktop/chrome-remote-desktop/README.md b/community/modules/remote-desktop/chrome-remote-desktop/README.md index 19ab5361f7..82d9f6932e 100644 --- a/community/modules/remote-desktop/chrome-remote-desktop/README.md +++ b/community/modules/remote-desktop/chrome-remote-desktop/README.md @@ -90,7 +90,7 @@ No resources. | [machine\_type](#input\_machine\_type) | Machine type to use for the instance creation. Must be N1 family if GPU is used. | `string` | `"n1-standard-8"` | no | | [metadata](#input\_metadata) | Metadata, provided as a map | `map(string)` | `{}` | no | | [name\_prefix](#input\_name\_prefix) | An optional name for all VM and disk resources.
If not supplied, `deployment_name` will be used.
When `name_prefix` is supplied, and `add_deployment_name_before_prefix` is set,
then resources are named by "<`deployment_name`>-<`name_prefix`>-<#>". | `string` | `null` | no | -| [network\_interfaces](#input\_network\_interfaces) | A list of network interfaces. The options match that of the terraform
network\_interface block of google\_compute\_instance. For descriptions of the
subfields or more information see the documentation:
https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance#nested_network_interface
**\_NOTE:\_** If `network_interfaces` are set, `network_self_link` and
`subnetwork_self_link` will be ignored, even if they are provided through
the `use` field. `bandwidth_tier` and `enable_public_ips` also do not apply
to network interfaces defined in this variable.
Subfields:
network (string, required if subnetwork is not supplied)
subnetwork (string, required if network is not supplied)
subnetwork\_project (string, optional)
network\_ip (string, optional)
nic\_type (string, optional, choose from ["GVNIC", "VIRTIO\_NET"])
stack\_type (string, optional, choose from ["IPV4\_ONLY", "IPV4\_IPV6"])
queue\_count (number, optional)
access\_config (object, optional)
ipv6\_access\_config (object, optional)
alias\_ip\_range (list(object), optional) |
list(object({
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
| `[]` | no | +| [network\_interfaces](#input\_network\_interfaces) | A list of network interfaces. The options match that of the terraform
network\_interface block of google\_compute\_instance. For descriptions of the
subfields or more information see the documentation:
https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance#nested_network_interface
**\_NOTE:\_** If `network_interfaces` are set, `network_self_link` and
`subnetwork_self_link` will be ignored, even if they are provided through
the `use` field. `bandwidth_tier` and `enable_public_ips` also do not apply
to network interfaces defined in this variable.
Subfields:
network (string, required if subnetwork is not supplied)
subnetwork (string, required if network is not supplied)
subnetwork\_project (string, optional)
network\_ip (string, optional)
nic\_type (string, optional, choose from ["GVNIC", "VIRTIO\_NET", "RDMA", "IRDMA", "MRDMA"])
stack\_type (string, optional, choose from ["IPV4\_ONLY", "IPV4\_IPV6"])
queue\_count (number, optional)
access\_config (object, optional)
ipv6\_access\_config (object, optional)
alias\_ip\_range (list(object), optional) |
list(object({
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
| `[]` | no | | [network\_self\_link](#input\_network\_self\_link) | The self link of the network to attach the VM. | `string` | `"default"` | no | | [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Describes maintenance behavior for the instance. If left blank this will default to `MIGRATE` except for when `placement_policy`, spot provisioning, or GPUs require it to be `TERMINATE` | `string` | `"TERMINATE"` | no | diff --git a/community/modules/remote-desktop/chrome-remote-desktop/variables.tf b/community/modules/remote-desktop/chrome-remote-desktop/variables.tf index 41916e70ff..040f29c1b0 100644 --- a/community/modules/remote-desktop/chrome-remote-desktop/variables.tf +++ b/community/modules/remote-desktop/chrome-remote-desktop/variables.tf @@ -172,7 +172,7 @@ variable "network_interfaces" { subnetwork (string, required if network is not supplied) subnetwork_project (string, optional) network_ip (string, optional) - nic_type (string, optional, choose from ["GVNIC", "VIRTIO_NET"]) + nic_type (string, optional, choose from ["GVNIC", "VIRTIO_NET", "RDMA", "IRDMA", "MRDMA"]) stack_type (string, optional, choose from ["IPV4_ONLY", "IPV4_IPV6"]) queue_count (number, optional) access_config (object, optional) diff --git a/modules/compute/vm-instance/variables.tf b/modules/compute/vm-instance/variables.tf index f675325187..a6e7f4ab11 100644 --- a/modules/compute/vm-instance/variables.tf +++ b/modules/compute/vm-instance/variables.tf @@ -223,9 +223,9 @@ variable "network_interfaces" { } validation { condition = alltrue([ - for ni in var.network_interfaces : ni.nic_type == "GVNIC" || ni.nic_type == "VIRTIO_NET" || ni.nic_type == null + for ni in var.network_interfaces : contains(["GVNIC", "VIRTIO_NET", "RDMA", "IRDMA", "MRDMA"], coalesce(ni.nic_type, "INVALID")) || ni.nic_type == null ]) - error_message = "In the variable network_interfaces, field \"nic_type\" must be either \"GVNIC\", \"VIRTIO_NET\" or null." + error_message = "In the variable network_interfaces, field \"nic_type\" must be either \"GVNIC\", \"VIRTIO_NET\", \"RDMA\", \"IRDMA\", \"MRDMA\", or null." 
} validation { condition = alltrue([ From a916e294c6e10d524c3e50151288c4487511b5d8 Mon Sep 17 00:00:00 2001 From: Carson Dunbar Date: Tue, 20 Aug 2024 20:41:30 +0000 Subject: [PATCH 02/26] Adding specific google-provider and updating modules for network profile Adding documentation and moving RDMA module to community Updated for new version of guest_accelerator --- community/modules/network/rdma-vpc/README.md | 87 +++++++ community/modules/network/rdma-vpc/main.tf | 164 ++++++++++++ .../modules/network/rdma-vpc/metadata.yaml | 19 ++ community/modules/network/rdma-vpc/outputs.tf | 63 +++++ .../modules/network/rdma-vpc/variables.tf | 246 ++++++++++++++++++ .../modules/network/rdma-vpc/versions.tf | 19 ++ .../network/rdma-vpc/vpc-submodule/README.md | 163 ++++++++++++ .../network/rdma-vpc/vpc-submodule/main.tf | 97 +++++++ .../rdma-vpc/vpc-submodule/metadata.yaml | 18 ++ .../network/rdma-vpc/vpc-submodule/outputs.tf | 90 +++++++ .../rdma-vpc/vpc-submodule/variables.tf | 208 +++++++++++++++ .../rdma-vpc/vpc-submodule/versions.tf | 33 +++ modules/compute/vm-instance/README.md | 6 +- modules/compute/vm-instance/main.tf | 28 +- modules/compute/vm-instance/variables.tf | 6 + modules/compute/vm-instance/versions.tf | 9 + 16 files changed, 1252 insertions(+), 4 deletions(-) create mode 100644 community/modules/network/rdma-vpc/README.md create mode 100644 community/modules/network/rdma-vpc/main.tf create mode 100644 community/modules/network/rdma-vpc/metadata.yaml create mode 100644 community/modules/network/rdma-vpc/outputs.tf create mode 100644 community/modules/network/rdma-vpc/variables.tf create mode 100644 community/modules/network/rdma-vpc/versions.tf create mode 100644 community/modules/network/rdma-vpc/vpc-submodule/README.md create mode 100644 community/modules/network/rdma-vpc/vpc-submodule/main.tf create mode 100644 community/modules/network/rdma-vpc/vpc-submodule/metadata.yaml create mode 100644 community/modules/network/rdma-vpc/vpc-submodule/outputs.tf create 
mode 100644 community/modules/network/rdma-vpc/vpc-submodule/variables.tf create mode 100644 community/modules/network/rdma-vpc/vpc-submodule/versions.tf diff --git a/community/modules/network/rdma-vpc/README.md b/community/modules/network/rdma-vpc/README.md new file mode 100644 index 0000000000..c9380bc914 --- /dev/null +++ b/community/modules/network/rdma-vpc/README.md @@ -0,0 +1,87 @@ +## Description + +This is an experimental VPC module. + +Documentation will be updated at a later point. + +## License + + +Copyright 2022 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 0.15.0 | + +## Providers + +No providers. + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [vpc](#module\_vpc) | ./vpc-submodule | n/a | + +## Resources + +No resources. 
+ +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [additional\_subnetworks](#input\_additional\_subnetworks) | DEPRECATED: please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions | `list(map(string))` | `null` | no | +| [allowed\_ssh\_ip\_ranges](#input\_allowed\_ssh\_ip\_ranges) | A list of CIDR IP ranges from which to allow ssh access | `list(string)` | `[]` | no | +| [default\_primary\_subnetwork\_size](#input\_default\_primary\_subnetwork\_size) | The size, in CIDR bits, of the default primary subnetwork unless explicitly defined in var.subnetworks | `number` | `15` | no | +| [delete\_default\_internet\_gateway\_routes](#input\_delete\_default\_internet\_gateway\_routes) | If set, ensure that all routes within the network specified whose names begin with 'default-route' and with a next hop of 'default-internet-gateway' are deleted | `bool` | `false` | no | +| [deployment\_name](#input\_deployment\_name) | The name of the current deployment | `string` | n/a | yes | +| [enable\_iap\_rdp\_ingress](#input\_enable\_iap\_rdp\_ingress) | Enable a firewall rule to allow Windows Remote Desktop Protocol access using IAP tunnels | `bool` | `false` | no | +| [enable\_iap\_ssh\_ingress](#input\_enable\_iap\_ssh\_ingress) | Enable a firewall rule to allow SSH access using IAP tunnels | `bool` | `true` | no | +| [enable\_iap\_winrm\_ingress](#input\_enable\_iap\_winrm\_ingress) | Enable a firewall rule to allow Windows Remote Management (WinRM) access using IAP tunnels | `bool` | `false` | no | +| [enable\_internal\_traffic](#input\_enable\_internal\_traffic) | Enable a firewall rule to allow all internal TCP, UDP, and ICMP traffic within the network | `bool` | `true` | no | +| [extra\_iap\_ports](#input\_extra\_iap\_ports) | A list of TCP ports for which to create firewall rules that enable IAP for TCP forwarding (use dedicated enable\_iap variables for standard ports) | 
`list(string)` | `[]` | no | +| [firewall\_log\_config](#input\_firewall\_log\_config) | Firewall log configuration for Toolkit firewall rules (var.enable\_iap\_ssh\_ingress and others) | `string` | `"DISABLE_LOGGING"` | no | +| [firewall\_rules](#input\_firewall\_rules) | List of firewall rules | `any` | `[]` | no | +| [mtu](#input\_mtu) | The network MTU (default: 8896). Recommended values: 0 (use Compute Engine default), 1460 (default outside HPC environments), 1500 (Internet default), or 8896 (for Jumbo packets). Allowed are all values in the range 1300 to 8896, inclusively. | `number` | `8896` | no | +| [network\_address\_range](#input\_network\_address\_range) | IP address range (CIDR) for global network | `string` | `"10.0.0.0/9"` | no | +| [network\_description](#input\_network\_description) | An optional description of this resource (changes will trigger resource destroy/create) | `string` | `""` | no | +| [network\_name](#input\_network\_name) | The name of the network to be created (if unsupplied, will default to "{deployment\_name}-net") | `string` | `null` | no | +| [network\_profile](#input\_network\_profile) | Profile name for VPC configuration | `string` | `null` | no | +| [network\_routing\_mode](#input\_network\_routing\_mode) | The network routing mode (default "GLOBAL") | `string` | `"GLOBAL"` | no | +| [primary\_subnetwork](#input\_primary\_subnetwork) | DEPRECATED: please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions | `map(string)` | `null` | no | +| [project\_id](#input\_project\_id) | Project in which the HPC deployment will be created | `string` | n/a | yes | +| [region](#input\_region) | The default region for Cloud resources | `string` | n/a | yes | +| [secondary\_ranges](#input\_secondary\_ranges) | Secondary ranges that will be used in some of the subnets. Please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions. 
| `map(list(object({ range_name = string, ip_cidr_range = string })))` | `{}` | no | +| [shared\_vpc\_host](#input\_shared\_vpc\_host) | Makes this project a Shared VPC host if 'true' (default 'false') | `bool` | `false` | no | +| [subnetwork\_name](#input\_subnetwork\_name) | The name of the network to be created (if unsupplied, will default to "{deployment\_name}-primary-subnet") | `string` | `null` | no | +| [subnetwork\_size](#input\_subnetwork\_size) | DEPRECATED: please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions | `number` | `null` | no | +| [subnetworks](#input\_subnetworks) | List of subnetworks to create within the VPC. If left empty, it will be
replaced by a single, default subnetwork constructed from other parameters
(e.g. var.region). In all cases, the first subnetwork in the list is identified
by outputs as a "primary" subnetwork.

subnet\_name (string, required, name of subnet)
subnet\_region (string, required, region of subnet)
subnet\_ip (string, mutually exclusive with new\_bits, CIDR-formatted IP range for subnetwork)
new\_bits (number, mutually exclusive with subnet\_ip, CIDR bits used to calculate subnetwork range)
subnet\_private\_access (bool, optional, Enable Private Access on subnetwork)
subnet\_flow\_logs (map(string), optional, Configure Flow Logs see terraform-google-network module)
description (string, optional, Description of Network)
purpose (string, optional, related to Load Balancing)
role (string, optional, related to Load Balancing) | `list(map(string))` | `[]` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| [network\_id](#output\_network\_id) | ID of the new VPC network | +| [network\_name](#output\_network\_name) | Name of the new VPC network | +| [network\_self\_link](#output\_network\_self\_link) | Self link of the new VPC network | +| [subnetwork](#output\_subnetwork) | Primary subnetwork object | +| [subnetwork\_address](#output\_subnetwork\_address) | IP address range of the primary subnetwork | +| [subnetwork\_name](#output\_subnetwork\_name) | Name of the primary subnetwork | +| [subnetwork\_self\_link](#output\_subnetwork\_self\_link) | Self link of the primary subnetwork | +| [subnetworks](#output\_subnetworks) | Full list of subnetwork objects belonging to the new VPC network | + diff --git a/community/modules/network/rdma-vpc/main.tf b/community/modules/network/rdma-vpc/main.tf new file mode 100644 index 0000000000..b14aab8834 --- /dev/null +++ b/community/modules/network/rdma-vpc/main.tf @@ -0,0 +1,164 @@ +/** + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +locals { + autoname = replace(var.deployment_name, "_", "-") + network_name = var.network_name == null ? "${local.autoname}-net" : var.network_name + subnetwork_name = var.subnetwork_name == null ? 
"${local.autoname}-primary-subnet" : var.subnetwork_name + + # define a default subnetwork for cases in which no explicit subnetworks are + # defined in var.subnetworks + default_primary_subnetwork_cidr_block = cidrsubnet(var.network_address_range, var.default_primary_subnetwork_size, 0) + default_primary_subnetwork = { + subnet_name = local.subnetwork_name + subnet_ip = local.default_primary_subnetwork_cidr_block + subnet_region = var.region + subnet_private_access = true + subnet_flow_logs = false + description = "primary subnetwork in ${local.network_name}" + purpose = null + role = null + } + + # Identify user-supplied primary subnetwork + # (1) explicit var.subnetworks[0] + # (2) implicit local default subnetwork + input_primary_subnetwork = coalesce(try(var.subnetworks[0], null), local.default_primary_subnetwork) + + # Identify user-supplied additional subnetworks + # (1) explicit var.subnetworks[1:end] + # (2) empty list + input_additional_subnetworks = try(slice(var.subnetworks, 1, length(var.subnetworks)), []) + + # at this point we have constructed a list of subnetworks but need to extract + # user-provided CIDR blocks or calculate them from user-provided new_bits + # after we complete deprecation, local.input_subnetworks can be replaced with + # var.subnetworks (or local.default_primary_subnetwork if that is null) + input_subnetworks = concat([local.input_primary_subnetwork], local.input_additional_subnetworks) + subnetworks_cidr_blocks = try( + local.input_subnetworks[*]["subnet_ip"], + cidrsubnets(var.network_address_range, local.input_subnetworks[*]["new_bits"]...) 
+ ) + + # merge in the CIDR blocks (even when already there) and remove new_bits + subnetworks = [for i, subnet in local.input_subnetworks : + merge({ for k, v in subnet : k => v if k != "new_bits" }, { "subnet_ip" = local.subnetworks_cidr_blocks[i] }) + ] + + # this comprehension should have 1 and only 1 match + output_primary_subnetwork = one([for k, v in module.vpc.subnets : v if k == "${local.subnetworks[0].subnet_region}/${local.subnetworks[0].subnet_name}"]) + output_primary_subnetwork_name = local.output_primary_subnetwork.name + output_primary_subnetwork_self_link = local.output_primary_subnetwork.self_link + output_primary_subnetwork_ip_cidr_range = local.output_primary_subnetwork.ip_cidr_range + + iap_ports = distinct(concat(compact([ + var.enable_iap_rdp_ingress ? "3389" : "", + var.enable_iap_ssh_ingress ? "22" : "", + var.enable_iap_winrm_ingress ? "5986" : "", + ]), var.extra_iap_ports)) + + firewall_log_api_values = { + "DISABLE_LOGGING" = null + "INCLUDE_ALL_METADATA" = { metadata = "INCLUDE_ALL_METADATA" }, + "EXCLUDE_ALL_METADATA" = { metadata = "EXCLUDE_ALL_METADATA" }, + } + firewall_log_config = lookup(local.firewall_log_api_values, var.firewall_log_config, null) + + allow_iap_ingress = { + name = "${local.network_name}-fw-allow-iap-ingress" + description = "allow TCP access via Identity-Aware Proxy" + direction = "INGRESS" + priority = null + ranges = ["35.235.240.0/20"] + source_tags = null + source_service_accounts = null + target_tags = null + target_service_accounts = null + allow = [{ + protocol = "tcp" + ports = local.iap_ports + }] + deny = [] + log_config = local.firewall_log_config + } + + allow_ssh_ingress = { + name = "${local.network_name}-fw-allow-ssh-ingress" + description = "allow SSH access" + direction = "INGRESS" + priority = null + ranges = var.allowed_ssh_ip_ranges + source_tags = null + source_service_accounts = null + target_tags = null + target_service_accounts = null + allow = [{ + protocol = "tcp" + ports = ["22"] + }] 
+ deny = [] + log_config = local.firewall_log_config + } + + allow_internal_traffic = { + name = "${local.network_name}-fw-allow-internal-traffic" + priority = null + description = "allow traffic between nodes of this VPC" + direction = "INGRESS" + ranges = [var.network_address_range] + source_tags = null + source_service_accounts = null + target_tags = null + target_service_accounts = null + allow = [{ + protocol = "tcp" + ports = ["0-65535"] + }, { + protocol = "udp" + ports = ["0-65535"] + }, { + protocol = "icmp" + ports = null + }, + ] + deny = [] + log_config = local.firewall_log_config + } + + firewall_rules = concat( + var.firewall_rules, + length(var.allowed_ssh_ip_ranges) > 0 ? [local.allow_ssh_ingress] : [], + var.enable_internal_traffic ? [local.allow_internal_traffic] : [], + length(local.iap_ports) > 0 ? [local.allow_iap_ingress] : [] + ) +} + +module "vpc" { + source = "./vpc-submodule" + + network_name = local.network_name + project_id = var.project_id + auto_create_subnetworks = false + subnets = local.subnetworks + secondary_ranges = var.secondary_ranges + routing_mode = var.network_routing_mode + mtu = var.mtu + description = var.network_description + shared_vpc_host = var.shared_vpc_host + delete_default_internet_gateway_routes = var.delete_default_internet_gateway_routes + firewall_rules = local.firewall_rules + network_profile = var.network_profile +} diff --git a/community/modules/network/rdma-vpc/metadata.yaml b/community/modules/network/rdma-vpc/metadata.yaml new file mode 100644 index 0000000000..4c2f23a8d7 --- /dev/null +++ b/community/modules/network/rdma-vpc/metadata.yaml @@ -0,0 +1,19 @@ +# Copyright 2023 "Google LLC" +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- + +spec: + requirements: + services: + - compute.googleapis.com diff --git a/community/modules/network/rdma-vpc/outputs.tf b/community/modules/network/rdma-vpc/outputs.tf new file mode 100644 index 0000000000..bdcd72d195 --- /dev/null +++ b/community/modules/network/rdma-vpc/outputs.tf @@ -0,0 +1,63 @@ +/** + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +output "network_name" { + description = "Name of the new VPC network" + value = module.vpc.network_name + depends_on = [module.vpc] +} + +output "network_id" { + description = "ID of the new VPC network" + value = module.vpc.network_id + depends_on = [module.vpc] +} + +output "network_self_link" { + description = "Self link of the new VPC network" + value = module.vpc.network_self_link + depends_on = [module.vpc] +} + +output "subnetworks" { + description = "Full list of subnetwork objects belonging to the new VPC network" + value = module.vpc.subnets + depends_on = [module.vpc] +} + +output "subnetwork" { + description = "Primary subnetwork object" + value = local.output_primary_subnetwork + depends_on = [module.vpc] +} + +output "subnetwork_name" { + description = "Name of the primary subnetwork" + value = local.output_primary_subnetwork_name + depends_on = [module.vpc] +} + +output "subnetwork_self_link" { + description = "Self link of the primary subnetwork" + value = local.output_primary_subnetwork_self_link + depends_on = [module.vpc] +} + +output "subnetwork_address" { + description = "IP address range of the primary subnetwork" + value = local.output_primary_subnetwork_ip_cidr_range + depends_on = [module.vpc] +} diff --git a/community/modules/network/rdma-vpc/variables.tf b/community/modules/network/rdma-vpc/variables.tf new file mode 100644 index 0000000000..34308a8a64 --- /dev/null +++ b/community/modules/network/rdma-vpc/variables.tf @@ -0,0 +1,246 @@ +/** + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +variable "project_id" { + description = "Project in which the HPC deployment will be created" + type = string +} + +variable "network_name" { + description = "The name of the network to be created (if unsupplied, will default to \"{deployment_name}-net\")" + type = string + default = null +} + +variable "subnetwork_name" { + description = "The name of the network to be created (if unsupplied, will default to \"{deployment_name}-primary-subnet\")" + type = string + default = null +} + +# tflint-ignore: terraform_unused_declarations +variable "subnetwork_size" { + description = "DEPRECATED: please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions" + type = number + default = null + validation { + condition = var.subnetwork_size == null + error_message = "subnetwork_size is deprecated. Please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions." + } +} + +variable "default_primary_subnetwork_size" { + description = "The size, in CIDR bits, of the default primary subnetwork unless explicitly defined in var.subnetworks" + type = number + default = 15 +} + +variable "region" { + description = "The default region for Cloud resources" + type = string +} + +variable "deployment_name" { + description = "The name of the current deployment" + type = string +} + +variable "network_address_range" { + description = "IP address range (CIDR) for global network" + type = string + default = "10.0.0.0/9" + + validation { + condition = can(cidrhost(var.network_address_range, 0)) + error_message = "IP address range must be in CIDR format." + } +} + +variable "mtu" { + type = number + description = "The network MTU (default: 8896). Recommended values: 0 (use Compute Engine default), 1460 (default outside HPC environments), 1500 (Internet default), or 8896 (for Jumbo packets). 
Allowed are all values in the range 1300 to 8896, inclusively." + default = 8896 +} + +variable "subnetworks" { + description = <<-EOT + List of subnetworks to create within the VPC. If left empty, it will be + replaced by a single, default subnetwork constructed from other parameters + (e.g. var.region). In all cases, the first subnetwork in the list is identified + by outputs as a "primary" subnetwork. + + subnet_name (string, required, name of subnet) + subnet_region (string, required, region of subnet) + subnet_ip (string, mutually exclusive with new_bits, CIDR-formatted IP range for subnetwork) + new_bits (number, mutually exclusive with subnet_ip, CIDR bits used to calculate subnetwork range) + subnet_private_access (bool, optional, Enable Private Access on subnetwork) + subnet_flow_logs (map(string), optional, Configure Flow Logs see terraform-google-network module) + description (string, optional, Description of Network) + purpose (string, optional, related to Load Balancing) + role (string, optional, related to Load Balancing) + EOT + type = list(map(string)) + default = [] + validation { + condition = alltrue([ + for s in var.subnetworks : can(s["subnet_name"]) + ]) + error_message = "All subnetworks must define \"subnet_name\"." + } + validation { + condition = alltrue([ + for s in var.subnetworks : can(s["subnet_region"]) + ]) + error_message = "All subnetworks must define \"subnet_region\"." + } + validation { + condition = alltrue([ + for s in var.subnetworks : can(s["subnet_ip"]) != can(s["new_bits"]) + ]) + error_message = "All subnetworks must define exactly one of \"subnet_ip\" or \"new_bits\"." + } + validation { + condition = alltrue([for s in var.subnetworks : can(s["subnet_ip"])]) || alltrue([for s in var.subnetworks : can(s["new_bits"])]) + error_message = "All subnetworks must make same choice of \"subnet_ip\" or \"new_bits\"." 
+ } +} + +# tflint-ignore: terraform_unused_declarations +variable "primary_subnetwork" { + description = "DEPRECATED: please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions" + type = map(string) + default = null + validation { + condition = var.primary_subnetwork == null + error_message = "primary_subnetwork is deprecated. Please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions." + } +} + +# tflint-ignore: terraform_unused_declarations +variable "additional_subnetworks" { + description = "DEPRECATED: please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions" + type = list(map(string)) + default = null + validation { + condition = var.additional_subnetworks == null + error_message = "additional_subnetworks is deprecated. Please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions." + } +} + +variable "secondary_ranges" { + type = map(list(object({ range_name = string, ip_cidr_range = string }))) + description = "Secondary ranges that will be used in some of the subnets. Please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions." + default = {} +} + +variable "network_routing_mode" { + type = string + default = "GLOBAL" + description = "The network routing mode (default \"GLOBAL\")" + + validation { + condition = contains(["GLOBAL", "REGIONAL"], var.network_routing_mode) + error_message = "The network routing mode must either be \"GLOBAL\" or \"REGIONAL\"." 
+ } +} + +variable "network_description" { + type = string + description = "An optional description of this resource (changes will trigger resource destroy/create)" + default = "" +} + +variable "shared_vpc_host" { + type = bool + description = "Makes this project a Shared VPC host if 'true' (default 'false')" + default = false +} + +variable "delete_default_internet_gateway_routes" { + type = bool + description = "If set, ensure that all routes within the network specified whose names begin with 'default-route' and with a next hop of 'default-internet-gateway' are deleted" + default = false +} + +variable "enable_iap_ssh_ingress" { + type = bool + description = "Enable a firewall rule to allow SSH access using IAP tunnels" + default = true +} + +variable "enable_iap_rdp_ingress" { + type = bool + description = "Enable a firewall rule to allow Windows Remote Desktop Protocol access using IAP tunnels" + default = false +} + +variable "enable_iap_winrm_ingress" { + type = bool + description = "Enable a firewall rule to allow Windows Remote Management (WinRM) access using IAP tunnels" + default = false +} + +variable "enable_internal_traffic" { + type = bool + description = "Enable a firewall rule to allow all internal TCP, UDP, and ICMP traffic within the network" + default = true +} + +variable "extra_iap_ports" { + type = list(string) + description = "A list of TCP ports for which to create firewall rules that enable IAP for TCP forwarding (use dedicated enable_iap variables for standard ports)" + default = [] +} + +variable "allowed_ssh_ip_ranges" { + type = list(string) + description = "A list of CIDR IP ranges from which to allow ssh access" + default = [] + + validation { + condition = alltrue([for r in var.allowed_ssh_ip_ranges : can(cidrhost(r, 0))]) + error_message = "Each element of var.allowed_ssh_ip_ranges must be a valid CIDR-formatted IPv4 range." 
+ } +} + +variable "firewall_rules" { + type = any + description = "List of firewall rules" + default = [] +} + +variable "firewall_log_config" { + type = string + description = "Firewall log configuration for Toolkit firewall rules (var.enable_iap_ssh_ingress and others)" + default = "DISABLE_LOGGING" + nullable = false + + validation { + condition = contains([ + "INCLUDE_ALL_METADATA", + "EXCLUDE_ALL_METADATA", + "DISABLE_LOGGING", + ], var.firewall_log_config) + error_message = "var.firewall_log_config must be set to \"DISABLE_LOGGING\", or enable logging with \"INCLUDE_ALL_METADATA\" or \"EXCLUDE_ALL_METADATA\"" + } +} + +variable "network_profile" { + # TODO Update this description + description = "Profile name for VPC configuration" + type = string + default = null +} diff --git a/community/modules/network/rdma-vpc/versions.tf b/community/modules/network/rdma-vpc/versions.tf new file mode 100644 index 0000000000..71b7106734 --- /dev/null +++ b/community/modules/network/rdma-vpc/versions.tf @@ -0,0 +1,19 @@ +/** + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +terraform { + required_version = ">= 0.15.0" +} diff --git a/community/modules/network/rdma-vpc/vpc-submodule/README.md b/community/modules/network/rdma-vpc/vpc-submodule/README.md new file mode 100644 index 0000000000..d46d0c365f --- /dev/null +++ b/community/modules/network/rdma-vpc/vpc-submodule/README.md @@ -0,0 +1,163 @@ +# Terraform Network Module + +This is a temporary sub-module to use the new private-preview features in the VPC module: network_profile + + +Copyright 2019 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3 | +| [google](#requirement\_google) | >= 4.64 | +| [google-private](#requirement\_google-private) | >= 0.0.1954 | + +## Providers + +| Name | Version | +|------|---------| +| [google](#provider\_google) | >= 4.64 | +| [google-private](#provider\_google-private) | >= 0.0.1954 | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [firewall\_rules](#module\_firewall\_rules) | github.com/terraform-google-modules/terraform-google-network.git//modules/firewall-rules?depth=1&ref=v9.0.0 | n/a | +| [routes](#module\_routes) | github.com/terraform-google-modules/terraform-google-network.git//modules/routes?depth=1&ref=v9.0.0 | n/a | +| [subnets](#module\_subnets) | github.com/terraform-google-modules/terraform-google-network.git//modules/subnets?depth=1&ref=v9.0.0 | n/a | + +## Resources + +| Name | Type | +|------|------| +| [google-private_google_compute_network.network](https://registry.terraform.io/providers/hashicorp/google-private/latest/docs/resources/google_compute_network) | resource | +| [google_compute_shared_vpc_host_project.shared_vpc_host](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_shared_vpc_host_project) | resource | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [auto\_create\_subnetworks](#input\_auto\_create\_subnetworks) | When set to true, the network is created in 'auto subnet mode' and it will create a subnet for each region automatically across the 10.128.0.0/9 address range. When set to false, the network is created in 'custom subnet mode' so the user can explicitly connect subnetwork resources. 
| `bool` | `false` | no | +| [delete\_default\_internet\_gateway\_routes](#input\_delete\_default\_internet\_gateway\_routes) | If set, ensure that all routes within the network specified whose names begin with 'default-route' and with a next hop of 'default-internet-gateway' are deleted | `bool` | `false` | no | +| [description](#input\_description) | An optional description of this resource. The resource must be recreated to modify this field. | `string` | `""` | no | +| [egress\_rules](#input\_egress\_rules) | List of egress rules. This will be ignored if variable 'rules' is non-empty |
list(object({
name = string
description = optional(string, null)
disabled = optional(bool, null)
priority = optional(number, null)
destination_ranges = optional(list(string), [])
source_ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))

allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
| `[]` | no | +| [enable\_ipv6\_ula](#input\_enable\_ipv6\_ula) | Enabled IPv6 ULA, this is a permanent change and cannot be undone! (default 'false') | `bool` | `false` | no | +| [firewall\_rules](#input\_firewall\_rules) | This is DEPRECATED and available for backward compatibility. Use ingress\_rules and egress\_rules variables. List of firewall rules |
list(object({
name = string
description = optional(string, null)
direction = optional(string, "INGRESS")
disabled = optional(bool, null)
priority = optional(number, null)
ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))

allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
| `[]` | no | +| [ingress\_rules](#input\_ingress\_rules) | List of ingress rules. This will be ignored if variable 'rules' is non-empty |
list(object({
name = string
description = optional(string, null)
disabled = optional(bool, null)
priority = optional(number, null)
destination_ranges = optional(list(string), [])
source_ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))

allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
| `[]` | no | +| [internal\_ipv6\_range](#input\_internal\_ipv6\_range) | When enabling IPv6 ULA, optionally, specify a /48 from fd20::/20 (default null) | `string` | `null` | no | +| [mtu](#input\_mtu) | The network MTU (If set to 0, meaning MTU is unset - defaults to '1460'). Recommended values: 1460 (default for historic reasons), 1500 (Internet default), or 8896 (for Jumbo packets). Allowed are all values in the range 1300 to 8896, inclusively. | `number` | `0` | no | +| [network\_firewall\_policy\_enforcement\_order](#input\_network\_firewall\_policy\_enforcement\_order) | Set the order that Firewall Rules and Firewall Policies are evaluated. Valid values are `BEFORE_CLASSIC_FIREWALL` and `AFTER_CLASSIC_FIREWALL`. (default null or equivalent to `AFTER_CLASSIC_FIREWALL`) | `string` | `null` | no | +| [network\_name](#input\_network\_name) | The name of the network being created | `string` | n/a | yes | +| [network\_profile](#input\_network\_profile) | Profile name for VPC configuration | `string` | `null` | no | +| [project\_id](#input\_project\_id) | The ID of the project where this VPC will be created | `string` | n/a | yes | +| [routes](#input\_routes) | List of routes being created in this VPC | `list(map(string))` | `[]` | no | +| [routing\_mode](#input\_routing\_mode) | The network routing mode (default 'GLOBAL') | `string` | `"GLOBAL"` | no | +| [secondary\_ranges](#input\_secondary\_ranges) | Secondary ranges that will be used in some of the subnets | `map(list(object({ range_name = string, ip_cidr_range = string })))` | `{}` | no | +| [shared\_vpc\_host](#input\_shared\_vpc\_host) | Makes this project a Shared VPC host if 'true' (default 'false') | `bool` | `false` | no | +| [subnets](#input\_subnets) | The list of subnets being created |
list(object({
subnet_name = string
subnet_ip = string
subnet_region = string
subnet_private_access = optional(string)
subnet_private_ipv6_access = optional(string)
subnet_flow_logs = optional(string)
subnet_flow_logs_interval = optional(string)
subnet_flow_logs_sampling = optional(string)
subnet_flow_logs_metadata = optional(string)
subnet_flow_logs_filter = optional(string)
subnet_flow_logs_metadata_fields = optional(list(string))
description = optional(string)
purpose = optional(string)
role = optional(string)
stack_type = optional(string)
ipv6_access_type = optional(string)
}))
| n/a | yes | + +## Outputs + +| Name | Description | +|------|-------------| +| [network](#output\_network) | The created network | +| [network\_id](#output\_network\_id) | The ID of the VPC being created | +| [network\_name](#output\_network\_name) | The name of the VPC being created | +| [network\_self\_link](#output\_network\_self\_link) | The URI of the VPC being created | +| [project\_id](#output\_project\_id) | VPC project id | +| [route\_names](#output\_route\_names) | The route names associated with this VPC | +| [subnets](#output\_subnets) | A map with keys of form subnet\_region/subnet\_name and values being the outputs of the google\_compute\_subnetwork resources used to create corresponding subnets. | +| [subnets\_flow\_logs](#output\_subnets\_flow\_logs) | Whether the subnets will have VPC flow logs enabled | +| [subnets\_ids](#output\_subnets\_ids) | The IDs of the subnets being created | +| [subnets\_ips](#output\_subnets\_ips) | The IPs and CIDRs of the subnets being created | +| [subnets\_names](#output\_subnets\_names) | The names of the subnets being created | +| [subnets\_private\_access](#output\_subnets\_private\_access) | Whether the subnets will have access to Google API's without a public IP | +| [subnets\_regions](#output\_subnets\_regions) | The region where the subnets will be created | +| [subnets\_secondary\_ranges](#output\_subnets\_secondary\_ranges) | The secondary ranges associated with these subnets | +| [subnets\_self\_links](#output\_subnets\_self\_links) | The self-links of subnets being created | + + +### Subnet Inputs + +The subnets list contains maps, where each object represents a subnet. 
Each map has the following inputs (please see examples folder for additional references): + +| Name | Description | Type | Default | Required | +| ---------------------------- | --------------------------------------------------------------------------------------------------------------- | :----: | :----------------------: | :------: | +| subnet\_name | The name of the subnet being created | string | - | yes | +| subnet\_ip | The IP and CIDR range of the subnet being created | string | - | yes | +| subnet\_region | The region where the subnet will be created | string | - | yes | +| subnet\_private\_access | Whether this subnet will have private Google access enabled | string | `"false"` | no | +| subnet\_private\_ipv6\_access| The private IPv6 google access type for the VMs in this subnet | string | - | no | +| subnet\_flow\_logs | Whether the subnet will record and send flow log data to logging | string | `"false"` | no | +| subnet\_flow\_logs\_interval | If subnet\_flow\_logs is true, sets the aggregation interval for collecting flow logs | string | `"INTERVAL_5_SEC"` | no | +| subnet\_flow\_logs\_sampling | If subnet\_flow\_logs is true, set the sampling rate of VPC flow logs within the subnetwork | string | `"0.5"` | no | +| subnet\_flow\_logs\_metadata | If subnet\_flow\_logs is true, configures whether metadata fields should be added to the reported VPC flow logs | string | `"INCLUDE_ALL_METADATA"` | no | +| subnet\_flow\_logs\_filter | Export filter defining which VPC flow logs should be logged, see https://cloud.google.com/vpc/docs/flow-logs#filtering for formatting details | string | `"true"` | no | +| subnet\_flow\_logs\_metadata\_fields | List of metadata fields that should be added to reported logs. Can only be specified if VPC flow logs for this subnetwork is enabled and "metadata" is set to CUSTOM_METADATA. | any | - | no | +| description | An optional description of this resource. Provide this property when you create the resource. 
This field can be set only at resource creation time | string | - | no | +| purpose | The purpose of the subnet usage. Whether it is to be used as a regular subnet or for proxy or load balancing purposes, see https://cloud.google.com/vpc/docs/subnets#purpose for more details | string | `"PRIVATE"` | no | +| role | The role of the subnet when using it as a proxy or loadbalancer network. Whether it is to be used as the active or as a backup subnet, see https://cloud.google.com/load-balancing/docs/proxy-only-subnets#proxy_only_subnet_create for more details | string | - | no | +| stack\_type | `IPV4_ONLY` or `IPV4_IPV6` for dual-stack networking | string | - | no | +| ipv6\_access\_type | `INTERNAL` or `EXTERNAL`. `INTERNAL` requires ULA be enabled on the VPC | string | - | no | + +### Route Inputs + +The routes list contains maps, where each object represents a route. For the next_hop_\* inputs, only one is possible to be used in each route. Having two next_hop_\* inputs will produce an error. Each map has the following inputs (please see examples folder for additional references): + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| name | The name of the route being created | string | - | no | +| description | The description of the route being created | string | - | no | +| tags | The network tags assigned to this route. This is a list in string format. E.g. "tag-01,tag-02" | string | - | yes | +| destination\_range | The destination range of outgoing packets that this route applies to. Only IPv4 is supported | string | - | yes | +| next\_hop\_internet | Whether the next hop to this route will be the default internet gateway. Use "true" to enable this as next hop | string | `"false"` | yes | +| next\_hop\_ip | Network IP address of an instance that should handle matching packets | string | - | yes | +| next\_hop\_instance | URL or name of an instance that should handle matching packets. 
If only the name is specified, "next\_hop\_instance\_zone" is required | string | - | yes | +| next\_hop\_instance\_zone | The zone of the instance specified in next\_hop\_instance. Only required if next\_hop\_instance is specified as a name | string | - | no | +| next\_hop\_vpn\_tunnel | URL to a VpnTunnel that should handle matching packets | string | - | yes | +| priority | The priority of this route. Priority is used to break ties in cases where there is more than one matching route of equal prefix length. In the case of two routes with equal prefix length, the one with the lowest-numbered priority value wins | string | `"1000"` | yes | + +## Requirements +### Installed Software + +- [Terraform](https://www.terraform.io/downloads.html) >= 1.3 +- [Terraform Provider for GCP](https://github.com/terraform-providers/terraform-provider-google) >= 4.64 +- [Terraform Provider for GCP Beta](https://github.com/terraform-providers/terraform-provider-google-beta) >= 4.25 +- [gcloud](https://cloud.google.com/sdk/gcloud/) >243.0.0 + +### Configure a Service Account +In order to execute this module you must have a Service Account with the following roles: + +- roles/compute.networkAdmin on the organization or folder + +If you are going to manage a Shared VPC, you must have either: + +- roles/compute.xpnAdmin on the organization +- roles/compute.xpnAdmin on the folder (beta) + +### Enable APIs +In order to operate with the Service Account you must activate the following API on the project where the Service Account was created: + +- Compute Engine API - compute.googleapis.com + +## Contributing + +Refer to the [contribution guidelines](./CONTRIBUTING.md) for +information on contributing to this module. 
+ +[terraform-0.13-upgrade]: https://www.terraform.io/upgrade-guides/0-13.html +[2.6.0]: https://registry.terraform.io/modules/terraform-google-modules/network/google/2.6.0 diff --git a/community/modules/network/rdma-vpc/vpc-submodule/main.tf b/community/modules/network/rdma-vpc/vpc-submodule/main.tf new file mode 100644 index 0000000000..e524a4cbff --- /dev/null +++ b/community/modules/network/rdma-vpc/vpc-submodule/main.tf @@ -0,0 +1,97 @@ +/** + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/****************************************** + VPC configuration + *****************************************/ +resource "google_compute_network" "network" { + provider = google-private + name = var.network_name + auto_create_subnetworks = var.auto_create_subnetworks + routing_mode = var.routing_mode + project = var.project_id + description = var.description + delete_default_routes_on_create = var.delete_default_internet_gateway_routes + mtu = var.mtu + enable_ula_internal_ipv6 = var.enable_ipv6_ula + internal_ipv6_range = var.internal_ipv6_range + network_firewall_policy_enforcement_order = var.network_firewall_policy_enforcement_order + network_profile = var.network_profile +} + +/****************************************** + Shared VPC + *****************************************/ +resource "google_compute_shared_vpc_host_project" "shared_vpc_host" { + count = var.shared_vpc_host ? 
1 : 0 + project = var.project_id + depends_on = [google_compute_network.network] +} + + +/****************************************** + Subnet configuration + *****************************************/ +module "subnets" { + source = "github.com/terraform-google-modules/terraform-google-network.git//modules/subnets?depth=1&ref=v9.0.0" + project_id = var.project_id + network_name = google_compute_network.network.name + subnets = var.subnets + secondary_ranges = var.secondary_ranges +} + +/****************************************** + Routes + *****************************************/ +module "routes" { + source = "github.com/terraform-google-modules/terraform-google-network.git//modules/routes?depth=1&ref=v9.0.0" + project_id = var.project_id + network_name = google_compute_network.network.name + routes = var.routes + module_depends_on = [module.subnets.subnets] +} + +/****************************************** + Firewall rules + *****************************************/ +locals { + rules = [ + for f in var.firewall_rules : { + name = f.name + direction = f.direction + disabled = lookup(f, "disabled", null) + priority = lookup(f, "priority", null) + description = lookup(f, "description", null) + ranges = lookup(f, "ranges", null) + source_tags = lookup(f, "source_tags", null) + source_service_accounts = lookup(f, "source_service_accounts", null) + target_tags = lookup(f, "target_tags", null) + target_service_accounts = lookup(f, "target_service_accounts", null) + allow = lookup(f, "allow", []) + deny = lookup(f, "deny", []) + log_config = lookup(f, "log_config", null) + } + ] +} + +module "firewall_rules" { + source = "github.com/terraform-google-modules/terraform-google-network.git//modules/firewall-rules?depth=1&ref=v9.0.0" + project_id = var.project_id + network_name = google_compute_network.network.name + rules = local.rules + ingress_rules = var.ingress_rules + egress_rules = var.egress_rules +} diff --git 
a/community/modules/network/rdma-vpc/vpc-submodule/metadata.yaml b/community/modules/network/rdma-vpc/vpc-submodule/metadata.yaml new file mode 100644 index 0000000000..dad26c8e20 --- /dev/null +++ b/community/modules/network/rdma-vpc/vpc-submodule/metadata.yaml @@ -0,0 +1,18 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +spec: + requirements: + services: + - compute.googleapis.com diff --git a/community/modules/network/rdma-vpc/vpc-submodule/outputs.tf b/community/modules/network/rdma-vpc/vpc-submodule/outputs.tf new file mode 100644 index 0000000000..822bbcbdec --- /dev/null +++ b/community/modules/network/rdma-vpc/vpc-submodule/outputs.tf @@ -0,0 +1,90 @@ +/** + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +output "network" { + value = google_compute_network.network + description = "The created network" +} + +output "subnets" { + value = module.subnets.subnets + description = "A map with keys of form subnet_region/subnet_name and values being the outputs of the google_compute_subnetwork resources used to create corresponding subnets." +} + +output "network_name" { + value = google_compute_network.network.name + description = "The name of the VPC being created" +} + +output "network_id" { + value = google_compute_network.network.id + description = "The ID of the VPC being created" +} + +output "network_self_link" { + value = google_compute_network.network.self_link + description = "The URI of the VPC being created" +} + +output "project_id" { + value = google_compute_network.network.project + description = "VPC project id" +} + +output "subnets_names" { + value = [for subnet in module.subnets.subnets : subnet.name] + description = "The names of the subnets being created" +} + +output "subnets_ids" { + value = [for subnet in module.subnets.subnets : subnet.id] + description = "The IDs of the subnets being created" +} + +output "subnets_ips" { + value = [for subnet in module.subnets.subnets : subnet.ip_cidr_range] + description = "The IPs and CIDRs of the subnets being created" +} + +output "subnets_self_links" { + value = [for subnet in module.subnets.subnets : subnet.self_link] + description = "The self-links of subnets being created" +} + +output "subnets_regions" { + value = [for subnet in module.subnets.subnets : subnet.region] + description = "The region where the subnets will be created" +} + +output "subnets_private_access" { + value = [for subnet in module.subnets.subnets : subnet.private_ip_google_access] + description = "Whether the subnets will have access to Google API's without a public IP" +} + +output "subnets_flow_logs" { + value = [for subnet in module.subnets.subnets : length(subnet.log_config) != 0]
+ description = "Whether the subnets will have VPC flow logs enabled" +} + +output "subnets_secondary_ranges" { + value = [for subnet in module.subnets.subnets : subnet.secondary_ip_range] + description = "The secondary ranges associated with these subnets" +} + +output "route_names" { + value = [for r in module.routes.routes : r.name] + description = "The route names associated with this VPC" +} diff --git a/community/modules/network/rdma-vpc/vpc-submodule/variables.tf b/community/modules/network/rdma-vpc/vpc-submodule/variables.tf new file mode 100644 index 0000000000..cf767f2fe8 --- /dev/null +++ b/community/modules/network/rdma-vpc/vpc-submodule/variables.tf @@ -0,0 +1,208 @@ +/** + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +variable "project_id" { + description = "The ID of the project where this VPC will be created" + type = string +} + +variable "network_name" { + description = "The name of the network being created" + type = string +} + +variable "routing_mode" { + type = string + default = "GLOBAL" + description = "The network routing mode (default 'GLOBAL')" +} + +variable "shared_vpc_host" { + type = bool + description = "Makes this project a Shared VPC host if 'true' (default 'false')" + default = false +} + +variable "subnets" { + type = list(object({ + subnet_name = string + subnet_ip = string + subnet_region = string + subnet_private_access = optional(string) + subnet_private_ipv6_access = optional(string) + subnet_flow_logs = optional(string) + subnet_flow_logs_interval = optional(string) + subnet_flow_logs_sampling = optional(string) + subnet_flow_logs_metadata = optional(string) + subnet_flow_logs_filter = optional(string) + subnet_flow_logs_metadata_fields = optional(list(string)) + description = optional(string) + purpose = optional(string) + role = optional(string) + stack_type = optional(string) + ipv6_access_type = optional(string) + })) + description = "The list of subnets being created" +} + +variable "secondary_ranges" { + type = map(list(object({ range_name = string, ip_cidr_range = string }))) + description = "Secondary ranges that will be used in some of the subnets" + default = {} +} + +variable "routes" { + type = list(map(string)) + description = "List of routes being created in this VPC" + default = [] +} + +variable "firewall_rules" { + type = list(object({ + name = string + description = optional(string, null) + direction = optional(string, "INGRESS") + disabled = optional(bool, null) + priority = optional(number, null) + ranges = optional(list(string), []) + source_tags = optional(list(string)) + source_service_accounts = optional(list(string)) + target_tags = optional(list(string)) + target_service_accounts = optional(list(string)) + + allow = 
optional(list(object({ + protocol = string + ports = optional(list(string)) + })), []) + deny = optional(list(object({ + protocol = string + ports = optional(list(string)) + })), []) + log_config = optional(object({ + metadata = string + })) + })) + description = "This is DEPRECATED and available for backward compatibility. Use ingress_rules and egress_rules variables. List of firewall rules" + default = [] +} + +variable "delete_default_internet_gateway_routes" { + type = bool + description = "If set, ensure that all routes within the network specified whose names begin with 'default-route' and with a next hop of 'default-internet-gateway' are deleted" + default = false +} + + +variable "description" { + type = string + description = "An optional description of this resource. The resource must be recreated to modify this field." + default = "" +} + +variable "auto_create_subnetworks" { + type = bool + description = "When set to true, the network is created in 'auto subnet mode' and it will create a subnet for each region automatically across the 10.128.0.0/9 address range. When set to false, the network is created in 'custom subnet mode' so the user can explicitly connect subnetwork resources." + default = false +} + +variable "mtu" { + type = number + description = "The network MTU (If set to 0, meaning MTU is unset - defaults to '1460'). Recommended values: 1460 (default for historic reasons), 1500 (Internet default), or 8896 (for Jumbo packets). Allowed are all values in the range 1300 to 8896, inclusively." + default = 0 +} + +variable "ingress_rules" { + description = "List of ingress rules. 
This will be ignored if variable 'rules' is non-empty" + default = [] + type = list(object({ + name = string + description = optional(string, null) + disabled = optional(bool, null) + priority = optional(number, null) + destination_ranges = optional(list(string), []) + source_ranges = optional(list(string), []) + source_tags = optional(list(string)) + source_service_accounts = optional(list(string)) + target_tags = optional(list(string)) + target_service_accounts = optional(list(string)) + + allow = optional(list(object({ + protocol = string + ports = optional(list(string)) + })), []) + deny = optional(list(object({ + protocol = string + ports = optional(list(string)) + })), []) + log_config = optional(object({ + metadata = string + })) + })) +} + +variable "egress_rules" { + description = "List of egress rules. This will be ignored if variable 'rules' is non-empty" + default = [] + type = list(object({ + name = string + description = optional(string, null) + disabled = optional(bool, null) + priority = optional(number, null) + destination_ranges = optional(list(string), []) + source_ranges = optional(list(string), []) + source_tags = optional(list(string)) + source_service_accounts = optional(list(string)) + target_tags = optional(list(string)) + target_service_accounts = optional(list(string)) + + allow = optional(list(object({ + protocol = string + ports = optional(list(string)) + })), []) + deny = optional(list(object({ + protocol = string + ports = optional(list(string)) + })), []) + log_config = optional(object({ + metadata = string + })) + })) +} + +variable "enable_ipv6_ula" { + type = bool + description = "Enabled IPv6 ULA, this is a permanent change and cannot be undone! 
(default 'false')" + default = false +} + +variable "internal_ipv6_range" { + type = string + default = null + description = "When enabling IPv6 ULA, optionally, specify a /48 from fd20::/20 (default null)" +} + +variable "network_firewall_policy_enforcement_order" { + type = string + default = null + description = "Set the order that Firewall Rules and Firewall Policies are evaluated. Valid values are `BEFORE_CLASSIC_FIREWALL` and `AFTER_CLASSIC_FIREWALL`. (default null or equivalent to `AFTER_CLASSIC_FIREWALL`)" +} + +variable "network_profile" { + # TODO Update this description + description = "Profile name for VPC configuration" + type = string + default = null +} diff --git a/community/modules/network/rdma-vpc/vpc-submodule/versions.tf b/community/modules/network/rdma-vpc/vpc-submodule/versions.tf new file mode 100644 index 0000000000..8285c7f341 --- /dev/null +++ b/community/modules/network/rdma-vpc/vpc-submodule/versions.tf @@ -0,0 +1,33 @@ +/** + * Copyright 2019 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +terraform { + required_version = ">= 1.3" + required_providers { + google = { + source = "hashicorp/google" + version = ">= 4.64" + } + google-private = { + source = "hashicorp/google-private" + version = ">= 0.0.1954" + } + } + + provider_meta "google" { + module_name = "blueprints/terraform/terraform-google-network/v9.1.0" + } +} diff --git a/modules/compute/vm-instance/README.md b/modules/compute/vm-instance/README.md index fb3fde84e6..847f2f3398 100644 --- a/modules/compute/vm-instance/README.md +++ b/modules/compute/vm-instance/README.md @@ -171,6 +171,7 @@ limitations under the License. | [terraform](#requirement\_terraform) | >= 1.3.0 | | [google](#requirement\_google) | >= 4.73.0 | | [google-beta](#requirement\_google-beta) | >= 4.73.0 | +| [google-private](#requirement\_google-private) | >= 0.0.1954 | | [null](#requirement\_null) | >= 3.0 | ## Providers @@ -179,6 +180,7 @@ limitations under the License. |------|---------| | [google](#provider\_google) | >= 4.73.0 | | [google-beta](#provider\_google-beta) | >= 4.73.0 | +| [google-private](#provider\_google-private) | >= 0.0.1954 | | [null](#provider\_null) | >= 3.0 | ## Modules @@ -191,12 +193,13 @@ limitations under the License. 
| Name | Type | |------|------| -| [google-beta_google_compute_instance.compute_vm](https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/google_compute_instance) | resource | | [google-beta_google_compute_resource_policy.placement_policy](https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/google_compute_resource_policy) | resource | +| [google-private_google_compute_instance.compute_vm](https://registry.terraform.io/providers/hashicorp/google-private/latest/docs/resources/google_compute_instance) | resource | | [google_compute_address.compute_ip](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_address) | resource | | [google_compute_disk.boot_disk](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_disk) | resource | | [null_resource.image](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [null_resource.replace_vm_trigger_from_placement](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | +| [google-beta_google_compute_resource_policy.pre_existing_placement_policy](https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/data-sources/google_compute_resource_policy) | data source | | [google_compute_image.compute_image](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_image) | data source | ## Inputs @@ -229,6 +232,7 @@ limitations under the License. | [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Describes maintenance behavior for the instance. If left blank this will default to `MIGRATE` except for when `placement_policy`, spot provisioning, or GPUs require it to be `TERMINATE` | `string` | `null` | no | | [placement\_policy](#input\_placement\_policy) | Control where your VM instances are physically located relative to each other within a zone.
See https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_resource_policy#nested_group_placement_policy | `any` | `null` | no | +| [pre\_existing\_placement\_policy](#input\_pre\_existing\_placement\_policy) | Use pre-existing placement policy within the project specified | `string` | `null` | no | | [project\_id](#input\_project\_id) | Project in which the HPC deployment will be created | `string` | n/a | yes | | [region](#input\_region) | The region to deploy to | `string` | n/a | yes | | [service\_account](#input\_service\_account) | DEPRECATED - Use `service_account_email` and `service_account_scopes` instead. |
object({
email = string,
scopes = set(string)
})
| `null` | no | diff --git a/modules/compute/vm-instance/main.tf b/modules/compute/vm-instance/main.tf index 683fa77682..d80430e9d4 100644 --- a/modules/compute/vm-instance/main.tf +++ b/modules/compute/vm-instance/main.tf @@ -123,6 +123,15 @@ resource "google_compute_disk" "boot_disk" { } } +data "google_compute_resource_policy" "pre_existing_placement_policy" { + project = var.project_id + provider = google-beta + + count = var.pre_existing_placement_policy != null ? 1 : 0 + name = var.pre_existing_placement_policy + region = var.region +} + resource "google_compute_resource_policy" "placement_policy" { project = var.project_id provider = google-beta @@ -165,7 +174,7 @@ resource "google_compute_address" "compute_ip" { resource "google_compute_instance" "compute_vm" { project = var.project_id - provider = google-beta + provider = google-private count = var.instance_count @@ -176,7 +185,8 @@ resource "google_compute_instance" "compute_vm" { machine_type = var.machine_type zone = var.zone - resource_policies = google_compute_resource_policy.placement_policy[*].self_link + resource_policies = concat(google_compute_resource_policy.placement_policy[*].self_link, + data.google_compute_resource_policy.pre_existing_placement_policy[*].self_link) tags = var.tags labels = local.labels @@ -239,7 +249,14 @@ resource "google_compute_instance" "compute_vm" { scopes = var.service_account_scopes } - guest_accelerator = local.guest_accelerator + dynamic "guest_accelerator" { + for_each = local.guest_accelerator + content { + count = guest_accelerator.value.count + type = guest_accelerator.value.type + } + } + scheduling { on_host_maintenance = local.on_host_maintenance automatic_restart = local.automatic_restart @@ -290,5 +307,10 @@ resource "google_compute_instance" "compute_vm" { ], "${substr(var.machine_type, 0, 3)}:${var.disk_type}") error_message = "A disk_type=${var.disk_type} cannot be used with machine_type=${var.machine_type}." 
} + precondition { + condition = (length(google_compute_resource_policy.placement_policy) == 0 || + length(data.google_compute_resource_policy.pre_existing_placement_policy) == 0) + error_message = "Pre-existing placement policy and placement policy variables are mutually exclusive" + } } } diff --git a/modules/compute/vm-instance/variables.tf b/modules/compute/vm-instance/variables.tf index a6e7f4ab11..717667f104 100644 --- a/modules/compute/vm-instance/variables.tf +++ b/modules/compute/vm-instance/variables.tf @@ -327,6 +327,12 @@ variable "placement_policy" { } } +variable "pre_existing_placement_policy" { + description = "Use pre-existing placement policy within the project specified" + type = string + default = null +} + variable "spot" { description = "Provision VMs using discounted Spot pricing, allowing for preemption" type = bool diff --git a/modules/compute/vm-instance/versions.tf b/modules/compute/vm-instance/versions.tf index 1b46a4e5e1..1d5f27887a 100644 --- a/modules/compute/vm-instance/versions.tf +++ b/modules/compute/vm-instance/versions.tf @@ -25,6 +25,12 @@ terraform { source = "hashicorp/google-beta" version = ">= 4.73.0" } + + google-private = { + source = "hashicorp/google-private" + version = ">= 0.0.1954" + } + null = { source = "hashicorp/null" version = ">= 3.0" @@ -36,6 +42,9 @@ terraform { provider_meta "google-beta" { module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.38.0" } + provider_meta "google-private" { + module_name = "blueprints/terraform/hpc-toolkit:vm-instance/v1.38.0" + } required_version = ">= 1.3.0" } From 7507dc5c80181880541c2d6f1cedb15e654c3347 Mon Sep 17 00:00:00 2001 From: Carson Dunbar Date: Tue, 27 Aug 2024 14:54:05 +0000 Subject: [PATCH 03/26] Updated version to match functional version of google-private --- modules/compute/vm-instance/README.md | 4 ++-- modules/compute/vm-instance/versions.tf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/compute/vm-instance/README.md 
b/modules/compute/vm-instance/README.md index 847f2f3398..6eaa80f3f7 100644 --- a/modules/compute/vm-instance/README.md +++ b/modules/compute/vm-instance/README.md @@ -171,7 +171,7 @@ limitations under the License. | [terraform](#requirement\_terraform) | >= 1.3.0 | | [google](#requirement\_google) | >= 4.73.0 | | [google-beta](#requirement\_google-beta) | >= 4.73.0 | -| [google-private](#requirement\_google-private) | >= 0.0.1954 | +| [google-private](#requirement\_google-private) | >= 0.0.1960 | | [null](#requirement\_null) | >= 3.0 | ## Providers @@ -180,7 +180,7 @@ limitations under the License. |------|---------| | [google](#provider\_google) | >= 4.73.0 | | [google-beta](#provider\_google-beta) | >= 4.73.0 | -| [google-private](#provider\_google-private) | >= 0.0.1954 | +| [google-private](#provider\_google-private) | >= 0.0.1960 | | [null](#provider\_null) | >= 3.0 | ## Modules diff --git a/modules/compute/vm-instance/versions.tf b/modules/compute/vm-instance/versions.tf index 1d5f27887a..52e858aa35 100644 --- a/modules/compute/vm-instance/versions.tf +++ b/modules/compute/vm-instance/versions.tf @@ -28,7 +28,7 @@ terraform { google-private = { source = "hashicorp/google-private" - version = ">= 0.0.1954" + version = ">= 0.0.1960" } null = { From fcffeb483942df6a42b7fb43b230053c2fb5577c Mon Sep 17 00:00:00 2001 From: Carson Dunbar Date: Thu, 29 Aug 2024 13:19:30 +0000 Subject: [PATCH 04/26] Updating slurm to use new branch for experimental features --- .../compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md | 2 +- .../compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf | 2 +- community/modules/network/rdma-vpc/vpc-submodule/README.md | 2 +- .../scheduler/schedmd-slurm-gcp-v6-controller/README.md | 6 +++--- .../scheduler/schedmd-slurm-gcp-v6-controller/controller.tf | 2 +- .../scheduler/schedmd-slurm-gcp-v6-controller/login.tf | 2 +- .../scheduler/schedmd-slurm-gcp-v6-controller/partition.tf | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff 
--git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md index 864b8933ad..6f5aeb4000 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md @@ -74,7 +74,7 @@ modules. For support with the underlying modules, see the instructions in the | Name | Source | Version | |------|--------|---------| -| [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 6.6.1 | +| [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 016b76c | ## Resources diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf index f064171b67..f91a6d754a 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf @@ -61,7 +61,7 @@ data "google_compute_default_service_account" "default" { module "slurm_nodeset_template" { - source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=6.6.1" + source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=016b76c" project_id = var.project_id region = var.region diff --git a/community/modules/network/rdma-vpc/vpc-submodule/README.md b/community/modules/network/rdma-vpc/vpc-submodule/README.md index d46d0c365f..a87f9758ac 100644 --- a/community/modules/network/rdma-vpc/vpc-submodule/README.md +++ b/community/modules/network/rdma-vpc/vpc-submodule/README.md @@ -1,6 +1,6 @@ # Terraform Network Module -This is a temporary 
sub-module to use the new private-preview features in the VPC module: network_profile +This is a temporary sub-module to use the new experimental features in the VPC module: network_profile Copyright 2019 Google LLC diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md index dc16e79894..036adfa2d0 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md @@ -197,11 +197,11 @@ limitations under the License. | [bucket](#module\_bucket) | terraform-google-modules/cloud-storage/google | ~> 5.0 | | [daos\_network\_storage\_scripts](#module\_daos\_network\_storage\_scripts) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | v1.36.0&depth=1 | | [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/_slurm_instance | 6.6.1 | -| [slurm\_controller\_template](#module\_slurm\_controller\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 6.6.1 | +| [slurm\_controller\_template](#module\_slurm\_controller\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 016b76c | | [slurm\_files](#module\_slurm\_files) | ./modules/slurm_files | n/a | | [slurm\_login\_instance](#module\_slurm\_login\_instance) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/_slurm_instance | 6.6.1 | -| [slurm\_login\_template](#module\_slurm\_login\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 6.6.1 | -| [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | 
github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 6.6.1 | +| [slurm\_login\_template](#module\_slurm\_login\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 016b76c | +| [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 016b76c | | [slurm\_nodeset\_tpu](#module\_slurm\_nodeset\_tpu) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_nodeset_tpu | 6.6.1 | ## Resources diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf index d2c345cf8d..9a9bb47ad9 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf @@ -44,7 +44,7 @@ locals { # INSTANCE TEMPLATE module "slurm_controller_template" { - source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=6.6.1" + source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=016b76c" project_id = var.project_id region = var.region diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf index e693dc22f8..4e0d651c3c 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf @@ -14,7 +14,7 @@ # TEMPLATE module "slurm_login_template" { - source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=6.6.1" + source = 
"github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=016b76c" for_each = { for x in var.login_nodes : x.name_prefix => x } diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf index d1f783f64a..916c40a9e1 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf @@ -26,7 +26,7 @@ locals { # NODESET # TODO: remove dependency on slurm-gcp repo, move to local template module module "slurm_nodeset_template" { - source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=6.6.1" + source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=016b76c" for_each = local.nodeset_map project_id = var.project_id From 3ddad0b5688a5d90436c4deddf4ff9d98aba946b Mon Sep 17 00:00:00 2001 From: annuay Date: Thu, 5 Sep 2024 12:56:31 +0000 Subject: [PATCH 05/26] create some delta From d6ea3359dbe414b783f5377a95b4c864e5105b32 Mon Sep 17 00:00:00 2001 From: Carson Dunbar Date: Mon, 9 Sep 2024 13:55:08 +0000 Subject: [PATCH 06/26] Updating private provider versions and slurm-gcp references --- .../compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md | 2 +- .../compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf | 2 +- community/modules/network/rdma-vpc/vpc-submodule/README.md | 4 ++-- .../modules/network/rdma-vpc/vpc-submodule/versions.tf | 2 +- .../scheduler/schedmd-slurm-gcp-v6-controller/README.md | 6 +++--- .../scheduler/schedmd-slurm-gcp-v6-controller/controller.tf | 2 +- .../scheduler/schedmd-slurm-gcp-v6-controller/login.tf | 2 +- .../scheduler/schedmd-slurm-gcp-v6-controller/partition.tf | 2 +- modules/compute/vm-instance/README.md | 4 ++-- modules/compute/vm-instance/versions.tf | 2 +- 10 files changed, 
14 insertions(+), 14 deletions(-) diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md index 6f5aeb4000..c17f5b0965 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md @@ -74,7 +74,7 @@ modules. For support with the underlying modules, see the instructions in the | Name | Source | Version | |------|--------|---------| -| [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 016b76c | +| [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 2aa6ad1 | ## Resources diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf index f91a6d754a..c981886df1 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf @@ -61,7 +61,7 @@ data "google_compute_default_service_account" "default" { module "slurm_nodeset_template" { - source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=016b76c" + source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=2aa6ad1" project_id = var.project_id region = var.region diff --git a/community/modules/network/rdma-vpc/vpc-submodule/README.md b/community/modules/network/rdma-vpc/vpc-submodule/README.md index a87f9758ac..470062baf7 100644 --- a/community/modules/network/rdma-vpc/vpc-submodule/README.md +++ b/community/modules/network/rdma-vpc/vpc-submodule/README.md @@ -23,14 +23,14 @@ 
limitations under the License. |------|---------| | [terraform](#requirement\_terraform) | >= 1.3 | | [google](#requirement\_google) | >= 4.64 | -| [google-private](#requirement\_google-private) | >= 0.0.1954 | +| [google-private](#requirement\_google-private) | >= 0.0.1962 | ## Providers | Name | Version | |------|---------| | [google](#provider\_google) | >= 4.64 | -| [google-private](#provider\_google-private) | >= 0.0.1954 | +| [google-private](#provider\_google-private) | >= 0.0.1962 | ## Modules diff --git a/community/modules/network/rdma-vpc/vpc-submodule/versions.tf b/community/modules/network/rdma-vpc/vpc-submodule/versions.tf index 8285c7f341..a4425f0a46 100644 --- a/community/modules/network/rdma-vpc/vpc-submodule/versions.tf +++ b/community/modules/network/rdma-vpc/vpc-submodule/versions.tf @@ -23,7 +23,7 @@ terraform { } google-private = { source = "hashicorp/google-private" - version = ">= 0.0.1954" + version = ">= 0.0.1962" } } diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md index 036adfa2d0..c0a2ea5f5b 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md @@ -197,11 +197,11 @@ limitations under the License. 
| [bucket](#module\_bucket) | terraform-google-modules/cloud-storage/google | ~> 5.0 | | [daos\_network\_storage\_scripts](#module\_daos\_network\_storage\_scripts) | github.com/GoogleCloudPlatform/hpc-toolkit//modules/scripts/startup-script | v1.36.0&depth=1 | | [slurm\_controller\_instance](#module\_slurm\_controller\_instance) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/_slurm_instance | 6.6.1 | -| [slurm\_controller\_template](#module\_slurm\_controller\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 016b76c | +| [slurm\_controller\_template](#module\_slurm\_controller\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 2aa6ad1 | | [slurm\_files](#module\_slurm\_files) | ./modules/slurm_files | n/a | | [slurm\_login\_instance](#module\_slurm\_login\_instance) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/_slurm_instance | 6.6.1 | -| [slurm\_login\_template](#module\_slurm\_login\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 016b76c | -| [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 016b76c | +| [slurm\_login\_template](#module\_slurm\_login\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 2aa6ad1 | +| [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template | 2aa6ad1 | | [slurm\_nodeset\_tpu](#module\_slurm\_nodeset\_tpu) | github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_nodeset_tpu | 6.6.1 | ## Resources diff --git 
a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf index 9a9bb47ad9..efa10b589e 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf @@ -44,7 +44,7 @@ locals { # INSTANCE TEMPLATE module "slurm_controller_template" { - source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=016b76c" + source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=2aa6ad1" project_id = var.project_id region = var.region diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf index 4e0d651c3c..fc7a89d89d 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf @@ -14,7 +14,7 @@ # TEMPLATE module "slurm_login_template" { - source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=016b76c" + source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=2aa6ad1" for_each = { for x in var.login_nodes : x.name_prefix => x } diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf index 916c40a9e1..ecb397e858 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf @@ -26,7 +26,7 @@ locals { # NODESET # TODO: remove dependency on slurm-gcp repo, move to local template module module "slurm_nodeset_template" { - source = 
"github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=016b76c" + source = "github.com/GoogleCloudPlatform/slurm-gcp.git//terraform/slurm_cluster/modules/slurm_instance_template?ref=2aa6ad1" for_each = local.nodeset_map project_id = var.project_id diff --git a/modules/compute/vm-instance/README.md b/modules/compute/vm-instance/README.md index 6eaa80f3f7..fe0d12f9cf 100644 --- a/modules/compute/vm-instance/README.md +++ b/modules/compute/vm-instance/README.md @@ -171,7 +171,7 @@ limitations under the License. | [terraform](#requirement\_terraform) | >= 1.3.0 | | [google](#requirement\_google) | >= 4.73.0 | | [google-beta](#requirement\_google-beta) | >= 4.73.0 | -| [google-private](#requirement\_google-private) | >= 0.0.1960 | +| [google-private](#requirement\_google-private) | >= 0.0.1962 | | [null](#requirement\_null) | >= 3.0 | ## Providers @@ -180,7 +180,7 @@ limitations under the License. |------|---------| | [google](#provider\_google) | >= 4.73.0 | | [google-beta](#provider\_google-beta) | >= 4.73.0 | -| [google-private](#provider\_google-private) | >= 0.0.1960 | +| [google-private](#provider\_google-private) | >= 0.0.1962 | | [null](#provider\_null) | >= 3.0 | ## Modules diff --git a/modules/compute/vm-instance/versions.tf b/modules/compute/vm-instance/versions.tf index 52e858aa35..b965b9fd7a 100644 --- a/modules/compute/vm-instance/versions.tf +++ b/modules/compute/vm-instance/versions.tf @@ -28,7 +28,7 @@ terraform { google-private = { source = "hashicorp/google-private" - version = ">= 0.0.1960" + version = ">= 0.0.1962" } null = { From 61b7542f9b535b7d27633771e817bb86d8cc78b1 Mon Sep 17 00:00:00 2001 From: Carson Dunbar Date: Wed, 11 Sep 2024 17:20:12 +0000 Subject: [PATCH 07/26] Update to RDMA VPC for reducing repeated code --- community/modules/network/rdma-vpc/README.md | 13 +-- community/modules/network/rdma-vpc/main.tf | 75 +++++-------- community/modules/network/rdma-vpc/outputs.tf | 24 +--- 
.../modules/network/rdma-vpc/variables.tf | 106 ++++-------------- 4 files changed, 55 insertions(+), 163 deletions(-) diff --git a/community/modules/network/rdma-vpc/README.md b/community/modules/network/rdma-vpc/README.md index c9380bc914..df9f6c3b94 100644 --- a/community/modules/network/rdma-vpc/README.md +++ b/community/modules/network/rdma-vpc/README.md @@ -45,9 +45,7 @@ No resources. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [additional\_subnetworks](#input\_additional\_subnetworks) | DEPRECATED: please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions | `list(map(string))` | `null` | no | | [allowed\_ssh\_ip\_ranges](#input\_allowed\_ssh\_ip\_ranges) | A list of CIDR IP ranges from which to allow ssh access | `list(string)` | `[]` | no | -| [default\_primary\_subnetwork\_size](#input\_default\_primary\_subnetwork\_size) | The size, in CIDR bits, of the default primary subnetwork unless explicitly defined in var.subnetworks | `number` | `15` | no | | [delete\_default\_internet\_gateway\_routes](#input\_delete\_default\_internet\_gateway\_routes) | If set, ensure that all routes within the network specified whose names begin with 'default-route' and with a next hop of 'default-internet-gateway' are deleted | `bool` | `false` | no | | [deployment\_name](#input\_deployment\_name) | The name of the current deployment | `string` | n/a | yes | | [enable\_iap\_rdp\_ingress](#input\_enable\_iap\_rdp\_ingress) | Enable a firewall rule to allow Windows Remote Desktop Protocol access using IAP tunnels | `bool` | `false` | no | @@ -63,14 +61,12 @@ No resources. 
| [network\_name](#input\_network\_name) | The name of the network to be created (if unsupplied, will default to "{deployment\_name}-net") | `string` | `null` | no | | [network\_profile](#input\_network\_profile) | Profile name for VPC configuration | `string` | `null` | no | | [network\_routing\_mode](#input\_network\_routing\_mode) | The network routing mode (default "GLOBAL") | `string` | `"GLOBAL"` | no | -| [primary\_subnetwork](#input\_primary\_subnetwork) | DEPRECATED: please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions | `map(string)` | `null` | no | +| [nic\_type](#input\_nic\_type) | NIC type for use in modules that use the output | `string` | `null` | no | | [project\_id](#input\_project\_id) | Project in which the HPC deployment will be created | `string` | n/a | yes | | [region](#input\_region) | The default region for Cloud resources | `string` | n/a | yes | | [secondary\_ranges](#input\_secondary\_ranges) | Secondary ranges that will be used in some of the subnets. Please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions. | `map(list(object({ range_name = string, ip_cidr_range = string })))` | `{}` | no | | [shared\_vpc\_host](#input\_shared\_vpc\_host) | Makes this project a Shared VPC host if 'true' (default 'false') | `bool` | `false` | no | -| [subnetwork\_name](#input\_subnetwork\_name) | The name of the network to be created (if unsupplied, will default to "{deployment\_name}-primary-subnet") | `string` | `null` | no | -| [subnetwork\_size](#input\_subnetwork\_size) | DEPRECATED: please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions | `number` | `null` | no | -| [subnetworks](#input\_subnetworks) | List of subnetworks to create within the VPC. If left empty, it will be
replaced by a single, default subnetwork constructed from other parameters
(e.g. var.region). In all cases, the first subnetwork in the list is identified
by outputs as a "primary" subnetwork.

subnet\_name (string, required, name of subnet)
subnet\_region (string, required, region of subnet)
subnet\_ip (string, mutually exclusive with new\_bits, CIDR-formatted IP range for subnetwork)
new\_bits (number, mutually exclusive with subnet\_ip, CIDR bits used to calculate subnetwork range)
subnet\_private\_access (bool, optional, Enable Private Access on subnetwork)
subnet\_flow\_logs (map(string), optional, Configure Flow Logs see terraform-google-network module)
description (string, optional, Description of Network)
purpose (string, optional, related to Load Balancing)
role (string, optional, related to Load Balancing) | `list(map(string))` | `[]` | no | +| [subnetworks\_template](#input\_subnetworks\_template) | Rules for creating subnetworks within the VPC |
object({
count = number
name_prefix = string
ip_range = string
region = string
private_access = optional(bool)
})
|
{
"count": 8,
"ip_range": "192.168.0.0/16",
"name_prefix": "subnet",
"region": null
}
| no | ## Outputs @@ -79,9 +75,6 @@ No resources. | [network\_id](#output\_network\_id) | ID of the new VPC network | | [network\_name](#output\_network\_name) | Name of the new VPC network | | [network\_self\_link](#output\_network\_self\_link) | Self link of the new VPC network | -| [subnetwork](#output\_subnetwork) | Primary subnetwork object | -| [subnetwork\_address](#output\_subnetwork\_address) | IP address range of the primary subnetwork | -| [subnetwork\_name](#output\_subnetwork\_name) | Name of the primary subnetwork | -| [subnetwork\_self\_link](#output\_subnetwork\_self\_link) | Self link of the primary subnetwork | +| [subnetwork\_interfaces](#output\_subnetwork\_interfaces) | Full list of subnetwork objects belonging to the new VPC network (compatible with vm-instance) | | [subnetworks](#output\_subnetworks) | Full list of subnetwork objects belonging to the new VPC network | diff --git a/community/modules/network/rdma-vpc/main.tf b/community/modules/network/rdma-vpc/main.tf index b14aab8834..6ffcea9a83 100644 --- a/community/modules/network/rdma-vpc/main.tf +++ b/community/modules/network/rdma-vpc/main.tf @@ -15,55 +15,19 @@ */ locals { - autoname = replace(var.deployment_name, "_", "-") - network_name = var.network_name == null ? "${local.autoname}-net" : var.network_name - subnetwork_name = var.subnetwork_name == null ? "${local.autoname}-primary-subnet" : var.subnetwork_name + autoname = replace(var.deployment_name, "_", "-") + network_name = var.network_name == null ? 
"${local.autoname}-net" : var.network_name - # define a default subnetwork for cases in which no explicit subnetworks are - # defined in var.subnetworks - default_primary_subnetwork_cidr_block = cidrsubnet(var.network_address_range, var.default_primary_subnetwork_size, 0) - default_primary_subnetwork = { - subnet_name = local.subnetwork_name - subnet_ip = local.default_primary_subnetwork_cidr_block - subnet_region = var.region - subnet_private_access = true - subnet_flow_logs = false - description = "primary subnetwork in ${local.network_name}" - purpose = null - role = null - } - - # Identify user-supplied primary subnetwork - # (1) explicit var.subnetworks[0] - # (2) implicit local default subnetwork - input_primary_subnetwork = coalesce(try(var.subnetworks[0], null), local.default_primary_subnetwork) - - # Identify user-supplied additional subnetworks - # (1) explicit var.subnetworks[1:end] - # (2) empty list - input_additional_subnetworks = try(slice(var.subnetworks, 1, length(var.subnetworks)), []) - - # at this point we have constructed a list of subnetworks but need to extract - # user-provided CIDR blocks or calculate them from user-provided new_bits - # after we complete deprecation, local.all_subnetworks can be replaced with - # var.subnetworks (or local.default_primary_subnetwork if that is null) - input_subnetworks = concat([local.input_primary_subnetwork], local.input_additional_subnetworks) - subnetworks_cidr_blocks = try( - local.input_subnetworks[*]["subnet_ip"], - cidrsubnets(var.network_address_range, local.input_subnetworks[*]["new_bits"]...) 
- ) - - # merge in the CIDR blocks (even when already there) and remove new_bits - subnetworks = [for i, subnet in local.input_subnetworks : - merge({ for k, v in subnet : k => v if k != "new_bits" }, { "subnet_ip" = local.subnetworks_cidr_blocks[i] }) + new_bits = ceil(log(var.subnetworks_template.count, 2)) + template_subnetworks = [for i in range(var.subnetworks_template.count) : + { + subnet_name = "${var.subnetworks_template.name_prefix}-${i}" + subnet_region = try(var.subnetworks_template.region, var.region) + subnet_ip = cidrsubnet(var.subnetworks_template.ip_range, local.new_bits, i) + subnet_private_access = coalesce(var.subnetworks_template.private_access, false) + } ] - # this comprehension should have 1 and only 1 match - output_primary_subnetwork = one([for k, v in module.vpc.subnets : v if k == "${local.subnetworks[0].subnet_region}/${local.subnetworks[0].subnet_name}"]) - output_primary_subnetwork_name = local.output_primary_subnetwork.name - output_primary_subnetwork_self_link = local.output_primary_subnetwork.self_link - output_primary_subnetwork_ip_cidr_range = local.output_primary_subnetwork.ip_cidr_range - iap_ports = distinct(concat(compact([ var.enable_iap_rdp_ingress ? "3389" : "", var.enable_iap_ssh_ingress ? "22" : "", @@ -144,6 +108,23 @@ locals { var.enable_internal_traffic ? [local.allow_internal_traffic] : [], length(local.iap_ports) > 0 ? 
[local.allow_iap_ingress] : [] ) + + url_parts = split("/", var.network_profile) + profile_name = upper(element(local.url_parts, length(local.url_parts) - 1)) + output_subnets = [ + for subnet in module.vpc.subnets : { + network = null + subnetwork = subnet.self_link + subnetwork_project = null # will populate from subnetwork_self_link + network_ip = null + nic_type = coalesce(var.nic_type, try(regex("IRDMA", local.profile_name), regex("MRDMA", local.profile_name), "RDMA")) + stack_type = null + queue_count = null + access_config = [] + ipv6_access_config = [] + alias_ip_range = [] + } + ] } module "vpc" { @@ -152,7 +133,7 @@ module "vpc" { network_name = local.network_name project_id = var.project_id auto_create_subnetworks = false - subnets = local.subnetworks + subnets = local.template_subnetworks secondary_ranges = var.secondary_ranges routing_mode = var.network_routing_mode mtu = var.mtu diff --git a/community/modules/network/rdma-vpc/outputs.tf b/community/modules/network/rdma-vpc/outputs.tf index bdcd72d195..7831625145 100644 --- a/community/modules/network/rdma-vpc/outputs.tf +++ b/community/modules/network/rdma-vpc/outputs.tf @@ -38,26 +38,8 @@ output "subnetworks" { depends_on = [module.vpc] } -output "subnetwork" { - description = "Primary subnetwork object" - value = local.output_primary_subnetwork - depends_on = [module.vpc] -} - -output "subnetwork_name" { - description = "Name of the primary subnetwork" - value = local.output_primary_subnetwork_name - depends_on = [module.vpc] -} - -output "subnetwork_self_link" { - description = "Self link of the primary subnetwork" - value = local.output_primary_subnetwork_self_link - depends_on = [module.vpc] -} - -output "subnetwork_address" { - description = "IP address range of the primary subnetwork" - value = local.output_primary_subnetwork_ip_cidr_range +output "subnetwork_interfaces" { + description = "Full list of subnetwork objects belonging to the new VPC network (compatible with vm-instance)" + value = 
local.output_subnets depends_on = [module.vpc] } diff --git a/community/modules/network/rdma-vpc/variables.tf b/community/modules/network/rdma-vpc/variables.tf index 34308a8a64..642f822131 100644 --- a/community/modules/network/rdma-vpc/variables.tf +++ b/community/modules/network/rdma-vpc/variables.tf @@ -25,29 +25,6 @@ variable "network_name" { default = null } -variable "subnetwork_name" { - description = "The name of the network to be created (if unsupplied, will default to \"{deployment_name}-primary-subnet\")" - type = string - default = null -} - -# tflint-ignore: terraform_unused_declarations -variable "subnetwork_size" { - description = "DEPRECATED: please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions" - type = number - default = null - validation { - condition = var.subnetwork_size == null - error_message = "subnetwork_size is deprecated. Please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions." - } -} - -variable "default_primary_subnetwork_size" { - description = "The size, in CIDR bits, of the default primary subnetwork unless explicitly defined in var.subnetworks" - type = number - default = 15 -} - variable "region" { description = "The default region for Cloud resources" type = string @@ -75,68 +52,21 @@ variable "mtu" { default = 8896 } -variable "subnetworks" { - description = <<-EOT - List of subnetworks to create within the VPC. If left empty, it will be - replaced by a single, default subnetwork constructed from other parameters - (e.g. var.region). In all cases, the first subnetwork in the list is identified - by outputs as a "primary" subnetwork. 
- - subnet_name (string, required, name of subnet) - subnet_region (string, required, region of subnet) - subnet_ip (string, mutually exclusive with new_bits, CIDR-formatted IP range for subnetwork) - new_bits (number, mutually exclusive with subnet_ip, CIDR bits used to calculate subnetwork range) - subnet_private_access (bool, optional, Enable Private Access on subnetwork) - subnet_flow_logs (map(string), optional, Configure Flow Logs see terraform-google-network module) - description (string, optional, Description of Network) - purpose (string, optional, related to Load Balancing) - role (string, optional, related to Load Balancing) - EOT - type = list(map(string)) - default = [] - validation { - condition = alltrue([ - for s in var.subnetworks : can(s["subnet_name"]) - ]) - error_message = "All subnetworks must define \"subnet_name\"." - } - validation { - condition = alltrue([ - for s in var.subnetworks : can(s["subnet_region"]) - ]) - error_message = "All subnetworks must define \"subnet_region\"." - } - validation { - condition = alltrue([ - for s in var.subnetworks : can(s["subnet_ip"]) != can(s["new_bits"]) - ]) - error_message = "All subnetworks must define exactly one of \"subnet_ip\" or \"new_bits\"." - } - validation { - condition = alltrue([for s in var.subnetworks : can(s["subnet_ip"])]) || alltrue([for s in var.subnetworks : can(s["new_bits"])]) - error_message = "All subnetworks must make same choice of \"subnet_ip\" or \"new_bits\"." - } -} - -# tflint-ignore: terraform_unused_declarations -variable "primary_subnetwork" { - description = "DEPRECATED: please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions" - type = map(string) - default = null - validation { - condition = var.primary_subnetwork == null - error_message = "primary_subnetwork is deprecated. Please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions." 
- } -} - -# tflint-ignore: terraform_unused_declarations -variable "additional_subnetworks" { - description = "DEPRECATED: please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions" - type = list(map(string)) - default = null - validation { - condition = var.additional_subnetworks == null - error_message = "additional_subnetworks is deprecated. Please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions." +variable "subnetworks_template" { + # TODO: Add validation and improve description + description = "Rules for creating subnetworks within the VPC" + type = object({ + count = number + name_prefix = string + ip_range = string + region = string + private_access = optional(bool) + }) + default = { + count = 8 + name_prefix = "subnet" + ip_range = "192.168.0.0/16" + region = null } } @@ -244,3 +174,9 @@ variable "network_profile" { type = string default = null } + +variable "nic_type" { + description = "NIC type for use in modules that use the output" + type = string + default = null +} From c8011cabdf98ba22b555163134208e7cd29d558a Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 26 Sep 2024 14:46:28 +0000 Subject: [PATCH 08/26] Update network_ip to use empty string --- community/modules/network/rdma-vpc/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/community/modules/network/rdma-vpc/main.tf b/community/modules/network/rdma-vpc/main.tf index 6ffcea9a83..d2fa87603b 100644 --- a/community/modules/network/rdma-vpc/main.tf +++ b/community/modules/network/rdma-vpc/main.tf @@ -116,7 +116,7 @@ locals { network = null subnetwork = subnet.self_link subnetwork_project = null # will populate from subnetwork_self_link - network_ip = null + network_ip = "" nic_type = coalesce(var.nic_type, try(regex("IRDMA", local.profile_name), regex("MRDMA", local.profile_name), "RDMA")) stack_type = null queue_count = null From 00c34d8e4a1b9058d3e2b85fa3e0e7601332dbec Mon Sep 17 00:00:00 2001 From: Atul Rajmane Date: 
Wed, 9 Oct 2024 14:05:45 +0000 Subject: [PATCH 09/26] RDMA Support in GKE Modules --- .../compute/pbspro-execution/README.md | 16 ++--- .../README.md | 30 ++++---- community/modules/network/rdma-vpc/README.md | 4 +- community/modules/network/rdma-vpc/main.tf | 17 +++++ community/modules/network/rdma-vpc/outputs.tf | 14 ++++ .../network/rdma-vpc/vpc-submodule/README.md | 8 +-- .../chrome-remote-desktop/README.md | 16 ++--- .../schedmd-slurm-gcp-v6-controller/README.md | 66 ++++++++--------- modules/compute/gke-node-pool/README.md | 2 + modules/compute/gke-node-pool/main.tf | 1 + modules/compute/gke-node-pool/variables.tf | 14 ++++ modules/compute/vm-instance/README.md | 26 +++---- modules/scheduler/gke-cluster/README.md | 3 + modules/scheduler/gke-cluster/main.tf | 70 +++++++++++++------ .../templates/gke-network-paramset.yaml.tftpl | 2 +- modules/scheduler/gke-cluster/variables.tf | 20 ++++++ .../pre-existing-gke-cluster/README.md | 1 + .../pre-existing-gke-cluster/main.tf | 43 +++++++++--- .../templates/gke-network-paramset.yaml.tftpl | 2 +- .../pre-existing-gke-cluster/variables.tf | 6 ++ 20 files changed, 248 insertions(+), 113 deletions(-) diff --git a/community/modules/compute/pbspro-execution/README.md b/community/modules/compute/pbspro-execution/README.md index 9b7ce281fa..5e0a884453 100644 --- a/community/modules/compute/pbspro-execution/README.md +++ b/community/modules/compute/pbspro-execution/README.md @@ -87,38 +87,38 @@ No resources. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [auto\_delete\_boot\_disk](#input\_auto\_delete\_boot\_disk) | Controls if boot disk should be auto-deleted when instance is deleted. | `bool` | `true` | no | -| [bandwidth\_tier](#input\_bandwidth\_tier) | Tier 1 bandwidth increases the maximum egress bandwidth for VMs.
Using the `tier_1_enabled` setting will enable both gVNIC and TIER\_1 higher bandwidth networking.
Using the `gvnic_enabled` setting will only enable gVNIC and will not enable TIER\_1.
Note that TIER\_1 only works with specific machine families & shapes and must be using an image that
supports gVNIC. See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"not_enabled"` | no | +| [bandwidth\_tier](#input\_bandwidth\_tier) | Tier 1 bandwidth increases the maximum egress bandwidth for VMs.
Using the `tier_1_enabled` setting will enable both gVNIC and TIER\_1 higher bandwidth networking.
Using the `gvnic_enabled` setting will only enable gVNIC and will not enable TIER\_1.
Note that TIER\_1 only works with specific machine families & shapes and must be using an image that
supports gVNIC. See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"not_enabled"` | no | | [deployment\_name](#input\_deployment\_name) | Cluster Toolkit deployment name. Cloud resource names will include this value. | `string` | n/a | yes | | [disk\_size\_gb](#input\_disk\_size\_gb) | Size of disk for instances. | `number` | `200` | no | | [disk\_type](#input\_disk\_type) | Disk type for instances. | `string` | `"pd-standard"` | no | | [enable\_oslogin](#input\_enable\_oslogin) | Enable or Disable OS Login with "ENABLE" or "DISABLE". Set to "INHERIT" to inherit project OS Login setting. | `string` | `"ENABLE"` | no | | [enable\_public\_ips](#input\_enable\_public\_ips) | If set to true, instances will have public IPs on the internet. | `bool` | `true` | no | -| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = string,
count = number
}))
| `null` | no | +| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = string,
count = number
}))
| `null` | no | | [instance\_count](#input\_instance\_count) | Number of instances | `number` | `1` | no | -| [instance\_image](#input\_instance\_image) | Instance Image

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted. | `map(string)` |
{
"name": "hpc-centos-7-v20240712",
"project": "cloud-hpc-image-public"
}
| no | +| [instance\_image](#input\_instance\_image) | Instance Image

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted. | `map(string)` |
{
"name": "hpc-centos-7-v20240712",
"project": "cloud-hpc-image-public"
}
| no | | [labels](#input\_labels) | Labels to add to the instances. Key-value pairs. | `map(string)` | n/a | yes | | [local\_ssd\_count](#input\_local\_ssd\_count) | The number of local SSDs to attach to each VM. See https://cloud.google.com/compute/docs/disks/local-ssd. | `number` | `0` | no | | [local\_ssd\_interface](#input\_local\_ssd\_interface) | Interface to be used with local SSDs. Can be either 'NVME' or 'SCSI'. No effect unless `local_ssd_count` is also set. | `string` | `"NVME"` | no | | [machine\_type](#input\_machine\_type) | Machine type to use for the instance creation | `string` | `"c2-standard-60"` | no | | [metadata](#input\_metadata) | Metadata, provided as a map | `map(string)` | `{}` | no | | [name\_prefix](#input\_name\_prefix) | Name prefix for PBS execution hostnames | `string` | `null` | no | -| [network\_interfaces](#input\_network\_interfaces) | A list of network interfaces. The options match that of the terraform
network\_interface block of google\_compute\_instance. For descriptions of the
subfields or more information see the documentation:
https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance#nested_network_interface

**\_NOTE:\_** If `network_interfaces` are set, `network_self_link` and
`subnetwork_self_link` will be ignored, even if they are provided through
the `use` field. `bandwidth_tier` and `enable_public_ips` also do not apply
to network interfaces defined in this variable.

Subfields:
network (string, required if subnetwork is not supplied)
subnetwork (string, required if network is not supplied)
subnetwork\_project (string, optional)
network\_ip (string, optional)
nic\_type (string, optional, choose from ["GVNIC", "VIRTIO\_NET", "RDMA", "IRDMA", "MRDMA"])
stack\_type (string, optional, choose from ["IPV4\_ONLY", "IPV4\_IPV6"])
queue\_count (number, optional)
access\_config (object, optional)
ipv6\_access\_config (object, optional)
alias\_ip\_range (list(object), optional) |
list(object({
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
| `[]` | no | +| [network\_interfaces](#input\_network\_interfaces) | A list of network interfaces. The options match that of the terraform
network\_interface block of google\_compute\_instance. For descriptions of the
subfields or more information see the documentation:
https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance#nested_network_interface

**\_NOTE:\_** If `network_interfaces` are set, `network_self_link` and
`subnetwork_self_link` will be ignored, even if they are provided through
the `use` field. `bandwidth_tier` and `enable_public_ips` also do not apply
to network interfaces defined in this variable.

Subfields:
network (string, required if subnetwork is not supplied)
subnetwork (string, required if network is not supplied)
subnetwork\_project (string, optional)
network\_ip (string, optional)
nic\_type (string, optional, choose from ["GVNIC", "VIRTIO\_NET", "RDMA", "IRDMA", "MRDMA"])
stack\_type (string, optional, choose from ["IPV4\_ONLY", "IPV4\_IPV6"])
queue\_count (number, optional)
access\_config (object, optional)
ipv6\_access\_config (object, optional)
alias\_ip\_range (list(object), optional) |
list(object({
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
| `[]` | no | | [network\_self\_link](#input\_network\_self\_link) | The self link of the network to attach the VM. | `string` | `"default"` | no | -| [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
| `[]` | no | +| [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Describes maintenance behavior for the instance. If left blank this will default to `MIGRATE` except for when `placement_policy`, spot provisioning, or GPUs require it to be `TERMINATE` | `string` | `null` | no | | [pbs\_exec](#input\_pbs\_exec) | Root path in which to install PBS | `string` | `"/opt/pbs"` | no | | [pbs\_execution\_rpm\_url](#input\_pbs\_execution\_rpm\_url) | Path to PBS Pro Execution Host RPM file | `string` | n/a | yes | | [pbs\_home](#input\_pbs\_home) | PBS working directory | `string` | `"/var/spool/pbs"` | no | | [pbs\_server](#input\_pbs\_server) | IP address or DNS name of PBS server host | `string` | n/a | yes | -| [placement\_policy](#input\_placement\_policy) | Control where your VM instances are physically located relative to each other within a zone. |
object({
vm_count = number,
availability_domain_count = number,
collocation = string,
})
| `null` | no | +| [placement\_policy](#input\_placement\_policy) | Control where your VM instances are physically located relative to each other within a zone. |
object({
vm_count = number,
availability_domain_count = number,
collocation = string,
})
| `null` | no | | [project\_id](#input\_project\_id) | Project in which Google Cloud resources will be created | `string` | n/a | yes | | [region](#input\_region) | Default region for creating resources | `string` | n/a | yes | -| [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_write",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append"
]
}
| no | +| [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_write",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append"
]
}
| no | | [spot](#input\_spot) | Provision VMs using discounted Spot pricing, allowing for preemption | `bool` | `false` | no | | [startup\_script](#input\_startup\_script) | Startup script used on the instance | `string` | `null` | no | | [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | The self link of the subnetwork to attach the VM. | `string` | `null` | no | | [tags](#input\_tags) | Network tags, provided as a list | `list(string)` | `[]` | no | -| [threads\_per\_core](#input\_threads\_per\_core) | Sets the number of threads per physical core. By setting threads\_per\_core
to 2, Simultaneous Multithreading (SMT) is enabled extending the total number
of virtual cores. For example, a machine of type c2-standard-60 will have 60
virtual cores with threads\_per\_core equal to 2. With threads\_per\_core equal
to 1 (SMT turned off), only the 30 physical cores will be available on the VM.

The default value of \"0\" will turn off SMT for supported machine types, and
will fall back to GCE defaults for unsupported machine types (t2d, shared-core
instances, or instances with less than 2 vCPU).

Disabling SMT can be more performant in many HPC workloads, therefore it is
disabled by default where compatible.

null = SMT configuration will use the GCE defaults for the machine type
0 = SMT will be disabled where compatible (default)
1 = SMT will always be disabled (will fail on incompatible machine types)
2 = SMT will always be enabled (will fail on incompatible machine types) | `number` | `0` | no | +| [threads\_per\_core](#input\_threads\_per\_core) | Sets the number of threads per physical core. By setting threads\_per\_core
to 2, Simultaneous Multithreading (SMT) is enabled extending the total number
of virtual cores. For example, a machine of type c2-standard-60 will have 60
virtual cores with threads\_per\_core equal to 2. With threads\_per\_core equal
to 1 (SMT turned off), only the 30 physical cores will be available on the VM.

The default value of \"0\" will turn off SMT for supported machine types, and
will fall back to GCE defaults for unsupported machine types (t2d, shared-core
instances, or instances with less than 2 vCPU).

Disabling SMT can be more performant in many HPC workloads, therefore it is
disabled by default where compatible.

null = SMT configuration will use the GCE defaults for the machine type
0 = SMT will be disabled where compatible (default)
1 = SMT will always be disabled (will fail on incompatible machine types)
2 = SMT will always be enabled (will fail on incompatible machine types) | `number` | `0` | no | | [zone](#input\_zone) | Default zone for creating resources | `string` | n/a | yes | ## Outputs diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md index 76a1e0172f..cc81ec31d7 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md @@ -86,41 +86,41 @@ modules. For support with the underlying modules, see the instructions in the | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [access\_config](#input\_access\_config) | Access configurations, i.e. IPs via which the VM instance can be accessed via the Internet. |
list(object({
nat_ip = string
network_tier = string
}))
| `[]` | no | -| [additional\_disks](#input\_additional\_disks) | Configurations of additional disks to be included on the partition nodes. (do not use "disk\_type: local-ssd"; known issue being addressed) |
list(object({
disk_name = string
device_name = string
disk_size_gb = number
disk_type = string
disk_labels = map(string)
auto_delete = bool
boot = bool
}))
| `[]` | no | -| [additional\_networks](#input\_additional\_networks) | Additional network interface details for GCE, if any. |
list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
}))
| `[]` | no | -| [allow\_automatic\_updates](#input\_allow\_automatic\_updates) | If false, disables automatic system package updates on the created instances. This feature is
only available on supported images (or images derived from them). For more details, see
https://cloud.google.com/compute/docs/instances/create-hpc-vm#disable_automatic_updates | `bool` | `true` | no | -| [bandwidth\_tier](#input\_bandwidth\_tier) | Configures the network interface card and the maximum egress bandwidth for VMs.
- Setting `platform_default` respects the Google Cloud Platform API default values for networking.
- Setting `virtio_enabled` explicitly selects the VirtioNet network adapter.
- Setting `gvnic_enabled` selects the gVNIC network adapter (without Tier 1 high bandwidth).
- Setting `tier_1_enabled` selects both the gVNIC adapter and Tier 1 high bandwidth networking.
- Note: both gVNIC and Tier 1 networking require a VM image with gVNIC support as well as specific VM families and shapes.
- See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"platform_default"` | no | +| [access\_config](#input\_access\_config) | Access configurations, i.e. IPs via which the VM instance can be accessed via the Internet. |
list(object({
nat_ip = string
network_tier = string
}))
| `[]` | no | +| [additional\_disks](#input\_additional\_disks) | Configurations of additional disks to be included on the partition nodes. (do not use "disk\_type: local-ssd"; known issue being addressed) |
list(object({
disk_name = string
device_name = string
disk_size_gb = number
disk_type = string
disk_labels = map(string)
auto_delete = bool
boot = bool
}))
| `[]` | no | +| [additional\_networks](#input\_additional\_networks) | Additional network interface details for GCE, if any. |
list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
}))
| `[]` | no | +| [allow\_automatic\_updates](#input\_allow\_automatic\_updates) | If false, disables automatic system package updates on the created instances. This feature is
only available on supported images (or images derived from them). For more details, see
https://cloud.google.com/compute/docs/instances/create-hpc-vm#disable_automatic_updates | `bool` | `true` | no | +| [bandwidth\_tier](#input\_bandwidth\_tier) | Configures the network interface card and the maximum egress bandwidth for VMs.
- Setting `platform_default` respects the Google Cloud Platform API default values for networking.
- Setting `virtio_enabled` explicitly selects the VirtioNet network adapter.
- Setting `gvnic_enabled` selects the gVNIC network adapter (without Tier 1 high bandwidth).
- Setting `tier_1_enabled` selects both the gVNIC adapter and Tier 1 high bandwidth networking.
- Note: both gVNIC and Tier 1 networking require a VM image with gVNIC support as well as specific VM families and shapes.
- See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"platform_default"` | no | | [can\_ip\_forward](#input\_can\_ip\_forward) | Enable IP forwarding, for NAT instances for example. | `bool` | `false` | no | | [disk\_auto\_delete](#input\_disk\_auto\_delete) | Whether or not the boot disk should be auto-deleted. | `bool` | `true` | no | | [disk\_labels](#input\_disk\_labels) | Labels specific to the boot disk. These will be merged with var.labels. | `map(string)` | `{}` | no | | [disk\_size\_gb](#input\_disk\_size\_gb) | Size of boot disk to create for the partition compute nodes. | `number` | `50` | no | | [disk\_type](#input\_disk\_type) | Boot disk type, can be either hyperdisk-balanced, pd-ssd, pd-standard, pd-balanced, or pd-extreme. | `string` | `"pd-standard"` | no | | [enable\_confidential\_vm](#input\_enable\_confidential\_vm) | Enable the Confidential VM configuration. Note: the instance image must support option. | `bool` | `false` | no | -| [enable\_oslogin](#input\_enable\_oslogin) | Enables Google Cloud os-login for user login and authentication for VMs.
See https://cloud.google.com/compute/docs/oslogin | `bool` | `true` | no | +| [enable\_oslogin](#input\_enable\_oslogin) | Enables Google Cloud os-login for user login and authentication for VMs.
See https://cloud.google.com/compute/docs/oslogin | `bool` | `true` | no | | [enable\_public\_ips](#input\_enable\_public\_ips) | If set to true. The node group VMs will have a random public IP assigned to it. Ignored if access\_config is set. | `bool` | `false` | no | | [enable\_shielded\_vm](#input\_enable\_shielded\_vm) | Enable the Shielded VM configuration. Note: the instance image must support option. | `bool` | `false` | no | | [enable\_smt](#input\_enable\_smt) | Enables Simultaneous Multi-Threading (SMT) on instance. | `bool` | `false` | no | | [enable\_spot\_vm](#input\_enable\_spot\_vm) | Enable the partition to use spot VMs (https://cloud.google.com/spot-vms). | `bool` | `false` | no | | [feature](#input\_feature) | The node feature, used to bind nodes to the nodeset. If not set, the nodeset name will be used. | `string` | `null` | no | -| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = string,
count = number
}))
| `[]` | no | -| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm node group VM instances.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-6-6-hpc-rocky-linux-8",
"project": "schedmd-slurm-public"
}
| no | -| [instance\_image\_custom](#input\_instance\_image\_custom) | A flag that designates that the user is aware that they are requesting
to use a custom and potentially incompatible image for this Slurm on
GCP module.

If the field is set to false, only the compatible families and project
names will be accepted. The deployment will fail with any other image
family or name. If set to true, no checks will be done.

See: https://goo.gle/hpc-slurm-images | `bool` | `false` | no | +| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = string,
count = number
}))
| `[]` | no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm node group VM instances.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-6-6-hpc-rocky-linux-8",
"project": "schedmd-slurm-public"
}
| no | +| [instance\_image\_custom](#input\_instance\_image\_custom) | A flag that designates that the user is aware that they are requesting
to use a custom and potentially incompatible image for this Slurm on
GCP module.

If the field is set to false, only the compatible families and project
names will be accepted. The deployment will fail with any other image
family or name. If set to true, no checks will be done.

See: https://goo.gle/hpc-slurm-images | `bool` | `false` | no | | [labels](#input\_labels) | Labels to add to partition compute instances. Key-value pairs. | `map(string)` | `{}` | no | | [machine\_type](#input\_machine\_type) | Compute Platform machine type to use for this partition compute nodes. | `string` | `"c2-standard-60"` | no | | [metadata](#input\_metadata) | Metadata, provided as a map. | `map(string)` | `{}` | no | | [min\_cpu\_platform](#input\_min\_cpu\_platform) | The name of the minimum CPU platform that you want the instance to use. | `string` | `null` | no | -| [name](#input\_name) | Name of the nodeset. Automatically populated by the module id if not set.
If setting manually, ensure a unique value across all nodesets. | `string` | n/a | yes | -| [on\_host\_maintenance](#input\_on\_host\_maintenance) | Instance availability Policy.

Note: Placement groups are not supported when on\_host\_maintenance is set to
"MIGRATE" and will be deactivated regardless of the value of
enable\_placement. To support enable\_placement, ensure on\_host\_maintenance is
set to "TERMINATE". | `string` | `"TERMINATE"` | no | +| [name](#input\_name) | Name of the nodeset. Automatically populated by the module id if not set.
If setting manually, ensure a unique value across all nodesets. | `string` | n/a | yes | +| [on\_host\_maintenance](#input\_on\_host\_maintenance) | Instance availability Policy.

Note: Placement groups are not supported when on\_host\_maintenance is set to
"MIGRATE" and will be deactivated regardless of the value of
enable\_placement. To support enable\_placement, ensure on\_host\_maintenance is
set to "TERMINATE". | `string` | `"TERMINATE"` | no | | [preemptible](#input\_preemptible) | Should use preemptibles to burst. | `bool` | `false` | no | | [project\_id](#input\_project\_id) | Project ID to create resources in. | `string` | n/a | yes | | [region](#input\_region) | The default region for Cloud resources. | `string` | n/a | yes | | [service\_account\_email](#input\_service\_account\_email) | Service account e-mail address to attach to the compute instances. | `string` | `null` | no | -| [service\_account\_scopes](#input\_service\_account\_scopes) | Scopes to attach to the compute instances. | `set(string)` |
[
"https://www.googleapis.com/auth/cloud-platform"
]
| no | -| [shielded\_instance\_config](#input\_shielded\_instance\_config) | Shielded VM configuration for the instance. Note: not used unless
enable\_shielded\_vm is 'true'.
- enable\_integrity\_monitoring : Compare the most recent boot measurements to the
integrity policy baseline and return a pair of pass/fail results depending on
whether they match or not.
- enable\_secure\_boot : Verify the digital signature of all boot components, and
halt the boot process if signature verification fails.
- enable\_vtpm : Use a virtualized trusted platform module, which is a
specialized computer chip you can use to encrypt objects like keys and
certificates. |
object({
enable_integrity_monitoring = bool
enable_secure_boot = bool
enable_vtpm = bool
})
|
{
"enable_integrity_monitoring": true,
"enable_secure_boot": true,
"enable_vtpm": true
}
| no | +| [service\_account\_scopes](#input\_service\_account\_scopes) | Scopes to attach to the compute instances. | `set(string)` |
[
"https://www.googleapis.com/auth/cloud-platform"
]
| no | +| [shielded\_instance\_config](#input\_shielded\_instance\_config) | Shielded VM configuration for the instance. Note: not used unless
enable\_shielded\_vm is 'true'.
- enable\_integrity\_monitoring : Compare the most recent boot measurements to the
integrity policy baseline and return a pair of pass/fail results depending on
whether they match or not.
- enable\_secure\_boot : Verify the digital signature of all boot components, and
halt the boot process if signature verification fails.
- enable\_vtpm : Use a virtualized trusted platform module, which is a
specialized computer chip you can use to encrypt objects like keys and
certificates. |
object({
enable_integrity_monitoring = bool
enable_secure_boot = bool
enable_vtpm = bool
})
|
{
"enable_integrity_monitoring": true,
"enable_secure_boot": true,
"enable_vtpm": true
}
| no | | [slurm\_bucket\_path](#input\_slurm\_bucket\_path) | Path to the Slurm bucket. | `string` | n/a | yes | | [slurm\_cluster\_name](#input\_slurm\_cluster\_name) | Name of the Slurm cluster. | `string` | n/a | yes | -| [spot\_instance\_config](#input\_spot\_instance\_config) | Configuration for spot VMs. |
object({
termination_action = string
})
| `null` | no | +| [spot\_instance\_config](#input\_spot\_instance\_config) | Configuration for spot VMs. |
object({
termination_action = string
})
| `null` | no | | [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | Subnet to deploy to. | `string` | n/a | yes | | [tags](#input\_tags) | Network tag list. | `list(string)` | `[]` | no | @@ -129,6 +129,6 @@ modules. For support with the underlying modules, see the instructions in the | Name | Description | |------|-------------| | [instance\_template\_self\_link](#output\_instance\_template\_self\_link) | The URI of the template. | -| [node\_name\_prefix](#output\_node\_name\_prefix) | The prefix to be used for the node names.

Make sure that nodes are named `-`
This temporary required for proper functioning of the nodes.
While Slurm scheduler uses "features" to bind node and nodeset,
the SlurmGCP relies on node names for this (to be switched to features as well). | +| [node\_name\_prefix](#output\_node\_name\_prefix) | The prefix to be used for the node names.

Make sure that nodes are named `<node_name_prefix>-<any_suffix>`.
This is temporarily required for proper functioning of the nodes.
While Slurm scheduler uses "features" to bind node and nodeset,
the SlurmGCP relies on node names for this (to be switched to features as well). | | [nodeset\_dyn](#output\_nodeset\_dyn) | Details of the nodeset. Typically used as input to `schedmd-slurm-gcp-v6-partition`. | diff --git a/community/modules/network/rdma-vpc/README.md b/community/modules/network/rdma-vpc/README.md index df9f6c3b94..d9ff0993f2 100644 --- a/community/modules/network/rdma-vpc/README.md +++ b/community/modules/network/rdma-vpc/README.md @@ -66,7 +66,7 @@ No resources. | [region](#input\_region) | The default region for Cloud resources | `string` | n/a | yes | | [secondary\_ranges](#input\_secondary\_ranges) | Secondary ranges that will be used in some of the subnets. Please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions. | `map(list(object({ range_name = string, ip_cidr_range = string })))` | `{}` | no | | [shared\_vpc\_host](#input\_shared\_vpc\_host) | Makes this project a Shared VPC host if 'true' (default 'false') | `bool` | `false` | no | -| [subnetworks\_template](#input\_subnetworks\_template) | Rules for creating subnetworks within the VPC |
object({
count = number
name_prefix = string
ip_range = string
region = string
private_access = optional(bool)
})
|
{
"count": 8,
"ip_range": "192.168.0.0/16",
"name_prefix": "subnet",
"region": null
}
| no | +| [subnetworks\_template](#input\_subnetworks\_template) | Rules for creating subnetworks within the VPC |
object({
count = number
name_prefix = string
ip_range = string
region = string
private_access = optional(bool)
})
|
{
"count": 8,
"ip_range": "192.168.0.0/16",
"name_prefix": "subnet",
"region": null
}
| no | ## Outputs @@ -76,5 +76,7 @@ No resources. | [network\_name](#output\_network\_name) | Name of the new VPC network | | [network\_self\_link](#output\_network\_self\_link) | Self link of the new VPC network | | [subnetwork\_interfaces](#output\_subnetwork\_interfaces) | Full list of subnetwork objects belonging to the new VPC network (compatible with vm-instance) | +| [subnetwork\_interfaces\_gke](#output\_subnetwork\_interfaces\_gke) | Full list of subnetwork objects belonging to the new VPC network (compatible with gke-node-pool) | +| [subnetwork\_name\_prefix](#output\_subnetwork\_name\_prefix) | Prefix of the RDMA subnetwork names | | [subnetworks](#output\_subnetworks) | Full list of subnetwork objects belonging to the new VPC network | diff --git a/community/modules/network/rdma-vpc/main.tf b/community/modules/network/rdma-vpc/main.tf index d2fa87603b..a166599c58 100644 --- a/community/modules/network/rdma-vpc/main.tf +++ b/community/modules/network/rdma-vpc/main.tf @@ -125,6 +125,23 @@ locals { alias_ip_range = [] } ] + + # FIX_ME(arajmane): There is a concern about this not working in a shared VPC environment. + # To unblock experimental testing, we decided to go ahead with this. 
+ output_subnets_gke = [ + for subnet in module.vpc.subnets : { + network = local.network_name + subnetwork = subnet.name + subnetwork_project = null + network_ip = "" + nic_type = coalesce(var.nic_type, try(regex("IRDMA", local.profile_name), regex("MRDMA", local.profile_name), "RDMA")) + stack_type = null + queue_count = null + access_config = [] + ipv6_access_config = [] + alias_ip_range = [] + } + ] } module "vpc" { diff --git a/community/modules/network/rdma-vpc/outputs.tf b/community/modules/network/rdma-vpc/outputs.tf index 7831625145..1c2a304fd8 100644 --- a/community/modules/network/rdma-vpc/outputs.tf +++ b/community/modules/network/rdma-vpc/outputs.tf @@ -43,3 +43,17 @@ output "subnetwork_interfaces" { value = local.output_subnets depends_on = [module.vpc] } + +# The output subnetwork_interfaces is compatible with vm-instance module but not with gke-node-pool +# See https://github.com/GoogleCloudPlatform/cluster-toolkit/blob/99493df21cecf6a092c45298bf7a45e0343cf622/modules/compute/vm-instance/variables.tf#L220 +# So, we need a separate output that makes the network and subnetwork names available +output "subnetwork_interfaces_gke" { + description = "Full list of subnetwork objects belonging to the new VPC network (compatible with gke-node-pool)" + value = local.output_subnets_gke + depends_on = [module.vpc] +} + +output "subnetwork_name_prefix" { + description = "Prefix of the RDMA subnetwork names" + value = var.subnetworks_template.name_prefix +} diff --git a/community/modules/network/rdma-vpc/vpc-submodule/README.md b/community/modules/network/rdma-vpc/vpc-submodule/README.md index 470062baf7..5dfd55b4bb 100644 --- a/community/modules/network/rdma-vpc/vpc-submodule/README.md +++ b/community/modules/network/rdma-vpc/vpc-submodule/README.md @@ -54,10 +54,10 @@ limitations under the License. 
| [auto\_create\_subnetworks](#input\_auto\_create\_subnetworks) | When set to true, the network is created in 'auto subnet mode' and it will create a subnet for each region automatically across the 10.128.0.0/9 address range. When set to false, the network is created in 'custom subnet mode' so the user can explicitly connect subnetwork resources. | `bool` | `false` | no | | [delete\_default\_internet\_gateway\_routes](#input\_delete\_default\_internet\_gateway\_routes) | If set, ensure that all routes within the network specified whose names begin with 'default-route' and with a next hop of 'default-internet-gateway' are deleted | `bool` | `false` | no | | [description](#input\_description) | An optional description of this resource. The resource must be recreated to modify this field. | `string` | `""` | no | -| [egress\_rules](#input\_egress\_rules) | List of egress rules. This will be ignored if variable 'rules' is non-empty |
list(object({
name = string
description = optional(string, null)
disabled = optional(bool, null)
priority = optional(number, null)
destination_ranges = optional(list(string), [])
source_ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))

allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
| `[]` | no | +| [egress\_rules](#input\_egress\_rules) | List of egress rules. This will be ignored if variable 'rules' is non-empty |
list(object({
name = string
description = optional(string, null)
disabled = optional(bool, null)
priority = optional(number, null)
destination_ranges = optional(list(string), [])
source_ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))

allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
| `[]` | no | | [enable\_ipv6\_ula](#input\_enable\_ipv6\_ula) | Enabled IPv6 ULA, this is a permanent change and cannot be undone! (default 'false') | `bool` | `false` | no | -| [firewall\_rules](#input\_firewall\_rules) | This is DEPRECATED and available for backward compatibility. Use ingress\_rules and egress\_rules variables. List of firewall rules |
list(object({
name = string
description = optional(string, null)
direction = optional(string, "INGRESS")
disabled = optional(bool, null)
priority = optional(number, null)
ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))

allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
| `[]` | no | -| [ingress\_rules](#input\_ingress\_rules) | List of ingress rules. This will be ignored if variable 'rules' is non-empty |
list(object({
name = string
description = optional(string, null)
disabled = optional(bool, null)
priority = optional(number, null)
destination_ranges = optional(list(string), [])
source_ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))

allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
| `[]` | no | +| [firewall\_rules](#input\_firewall\_rules) | This is DEPRECATED and available for backward compatibility. Use ingress\_rules and egress\_rules variables. List of firewall rules |
list(object({
name = string
description = optional(string, null)
direction = optional(string, "INGRESS")
disabled = optional(bool, null)
priority = optional(number, null)
ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))

allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
| `[]` | no | +| [ingress\_rules](#input\_ingress\_rules) | List of ingress rules. This will be ignored if variable 'rules' is non-empty |
list(object({
name = string
description = optional(string, null)
disabled = optional(bool, null)
priority = optional(number, null)
destination_ranges = optional(list(string), [])
source_ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))

allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
| `[]` | no | | [internal\_ipv6\_range](#input\_internal\_ipv6\_range) | When enabling IPv6 ULA, optionally, specify a /48 from fd20::/20 (default null) | `string` | `null` | no | | [mtu](#input\_mtu) | The network MTU (If set to 0, meaning MTU is unset - defaults to '1460'). Recommended values: 1460 (default for historic reasons), 1500 (Internet default), or 8896 (for Jumbo packets). Allowed are all values in the range 1300 to 8896, inclusively. | `number` | `0` | no | | [network\_firewall\_policy\_enforcement\_order](#input\_network\_firewall\_policy\_enforcement\_order) | Set the order that Firewall Rules and Firewall Policies are evaluated. Valid values are `BEFORE_CLASSIC_FIREWALL` and `AFTER_CLASSIC_FIREWALL`. (default null or equivalent to `AFTER_CLASSIC_FIREWALL`) | `string` | `null` | no | @@ -68,7 +68,7 @@ limitations under the License. | [routing\_mode](#input\_routing\_mode) | The network routing mode (default 'GLOBAL') | `string` | `"GLOBAL"` | no | | [secondary\_ranges](#input\_secondary\_ranges) | Secondary ranges that will be used in some of the subnets | `map(list(object({ range_name = string, ip_cidr_range = string })))` | `{}` | no | | [shared\_vpc\_host](#input\_shared\_vpc\_host) | Makes this project a Shared VPC host if 'true' (default 'false') | `bool` | `false` | no | -| [subnets](#input\_subnets) | The list of subnets being created |
list(object({
subnet_name = string
subnet_ip = string
subnet_region = string
subnet_private_access = optional(string)
subnet_private_ipv6_access = optional(string)
subnet_flow_logs = optional(string)
subnet_flow_logs_interval = optional(string)
subnet_flow_logs_sampling = optional(string)
subnet_flow_logs_metadata = optional(string)
subnet_flow_logs_filter = optional(string)
subnet_flow_logs_metadata_fields = optional(list(string))
description = optional(string)
purpose = optional(string)
role = optional(string)
stack_type = optional(string)
ipv6_access_type = optional(string)
}))
| n/a | yes | +| [subnets](#input\_subnets) | The list of subnets being created |
list(object({
subnet_name = string
subnet_ip = string
subnet_region = string
subnet_private_access = optional(string)
subnet_private_ipv6_access = optional(string)
subnet_flow_logs = optional(string)
subnet_flow_logs_interval = optional(string)
subnet_flow_logs_sampling = optional(string)
subnet_flow_logs_metadata = optional(string)
subnet_flow_logs_filter = optional(string)
subnet_flow_logs_metadata_fields = optional(list(string))
description = optional(string)
purpose = optional(string)
role = optional(string)
stack_type = optional(string)
ipv6_access_type = optional(string)
}))
| n/a | yes | ## Outputs diff --git a/community/modules/remote-desktop/chrome-remote-desktop/README.md b/community/modules/remote-desktop/chrome-remote-desktop/README.md index ee7ee37357..f2f2f1966c 100644 --- a/community/modules/remote-desktop/chrome-remote-desktop/README.md +++ b/community/modules/remote-desktop/chrome-remote-desktop/README.md @@ -74,29 +74,29 @@ No resources. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [add\_deployment\_name\_before\_prefix](#input\_add\_deployment\_name\_before\_prefix) | If true, the names of VMs and disks will always be prefixed with `deployment_name` to enable uniqueness across deployments.
See `name_prefix` for further details on resource naming behavior. | `bool` | `false` | no | +| [add\_deployment\_name\_before\_prefix](#input\_add\_deployment\_name\_before\_prefix) | If true, the names of VMs and disks will always be prefixed with `deployment_name` to enable uniqueness across deployments.
See `name_prefix` for further details on resource naming behavior. | `bool` | `false` | no | | [auto\_delete\_boot\_disk](#input\_auto\_delete\_boot\_disk) | Controls if boot disk should be auto-deleted when instance is deleted. | `bool` | `true` | no | -| [bandwidth\_tier](#input\_bandwidth\_tier) | Tier 1 bandwidth increases the maximum egress bandwidth for VMs.
Using the `tier_1_enabled` setting will enable both gVNIC and TIER\_1 higher bandwidth networking.
Using the `gvnic_enabled` setting will only enable gVNIC and will not enable TIER\_1.
Note that TIER\_1 only works with specific machine families & shapes and must be using an image th
at supports gVNIC. See [official docs](https://cloud.google.com/compute/docs/networking/configure-v
m-with-high-bandwidth-configuration) for more details. | `string` | `"not_enabled"` | no | +| [bandwidth\_tier](#input\_bandwidth\_tier) | Tier 1 bandwidth increases the maximum egress bandwidth for VMs.
Using the `tier_1_enabled` setting will enable both gVNIC and TIER\_1 higher bandwidth networking.
Using the `gvnic_enabled` setting will only enable gVNIC and will not enable TIER\_1.
Note that TIER\_1 only works with specific machine families & shapes and must be using an image
that supports gVNIC. See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration)
for more details. | `string` | `"not_enabled"` | no | | [deployment\_name](#input\_deployment\_name) | Cluster Toolkit deployment name. Cloud resource names will include this value. | `string` | n/a | yes | | [disk\_size\_gb](#input\_disk\_size\_gb) | Size of disk for instances. | `number` | `200` | no | | [disk\_type](#input\_disk\_type) | Disk type for instances. | `string` | `"pd-balanced"` | no | | [enable\_oslogin](#input\_enable\_oslogin) | Enable or Disable OS Login with "ENABLE" or "DISABLE". Set to "INHERIT" to inherit project OS Login setting. | `string` | `"ENABLE"` | no | | [enable\_public\_ips](#input\_enable\_public\_ips) | If set to true, instances will have public IPs on the internet. | `bool` | `true` | no | -| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. Requires virtual workstation accelerator if Nvidia Grid Drivers are required |
list(object({
type = string,
count = number
}))
|
[
{
"count": 1,
"type": "nvidia-tesla-t4-vws"
}
]
| no | +| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. Requires virtual workstation accelerator if Nvidia Grid Drivers are required |
list(object({
type = string,
count = number
}))
|
[
{
"count": 1,
"type": "nvidia-tesla-t4-vws"
}
]
| no | | [install\_nvidia\_driver](#input\_install\_nvidia\_driver) | Installs the nvidia driver (true/false). For details, see https://cloud.google.com/compute/docs/gpus/install-drivers-gpu | `bool` | n/a | yes | | [instance\_count](#input\_instance\_count) | Number of instances | `number` | `1` | no | -| [instance\_image](#input\_instance\_image) | Image used to build chrome remote desktop node. The default image is
name="debian-12-bookworm-v20240815" and project="debian-cloud".
NOTE: uses fixed version of image to avoid NVIDIA driver compatibility issues.

An alternative image is from name="ubuntu-2204-jammy-v20240126" and project="ubuntu-os-cloud".

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted. | `map(string)` |
{
"name": "debian-12-bookworm-v20240815",
"project": "debian-cloud"
}
| no | +| [instance\_image](#input\_instance\_image) | Image used to build chrome remote desktop node. The default image is
name="debian-12-bookworm-v20240815" and project="debian-cloud".
NOTE: uses fixed version of image to avoid NVIDIA driver compatibility issues.

An alternative image is from name="ubuntu-2204-jammy-v20240126" and project="ubuntu-os-cloud".

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted. | `map(string)` |
{
"name": "debian-12-bookworm-v20240815",
"project": "debian-cloud"
}
| no | | [labels](#input\_labels) | Labels to add to the instances. Key-value pairs. | `map(string)` | `{}` | no | | [machine\_type](#input\_machine\_type) | Machine type to use for the instance creation. Must be N1 family if GPU is used. | `string` | `"n1-standard-8"` | no | | [metadata](#input\_metadata) | Metadata, provided as a map | `map(string)` | `{}` | no | -| [name\_prefix](#input\_name\_prefix) | An optional name for all VM and disk resources.
If not supplied, `deployment_name` will be used.
When `name_prefix` is supplied, and `add_deployment_name_before_prefix` is set,
then resources are named by "<`deployment_name`>-<`name_prefix`>-<#>". | `string` | `null` | no | -| [network\_interfaces](#input\_network\_interfaces) | A list of network interfaces. The options match that of the terraform
network\_interface block of google\_compute\_instance. For descriptions of the
subfields or more information see the documentation:
https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance#nested_network_interface
**\_NOTE:\_** If `network_interfaces` are set, `network_self_link` and
`subnetwork_self_link` will be ignored, even if they are provided through
the `use` field. `bandwidth_tier` and `enable_public_ips` also do not apply
to network interfaces defined in this variable.
Subfields:
network (string, required if subnetwork is not supplied)
subnetwork (string, required if network is not supplied)
subnetwork\_project (string, optional)
network\_ip (string, optional)
nic\_type (string, optional, choose from ["GVNIC", "VIRTIO\_NET", "RDMA", "IRDMA", "MRDMA"])
stack\_type (string, optional, choose from ["IPV4\_ONLY", "IPV4\_IPV6"])
queue\_count (number, optional)
access\_config (object, optional)
ipv6\_access\_config (object, optional)
alias\_ip\_range (list(object), optional) |
list(object({
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
| `[]` | no | +| [name\_prefix](#input\_name\_prefix) | An optional name for all VM and disk resources.
If not supplied, `deployment_name` will be used.
When `name_prefix` is supplied, and `add_deployment_name_before_prefix` is set,
then resources are named by "<`deployment_name`>-<`name_prefix`>-<#>". | `string` | `null` | no | +| [network\_interfaces](#input\_network\_interfaces) | A list of network interfaces. The options match that of the terraform
network\_interface block of google\_compute\_instance. For descriptions of the
subfields or more information see the documentation:
https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance#nested_network_interface
**\_NOTE:\_** If `network_interfaces` are set, `network_self_link` and
`subnetwork_self_link` will be ignored, even if they are provided through
the `use` field. `bandwidth_tier` and `enable_public_ips` also do not apply
to network interfaces defined in this variable.
Subfields:
network (string, required if subnetwork is not supplied)
subnetwork (string, required if network is not supplied)
subnetwork\_project (string, optional)
network\_ip (string, optional)
nic\_type (string, optional, choose from ["GVNIC", "VIRTIO\_NET", "RDMA", "IRDMA", "MRDMA"])
stack\_type (string, optional, choose from ["IPV4\_ONLY", "IPV4\_IPV6"])
queue\_count (number, optional)
access\_config (object, optional)
ipv6\_access\_config (object, optional)
alias\_ip\_range (list(object), optional) |
list(object({
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
| `[]` | no | | [network\_self\_link](#input\_network\_self\_link) | The self link of the network to attach the VM. | `string` | `"default"` | no | -| [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
| `[]` | no | +| [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Describes maintenance behavior for the instance. If left blank this will default to `MIGRATE` except for when `placement_policy`, spot provisioning, or GPUs require it to be `TERMINATE` | `string` | `"TERMINATE"` | no | | [project\_id](#input\_project\_id) | Project in which Google Cloud resources will be created | `string` | n/a | yes | | [region](#input\_region) | Default region for creating resources | `string` | n/a | yes | -| [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/cloud-platform"
]
}
| no | +| [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/cloud-platform"
]
}
| no | | [spot](#input\_spot) | Provision VMs using discounted Spot pricing, allowing for preemption | `bool` | `false` | no | | [startup\_script](#input\_startup\_script) | Startup script used on the instance | `string` | `null` | no | | [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | The self link of the subnetwork to attach the VM. | `string` | `null` | no | diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md index ddf940b92a..b67c388d4e 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md @@ -251,19 +251,19 @@ limitations under the License. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [additional\_disks](#input\_additional\_disks) | List of maps of disks. |
list(object({
disk_name = string
device_name = string
disk_type = string
disk_size_gb = number
disk_labels = map(string)
auto_delete = bool
boot = bool
}))
| `[]` | no | -| [allow\_automatic\_updates](#input\_allow\_automatic\_updates) | If false, disables automatic system package updates on the created instances. This feature is
only available on supported images (or images derived from them). For more details, see
https://cloud.google.com/compute/docs/instances/create-hpc-vm#disable_automatic_updates | `bool` | `true` | no | -| [bandwidth\_tier](#input\_bandwidth\_tier) | Configures the network interface card and the maximum egress bandwidth for VMs.
- Setting `platform_default` respects the Google Cloud Platform API default values for networking.
- Setting `virtio_enabled` explicitly selects the VirtioNet network adapter.
- Setting `gvnic_enabled` selects the gVNIC network adapter (without Tier 1 high bandwidth).
- Setting `tier_1_enabled` selects both the gVNIC adapter and Tier 1 high bandwidth networking.
- Note: both gVNIC and Tier 1 networking require a VM image with gVNIC support as well as specific VM families and shapes.
- See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"platform_default"` | no | +| [additional\_disks](#input\_additional\_disks) | List of maps of disks. |
list(object({
disk_name = string
device_name = string
disk_type = string
disk_size_gb = number
disk_labels = map(string)
auto_delete = bool
boot = bool
}))
| `[]` | no | +| [allow\_automatic\_updates](#input\_allow\_automatic\_updates) | If false, disables automatic system package updates on the created instances. This feature is
only available on supported images (or images derived from them). For more details, see
https://cloud.google.com/compute/docs/instances/create-hpc-vm#disable_automatic_updates | `bool` | `true` | no | +| [bandwidth\_tier](#input\_bandwidth\_tier) | Configures the network interface card and the maximum egress bandwidth for VMs.
- Setting `platform_default` respects the Google Cloud Platform API default values for networking.
- Setting `virtio_enabled` explicitly selects the VirtioNet network adapter.
- Setting `gvnic_enabled` selects the gVNIC network adapter (without Tier 1 high bandwidth).
- Setting `tier_1_enabled` selects both the gVNIC adapter and Tier 1 high bandwidth networking.
- Note: both gVNIC and Tier 1 networking require a VM image with gVNIC support as well as specific VM families and shapes.
- See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"platform_default"` | no | | [bucket\_dir](#input\_bucket\_dir) | Bucket directory for cluster files to be put into. If not specified, then one will be chosen based on slurm\_cluster\_name. | `string` | `null` | no | -| [bucket\_name](#input\_bucket\_name) | Name of GCS bucket.
Ignored when 'create\_bucket' is true. | `string` | `null` | no | +| [bucket\_name](#input\_bucket\_name) | Name of GCS bucket.
Ignored when 'create\_bucket' is true. | `string` | `null` | no | | [can\_ip\_forward](#input\_can\_ip\_forward) | Enable IP forwarding, for NAT instances for example. | `bool` | `false` | no | | [cgroup\_conf\_tpl](#input\_cgroup\_conf\_tpl) | Slurm cgroup.conf template file path. | `string` | `null` | no | -| [cloud\_parameters](#input\_cloud\_parameters) | cloud.conf options. Defaults inherited from [Slurm GCP repo](https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/terraform/slurm_cluster/modules/slurm_files/README_TF.md#input_cloud_parameters) |
object({
no_comma_params = optional(bool)
resume_rate = optional(number)
resume_timeout = optional(number)
suspend_rate = optional(number)
suspend_timeout = optional(number)
topology_plugin = optional(string)
topology_param = optional(string)
tree_width = optional(number)
})
| `{}` | no | -| [cloudsql](#input\_cloudsql) | Use this database instead of the one on the controller.
server\_ip : Address of the database server.
user : The user to access the database as.
password : The password, given the user, to access the given database. (sensitive)
db\_name : The database to access.
user\_managed\_replication : The list of location and (optional) kms\_key\_name for secret |
object({
server_ip = string
user = string
password = string # sensitive
db_name = string
user_managed_replication = optional(list(object({
location = string
kms_key_name = optional(string)
})), [])
})
| `null` | no | +| [cloud\_parameters](#input\_cloud\_parameters) | cloud.conf options. Defaults inherited from [Slurm GCP repo](https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/terraform/slurm_cluster/modules/slurm_files/README_TF.md#input_cloud_parameters) |
object({
no_comma_params = optional(bool)
resume_rate = optional(number)
resume_timeout = optional(number)
suspend_rate = optional(number)
suspend_timeout = optional(number)
topology_plugin = optional(string)
topology_param = optional(string)
tree_width = optional(number)
})
| `{}` | no | +| [cloudsql](#input\_cloudsql) | Use this database instead of the one on the controller.
server\_ip : Address of the database server.
user : The user to access the database as.
password : The password, given the user, to access the given database. (sensitive)
db\_name : The database to access.
user\_managed\_replication : The list of location and (optional) kms\_key\_name for secret |
object({
server_ip = string
user = string
password = string # sensitive
db_name = string
user_managed_replication = optional(list(object({
location = string
kms_key_name = optional(string)
})), [])
})
| `null` | no | | [compute\_startup\_script](#input\_compute\_startup\_script) | Startup script used by the compute VMs. | `string` | `"# no-op"` | no | -| [compute\_startup\_scripts\_timeout](#input\_compute\_startup\_scripts\_timeout) | The timeout (seconds) applied to each script in compute\_startup\_scripts. If
any script exceeds this timeout, then the instance setup process is considered
failed and handled accordingly.

NOTE: When set to 0, the timeout is considered infinite and thus disabled. | `number` | `300` | no | +| [compute\_startup\_scripts\_timeout](#input\_compute\_startup\_scripts\_timeout) | The timeout (seconds) applied to each script in compute\_startup\_scripts. If
any script exceeds this timeout, then the instance setup process is considered
failed and handled accordingly.

NOTE: When set to 0, the timeout is considered infinite and thus disabled. | `number` | `300` | no | | [controller\_startup\_script](#input\_controller\_startup\_script) | Startup script used by the controller VM. | `string` | `"# no-op"` | no | -| [controller\_startup\_scripts\_timeout](#input\_controller\_startup\_scripts\_timeout) | The timeout (seconds) applied to each script in controller\_startup\_scripts. If
any script exceeds this timeout, then the instance setup process is considered
failed and handled accordingly.

NOTE: When set to 0, the timeout is considered infinite and thus disabled. | `number` | `300` | no | +| [controller\_startup\_scripts\_timeout](#input\_controller\_startup\_scripts\_timeout) | The timeout (seconds) applied to each script in controller\_startup\_scripts. If
any script exceeds this timeout, then the instance setup process is considered
failed and handled accordingly.

NOTE: When set to 0, the timeout is considered infinite and thus disabled. | `number` | `300` | no | | [create\_bucket](#input\_create\_bucket) | Create GCS bucket instead of using an existing one. | `bool` | `true` | no | | [deployment\_name](#input\_deployment\_name) | Name of the deployment. | `string` | n/a | yes | | [disable\_controller\_public\_ips](#input\_disable\_controller\_public\_ips) | DEPRECATED: Use `enable_controller_public_ips` instead. | `bool` | `null` | no | @@ -273,56 +273,56 @@ limitations under the License. | [disk\_labels](#input\_disk\_labels) | Labels specific to the boot disk. These will be merged with var.labels. | `map(string)` | `{}` | no | | [disk\_size\_gb](#input\_disk\_size\_gb) | Boot disk size in GB. | `number` | `50` | no | | [disk\_type](#input\_disk\_type) | Boot disk type, can be either hyperdisk-balanced, pd-ssd, pd-standard, pd-balanced, or pd-extreme. | `string` | `"pd-ssd"` | no | -| [enable\_bigquery\_load](#input\_enable\_bigquery\_load) | Enables loading of cluster job usage into big query.

NOTE: Requires Google Bigquery API. | `bool` | `false` | no | -| [enable\_cleanup\_compute](#input\_enable\_cleanup\_compute) | Enables automatic cleanup of compute nodes and resource policies (e.g.
placement groups) managed by this module, when cluster is destroyed.

*WARNING*: Toggling this off will impact the running workload.
Deployed compute nodes will be destroyed. | `bool` | `true` | no | +| [enable\_bigquery\_load](#input\_enable\_bigquery\_load) | Enables loading of cluster job usage into BigQuery.

NOTE: Requires the Google BigQuery API. | `bool` | `false` | no | +| [enable\_cleanup\_compute](#input\_enable\_cleanup\_compute) | Enables automatic cleanup of compute nodes and resource policies (e.g.
placement groups) managed by this module, when cluster is destroyed.

*WARNING*: Toggling this off will impact the running workload.
Deployed compute nodes will be destroyed. | `bool` | `true` | no | | [enable\_confidential\_vm](#input\_enable\_confidential\_vm) | Enable the Confidential VM configuration. Note: the instance image must support option. | `bool` | `false` | no | | [enable\_controller\_public\_ips](#input\_enable\_controller\_public\_ips) | If set to true. The controller will have a random public IP assigned to it. Ignored if access\_config is set. | `bool` | `false` | no | | [enable\_debug\_logging](#input\_enable\_debug\_logging) | Enables debug logging mode. | `bool` | `false` | no | -| [enable\_default\_mounts](#input\_enable\_default\_mounts) | Enable default global network storage from the controller
- /usr/local/etc/slurm
- /etc/munge
- /home
- /apps
Warning: If these are disabled, the slurm etc and munge dirs must be added
manually, or some other mechanism must be used to synchronize the slurm conf
files and the munge key across the cluster. | `bool` | `true` | no | +| [enable\_default\_mounts](#input\_enable\_default\_mounts) | Enable default global network storage from the controller
- /usr/local/etc/slurm
- /etc/munge
- /home
- /apps
Warning: If these are disabled, the slurm etc and munge dirs must be added
manually, or some other mechanism must be used to synchronize the slurm conf
files and the munge key across the cluster. | `bool` | `true` | no | | [enable\_devel](#input\_enable\_devel) | DEPRECATED: `enable_devel` is always on. | `bool` | `null` | no | -| [enable\_external\_prolog\_epilog](#input\_enable\_external\_prolog\_epilog) | Automatically enable a script that will execute prolog and epilog scripts
shared by NFS from the controller to compute nodes. Find more details at:
https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/tools/prologs-epilogs/README.md | `bool` | `null` | no | -| [enable\_oslogin](#input\_enable\_oslogin) | Enables Google Cloud os-login for user login and authentication for VMs.
See https://cloud.google.com/compute/docs/oslogin | `bool` | `true` | no | +| [enable\_external\_prolog\_epilog](#input\_enable\_external\_prolog\_epilog) | Automatically enable a script that will execute prolog and epilog scripts
shared by NFS from the controller to compute nodes. Find more details at:
https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/tools/prologs-epilogs/README.md | `bool` | `null` | no | +| [enable\_oslogin](#input\_enable\_oslogin) | Enables Google Cloud os-login for user login and authentication for VMs.
See https://cloud.google.com/compute/docs/oslogin | `bool` | `true` | no | | [enable\_shielded\_vm](#input\_enable\_shielded\_vm) | Enable the Shielded VM configuration. Note: the instance image must support option. | `bool` | `false` | no | | [enable\_slurm\_gcp\_plugins](#input\_enable\_slurm\_gcp\_plugins) | Enables calling hooks in scripts/slurm\_gcp\_plugins during cluster resume and suspend. | `any` | `false` | no | | [enable\_smt](#input\_enable\_smt) | Enables Simultaneous Multi-Threading (SMT) on instance. | `bool` | `false` | no | -| [endpoint\_versions](#input\_endpoint\_versions) | Version of the API to use (The compute service is the only API currently supported) |
object({
compute = string
})
|
{
"compute": "beta"
}
| no | -| [epilog\_scripts](#input\_epilog\_scripts) | List of scripts to be used for Epilog. Programs for the slurmd to execute
on every node when a user's job completes.
See https://slurm.schedmd.com/slurm.conf.html#OPT_Epilog. |
list(object({
filename = string
content = optional(string)
source = optional(string)
}))
| `[]` | no | +| [endpoint\_versions](#input\_endpoint\_versions) | Version of the API to use (The compute service is the only API currently supported) |
object({
compute = string
})
|
{
"compute": "beta"
}
| no | +| [epilog\_scripts](#input\_epilog\_scripts) | List of scripts to be used for Epilog. Programs for the slurmd to execute
on every node when a user's job completes.
See https://slurm.schedmd.com/slurm.conf.html#OPT_Epilog. |
list(object({
filename = string
content = optional(string)
source = optional(string)
}))
| `[]` | no | | [extra\_logging\_flags](#input\_extra\_logging\_flags) | The only available flag is `trace_api` | `map(bool)` | `{}` | no | | [gcloud\_path\_override](#input\_gcloud\_path\_override) | Directory of the gcloud executable to be used during cleanup | `string` | `""` | no | -| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = string,
count = number
}))
| `[]` | no | -| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm controller VM instance.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-6-6-hpc-rocky-linux-8",
"project": "schedmd-slurm-public"
}
| no | -| [instance\_image\_custom](#input\_instance\_image\_custom) | A flag that designates that the user is aware that they are requesting
to use a custom and potentially incompatible image for this Slurm on
GCP module.

If the field is set to false, only the compatible families and project
names will be accepted. The deployment will fail with any other image
family or name. If set to true, no checks will be done.

See: https://goo.gle/hpc-slurm-images | `bool` | `false` | no | +| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = string,
count = number
}))
| `[]` | no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm controller VM instance.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-6-6-hpc-rocky-linux-8",
"project": "schedmd-slurm-public"
}
| no | +| [instance\_image\_custom](#input\_instance\_image\_custom) | A flag that designates that the user is aware that they are requesting
to use a custom and potentially incompatible image for this Slurm on
GCP module.

If the field is set to false, only the compatible families and project
names will be accepted. The deployment will fail with any other image
family or name. If set to true, no checks will be done.

See: https://goo.gle/hpc-slurm-images | `bool` | `false` | no | | [instance\_template](#input\_instance\_template) | DEPRECATED: Instance template can not be specified for controller. | `string` | `null` | no | | [labels](#input\_labels) | Labels, provided as a map. | `map(string)` | `{}` | no | -| [login\_network\_storage](#input\_login\_network\_storage) | An array of network attached storage mounts to be configured on all login nodes. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
}))
| `[]` | no | -| [login\_nodes](#input\_login\_nodes) | List of slurm login instance definitions. |
list(object({
name_prefix = string
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
additional_networks = optional(list(object({
access_config = optional(list(object({
nat_ip = string
network_tier = string
})), [])
alias_ip_range = optional(list(object({
ip_cidr_range = string
subnetwork_range_name = string
})), [])
ipv6_access_config = optional(list(object({
network_tier = string
})), [])
network = optional(string)
network_ip = optional(string, "")
nic_type = optional(string)
queue_count = optional(number)
stack_type = optional(string)
subnetwork = optional(string)
subnetwork_project = optional(string)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string, "n1-standard-1")
enable_confidential_vm = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
labels = optional(map(string), {})
machine_type = optional(string)
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
num_instances = optional(number, 1)
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
static_ips = optional(list(string), [])
subnetwork = string
spot = optional(bool, false)
tags = optional(list(string), [])
zone = optional(string)
termination_action = optional(string)
}))
| `[]` | no | +| [login\_network\_storage](#input\_login\_network\_storage) | An array of network attached storage mounts to be configured on all login nodes. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
}))
| `[]` | no | +| [login\_nodes](#input\_login\_nodes) | List of slurm login instance definitions. |
list(object({
name_prefix = string
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
additional_networks = optional(list(object({
access_config = optional(list(object({
nat_ip = string
network_tier = string
})), [])
alias_ip_range = optional(list(object({
ip_cidr_range = string
subnetwork_range_name = string
})), [])
ipv6_access_config = optional(list(object({
network_tier = string
})), [])
network = optional(string)
network_ip = optional(string, "")
nic_type = optional(string)
queue_count = optional(number)
stack_type = optional(string)
subnetwork = optional(string)
subnetwork_project = optional(string)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string, "n1-standard-1")
enable_confidential_vm = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
labels = optional(map(string), {})
machine_type = optional(string)
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
num_instances = optional(number, 1)
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
static_ips = optional(list(string), [])
subnetwork = string
spot = optional(bool, false)
tags = optional(list(string), [])
zone = optional(string)
termination_action = optional(string)
}))
| `[]` | no | | [login\_startup\_script](#input\_login\_startup\_script) | Startup script used by the login VMs. | `string` | `"# no-op"` | no | -| [login\_startup\_scripts\_timeout](#input\_login\_startup\_scripts\_timeout) | The timeout (seconds) applied to each script in login\_startup\_scripts. If
any script exceeds this timeout, then the instance setup process is considered
failed and handled accordingly.

NOTE: When set to 0, the timeout is considered infinite and thus disabled. | `number` | `300` | no | +| [login\_startup\_scripts\_timeout](#input\_login\_startup\_scripts\_timeout) | The timeout (seconds) applied to each script in login\_startup\_scripts. If
any script exceeds this timeout, then the instance setup process is considered
failed and handled accordingly.

NOTE: When set to 0, the timeout is considered infinite and thus disabled. | `number` | `300` | no | | [machine\_type](#input\_machine\_type) | Machine type to create. | `string` | `"c2-standard-4"` | no | | [metadata](#input\_metadata) | Metadata, provided as a map. | `map(string)` | `{}` | no | -| [min\_cpu\_platform](#input\_min\_cpu\_platform) | Specifies a minimum CPU platform. Applicable values are the friendly names of
CPU platforms, such as Intel Haswell or Intel Skylake. See the complete list:
https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform | `string` | `null` | no | -| [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured on all instances. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
}))
| `[]` | no | -| [nodeset](#input\_nodeset) | Define nodesets, as a list. |
list(object({
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
node_conf = optional(map(string), {})
nodeset_name = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string)
enable_confidential_vm = optional(bool, false)
enable_placement = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
enable_maintenance_reservation = optional(bool, true)
gpu = optional(object({
count = number
type = string
}))
labels = optional(map(string), {})
machine_type = optional(string)
maintenance_interval = optional(string)
instance_properties_json = string
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
subnetwork_self_link = string
additional_networks = optional(list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
})))
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
spot = optional(bool, false)
tags = optional(list(string), [])
termination_action = optional(string)
reservation_name = optional(string)
startup_script = optional(list(object({
filename = string
content = string })), [])

zone_target_shape = string
zone_policy_allow = set(string)
zone_policy_deny = set(string)
}))
| `[]` | no | -| [nodeset\_dyn](#input\_nodeset\_dyn) | Defines dynamic nodesets, as a list. |
list(object({
nodeset_name = string
nodeset_feature = string
}))
| `[]` | no | -| [nodeset\_tpu](#input\_nodeset\_tpu) | Define TPU nodesets, as a list. |
list(object({
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 5)
nodeset_name = string
enable_public_ip = optional(bool, false)
node_type = string
accelerator_config = optional(object({
topology = string
version = string
}), {
topology = ""
version = ""
})
tf_version = string
preemptible = optional(bool, false)
preserve_tpu = optional(bool, false)
zone = string
data_disks = optional(list(string), [])
docker_image = optional(string, "")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
subnetwork = string
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
project_id = string
reserved = optional(string, false)
}))
| `[]` | no | +| [min\_cpu\_platform](#input\_min\_cpu\_platform) | Specifies a minimum CPU platform. Applicable values are the friendly names of
CPU platforms, such as Intel Haswell or Intel Skylake. See the complete list:
https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform | `string` | `null` | no | +| [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured on all instances. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
}))
| `[]` | no | +| [nodeset](#input\_nodeset) | Define nodesets, as a list. |
list(object({
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
node_conf = optional(map(string), {})
nodeset_name = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string)
enable_confidential_vm = optional(bool, false)
enable_placement = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
enable_maintenance_reservation = optional(bool, true)
gpu = optional(object({
count = number
type = string
}))
labels = optional(map(string), {})
machine_type = optional(string)
maintenance_interval = optional(string)
instance_properties_json = string
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
subnetwork_self_link = string
additional_networks = optional(list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
})))
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
spot = optional(bool, false)
tags = optional(list(string), [])
termination_action = optional(string)
reservation_name = optional(string)
startup_script = optional(list(object({
filename = string
content = string })), [])

zone_target_shape = string
zone_policy_allow = set(string)
zone_policy_deny = set(string)
}))
| `[]` | no | +| [nodeset\_dyn](#input\_nodeset\_dyn) | Defines dynamic nodesets, as a list. |
list(object({
nodeset_name = string
nodeset_feature = string
}))
| `[]` | no | +| [nodeset\_tpu](#input\_nodeset\_tpu) | Define TPU nodesets, as a list. |
list(object({
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 5)
nodeset_name = string
enable_public_ip = optional(bool, false)
node_type = string
accelerator_config = optional(object({
topology = string
version = string
}), {
topology = ""
version = ""
})
tf_version = string
preemptible = optional(bool, false)
preserve_tpu = optional(bool, false)
zone = string
data_disks = optional(list(string), [])
docker_image = optional(string, "")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
subnetwork = string
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
project_id = string
reserved = optional(string, false)
}))
| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Instance availability Policy. | `string` | `"MIGRATE"` | no | -| [partitions](#input\_partitions) | Cluster partitions as a list. See module slurm\_partition. |
list(object({
partition_name = string
partition_conf = optional(map(string), {})
partition_nodeset = optional(list(string), [])
partition_nodeset_dyn = optional(list(string), [])
partition_nodeset_tpu = optional(list(string), [])
enable_job_exclusive = optional(bool, false)
}))
| n/a | yes | +| [partitions](#input\_partitions) | Cluster partitions as a list. See module slurm\_partition. |
list(object({
partition_name = string
partition_conf = optional(map(string), {})
partition_nodeset = optional(list(string), [])
partition_nodeset_dyn = optional(list(string), [])
partition_nodeset_tpu = optional(list(string), [])
enable_job_exclusive = optional(bool, false)
}))
| n/a | yes | | [preemptible](#input\_preemptible) | Allow the instance to be preempted. | `bool` | `false` | no | | [project\_id](#input\_project\_id) | Project ID to create resources in. | `string` | n/a | yes | -| [prolog\_scripts](#input\_prolog\_scripts) | List of scripts to be used for Prolog. Programs for the slurmd to execute
whenever it is asked to run a job step from a new job allocation.
See https://slurm.schedmd.com/slurm.conf.html#OPT_Prolog. |
list(object({
filename = string
content = optional(string)
source = optional(string)
}))
| `[]` | no | +| [prolog\_scripts](#input\_prolog\_scripts) | List of scripts to be used for Prolog. Programs for the slurmd to execute
whenever it is asked to run a job step from a new job allocation.
See https://slurm.schedmd.com/slurm.conf.html#OPT_Prolog. |
list(object({
filename = string
content = optional(string)
source = optional(string)
}))
| `[]` | no | | [region](#input\_region) | The default region to place resources in. | `string` | n/a | yes | -| [service\_account](#input\_service\_account) | DEPRECATED: Use `service_account_email` and `service_account_scopes` instead. |
object({
email = string
scopes = set(string)
})
| `null` | no | +| [service\_account](#input\_service\_account) | DEPRECATED: Use `service_account_email` and `service_account_scopes` instead. |
object({
email = string
scopes = set(string)
})
| `null` | no | | [service\_account\_email](#input\_service\_account\_email) | Service account e-mail address to attach to the controller instance. | `string` | `null` | no | -| [service\_account\_scopes](#input\_service\_account\_scopes) | Scopes to attach to the controller instance. | `set(string)` |
[
"https://www.googleapis.com/auth/cloud-platform"
]
| no | -| [shielded\_instance\_config](#input\_shielded\_instance\_config) | Shielded VM configuration for the instance. Note: not used unless
enable\_shielded\_vm is 'true'.
enable\_integrity\_monitoring : Compare the most recent boot measurements to the
integrity policy baseline and return a pair of pass/fail results depending on
whether they match or not.
enable\_secure\_boot : Verify the digital signature of all boot components, and
halt the boot process if signature verification fails.
enable\_vtpm : Use a virtualized trusted platform module, which is a
specialized computer chip you can use to encrypt objects like keys and
certificates. |
object({
enable_integrity_monitoring = bool
enable_secure_boot = bool
enable_vtpm = bool
})
|
{
"enable_integrity_monitoring": true,
"enable_secure_boot": true,
"enable_vtpm": true
}
| no | -| [slurm\_cluster\_name](#input\_slurm\_cluster\_name) | Cluster name, used for resource naming and slurm accounting.
If not provided it will default to the first 8 characters of the deployment name (removing any invalid characters). | `string` | `null` | no | +| [service\_account\_scopes](#input\_service\_account\_scopes) | Scopes to attach to the controller instance. | `set(string)` |
[
"https://www.googleapis.com/auth/cloud-platform"
]
| no | +| [shielded\_instance\_config](#input\_shielded\_instance\_config) | Shielded VM configuration for the instance. Note: not used unless
enable\_shielded\_vm is 'true'.
enable\_integrity\_monitoring : Compare the most recent boot measurements to the
integrity policy baseline and return a pair of pass/fail results depending on
whether they match or not.
enable\_secure\_boot : Verify the digital signature of all boot components, and
halt the boot process if signature verification fails.
enable\_vtpm : Use a virtualized trusted platform module, which is a
specialized computer chip you can use to encrypt objects like keys and
certificates. |
object({
enable_integrity_monitoring = bool
enable_secure_boot = bool
enable_vtpm = bool
})
|
{
"enable_integrity_monitoring": true,
"enable_secure_boot": true,
"enable_vtpm": true
}
| no | +| [slurm\_cluster\_name](#input\_slurm\_cluster\_name) | Cluster name, used for resource naming and slurm accounting.
If not provided it will default to the first 8 characters of the deployment name (removing any invalid characters). | `string` | `null` | no | | [slurm\_conf\_tpl](#input\_slurm\_conf\_tpl) | Slurm slurm.conf template file path. | `string` | `null` | no | | [slurmdbd\_conf\_tpl](#input\_slurmdbd\_conf\_tpl) | Slurm slurmdbd.conf template file path. | `string` | `null` | no | | [static\_ips](#input\_static\_ips) | List of static IPs for VM instances. | `list(string)` | `[]` | no | | [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | Subnet to deploy to. | `string` | n/a | yes | | [tags](#input\_tags) | Network tag list. | `list(string)` | `[]` | no | | [universe\_domain](#input\_universe\_domain) | Domain address for alternate API universe | `string` | `"googleapis.com"` | no | -| [zone](#input\_zone) | Zone where the instances should be created. If not specified, instances will be
spread across available zones in the region. | `string` | `null` | no | +| [zone](#input\_zone) | Zone where the instances should be created. If not specified, instances will be
spread across available zones in the region. | `string` | `null` | no | ## Outputs diff --git a/modules/compute/gke-node-pool/README.md b/modules/compute/gke-node-pool/README.md index c45a3ed83e..21cb2f9daf 100644 --- a/modules/compute/gke-node-pool/README.md +++ b/modules/compute/gke-node-pool/README.md @@ -297,12 +297,14 @@ limitations under the License. | [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = optional(string)
count = optional(number, 0)
gpu_driver_installation_config = optional(list(object({
gpu_driver_version = string
})))
gpu_partition_size = optional(string)
gpu_sharing_config = optional(list(object({
gpu_sharing_strategy = optional(string)
max_shared_clients_per_gpu = optional(number)
})))
}))
| `null` | no | | [host\_maintenance\_interval](#input\_host\_maintenance\_interval) | Specifies the frequency of planned maintenance events. | `string` | `""` | no | | [image\_type](#input\_image\_type) | The default image type used by NAP once a new node pool is being created. Use either COS\_CONTAINERD or UBUNTU\_CONTAINERD. | `string` | `"COS_CONTAINERD"` | no | +| [is\_gke\_sandbox](#input\_is\_gke\_sandbox) | Temporary variable to identify the GKE sandbox environment | `bool` | `false` | no | | [kubernetes\_labels](#input\_kubernetes\_labels) | Kubernetes labels to be applied to each node in the node group. Key-value pairs.
(The `kubernetes.io/` and `k8s.io/` prefixes are reserved by Kubernetes Core components and cannot be specified) | `map(string)` | `null` | no | | [labels](#input\_labels) | GCE resource labels to be applied to resources. Key-value pairs. | `map(string)` | n/a | yes | | [local\_ssd\_count\_ephemeral\_storage](#input\_local\_ssd\_count\_ephemeral\_storage) | The number of local SSDs to attach to each node to back ephemeral storage.
Uses NVMe interfaces. Must be supported by `machine_type`.
When set to null, default value either is [set based on machine\_type](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) or GKE decides about default value.
[See above](#local-ssd-storage) for more info. | `number` | `null` | no | | [local\_ssd\_count\_nvme\_block](#input\_local\_ssd\_count\_nvme\_block) | The number of local SSDs to attach to each node to back block storage.
Uses NVMe interfaces. Must be supported by `machine_type`.
When set to null, default value either is [set based on machine\_type](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) or GKE decides about default value.
[See above](#local-ssd-storage) for more info. | `number` | `null` | no | | [machine\_type](#input\_machine\_type) | The name of a Google Compute Engine machine type. | `string` | `"c2-standard-60"` | no | | [name](#input\_name) | The name of the node pool. If left blank, will default to the machine type. | `string` | `null` | no | +| [node\_version](#input\_node\_version) | Temporary variable to explicitly set the node version | `string` | `null` | no | | [placement\_policy](#input\_placement\_policy) | Group placement policy to use for the node pool's nodes. `COMPACT` is the only supported value for `type` currently. `name` is the name of the placement policy.
It is assumed that the specified policy exists. To create a placement policy refer to https://cloud.google.com/sdk/gcloud/reference/compute/resource-policies/create/group-placement.
Note: Placement policies have the [following](https://cloud.google.com/compute/docs/instances/placement-policies-overview#restrictions-compact-policies) restrictions. |
object({
type = string
name = optional(string)
})
|
{
"name": null,
"type": null
}
| no | | [project\_id](#input\_project\_id) | The project ID to host the cluster in. | `string` | n/a | yes | | [reservation\_affinity](#input\_reservation\_affinity) | Reservation resource to consume. When targeting SPECIFIC\_RESERVATION, specific\_reservations needs be specified.
Even though specific\_reservations is a list, only one reservation is allowed by the NodePool API.
It is assumed that the specified reservation exists and has available capacity.
For a shared reservation, specify the project\_id as well in which it was created.
To create a reservation refer to https://cloud.google.com/compute/docs/instances/reservations-single-project and https://cloud.google.com/compute/docs/instances/reservations-shared |
object({
consume_reservation_type = string
specific_reservations = optional(list(object({
name = string
project = optional(string)
})))
})
|
{
"consume_reservation_type": "NO_RESERVATION",
"specific_reservations": []
}
| no | diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index 59cbe1d911..9c126dcc65 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -44,6 +44,7 @@ resource "google_container_node_pool" "node_pool" { name = var.name == null ? var.machine_type : var.name cluster = var.cluster_id node_locations = var.zones + version = var.is_gke_sandbox ? var.node_version : null node_count = var.static_node_count dynamic "autoscaling" { diff --git a/modules/compute/gke-node-pool/variables.tf b/modules/compute/gke-node-pool/variables.tf index 6e24edaa02..877fae56a8 100644 --- a/modules/compute/gke-node-pool/variables.tf +++ b/modules/compute/gke-node-pool/variables.tf @@ -354,3 +354,17 @@ variable "host_maintenance_interval" { error_message = "Invalid host_maintenance_interval value. Must be PERIODIC, AS_NEEDED or the empty string" } } + +# REMOVE_ME: It's a temporary variable used in internal testing +variable "is_gke_sandbox" { + description = "Temporary variable to identify the GKE sandbox environment" + default = false + type = bool +} + +# REMOVE_ME: It's a temporary variable used in internal testing +variable "node_version" { + description = "Temporary variable to explicitly set the node version" + type = string + default = null +} diff --git a/modules/compute/vm-instance/README.md b/modules/compute/vm-instance/README.md index ce1b93f949..e5e4ffe63d 100644 --- a/modules/compute/vm-instance/README.md +++ b/modules/compute/vm-instance/README.md @@ -206,43 +206,43 @@ limitations under the License. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [add\_deployment\_name\_before\_prefix](#input\_add\_deployment\_name\_before\_prefix) | If true, the names of VMs and disks will always be prefixed with `deployment_name` to enable uniqueness across deployments.
See `name_prefix` for further details on resource naming behavior. | `bool` | `false` | no | -| [allocate\_ip](#input\_allocate\_ip) | If not null, allocate IPs with the given configuration. See details at
https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_address |
object({
address_type = optional(string, "INTERNAL")
purpose = optional(string),
network_tier = optional(string),
ip_version = optional(string, "IPV4"),
})
| `null` | no | -| [allow\_automatic\_updates](#input\_allow\_automatic\_updates) | If false, disables automatic system package updates on the created instances. This feature is
only available on supported images (or images derived from them). For more details, see
https://cloud.google.com/compute/docs/instances/create-hpc-vm#disable_automatic_updates | `bool` | `true` | no | +| [add\_deployment\_name\_before\_prefix](#input\_add\_deployment\_name\_before\_prefix) | If true, the names of VMs and disks will always be prefixed with `deployment_name` to enable uniqueness across deployments.
See `name_prefix` for further details on resource naming behavior. | `bool` | `false` | no | +| [allocate\_ip](#input\_allocate\_ip) | If not null, allocate IPs with the given configuration. See details at
https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_address |
object({
address_type = optional(string, "INTERNAL")
purpose = optional(string),
network_tier = optional(string),
ip_version = optional(string, "IPV4"),
})
| `null` | no | +| [allow\_automatic\_updates](#input\_allow\_automatic\_updates) | If false, disables automatic system package updates on the created instances. This feature is
only available on supported images (or images derived from them). For more details, see
https://cloud.google.com/compute/docs/instances/create-hpc-vm#disable_automatic_updates | `bool` | `true` | no | | [auto\_delete\_boot\_disk](#input\_auto\_delete\_boot\_disk) | Controls if boot disk should be auto-deleted when instance is deleted. | `bool` | `true` | no | | [automatic\_restart](#input\_automatic\_restart) | Specifies if the instance should be restarted if it was terminated by Compute Engine (not a user). | `bool` | `null` | no | -| [bandwidth\_tier](#input\_bandwidth\_tier) | Tier 1 bandwidth increases the maximum egress bandwidth for VMs.
Using the `tier_1_enabled` setting will enable both gVNIC and TIER\_1 higher bandwidth networking.
Using the `gvnic_enabled` setting will only enable gVNIC and will not enable TIER\_1.
Note that TIER\_1 only works with specific machine families & shapes and must be using an image that supports gVNIC. See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"not_enabled"` | no | +| [bandwidth\_tier](#input\_bandwidth\_tier) | Tier 1 bandwidth increases the maximum egress bandwidth for VMs.
Using the `tier_1_enabled` setting will enable both gVNIC and TIER\_1 higher bandwidth networking.
Using the `gvnic_enabled` setting will only enable gVNIC and will not enable TIER\_1.
Note that TIER\_1 only works with specific machine families & shapes and must be using an image that supports gVNIC. See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"not_enabled"` | no | | [deployment\_name](#input\_deployment\_name) | Name of the deployment, will optionally be used name resources according to `name_prefix` | `string` | n/a | yes | | [disable\_public\_ips](#input\_disable\_public\_ips) | If set to true, instances will not have public IPs | `bool` | `false` | no | | [disk\_size\_gb](#input\_disk\_size\_gb) | Size of disk for instances. | `number` | `200` | no | | [disk\_type](#input\_disk\_type) | Disk type for instances. | `string` | `"pd-standard"` | no | | [enable\_oslogin](#input\_enable\_oslogin) | Enable or Disable OS Login with "ENABLE" or "DISABLE". Set to "INHERIT" to inherit project OS Login setting. | `string` | `"ENABLE"` | no | -| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = string,
count = number
}))
| `[]` | no | +| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = string,
count = number
}))
| `[]` | no | | [instance\_count](#input\_instance\_count) | Number of instances | `number` | `1` | no | -| [instance\_image](#input\_instance\_image) | Instance Image | `map(string)` |
{
"family": "hpc-rocky-linux-8",
"project": "cloud-hpc-image-public"
}
| no | +| [instance\_image](#input\_instance\_image) | Instance Image | `map(string)` |
{
"family": "hpc-rocky-linux-8",
"project": "cloud-hpc-image-public"
}
| no | | [labels](#input\_labels) | Labels to add to the instances. Key-value pairs. | `map(string)` | n/a | yes | | [local\_ssd\_count](#input\_local\_ssd\_count) | The number of local SSDs to attach to each VM. See https://cloud.google.com/compute/docs/disks/local-ssd. | `number` | `0` | no | | [local\_ssd\_interface](#input\_local\_ssd\_interface) | Interface to be used with local SSDs. Can be either 'NVME' or 'SCSI'. No effect unless `local_ssd_count` is also set. | `string` | `"NVME"` | no | | [machine\_type](#input\_machine\_type) | Machine type to use for the instance creation | `string` | `"c2-standard-60"` | no | | [metadata](#input\_metadata) | Metadata, provided as a map | `map(string)` | `{}` | no | | [min\_cpu\_platform](#input\_min\_cpu\_platform) | The name of the minimum CPU platform that you want the instance to use. | `string` | `null` | no | -| [name\_prefix](#input\_name\_prefix) | An optional name for all VM and disk resources.
If not supplied, `deployment_name` will be used.
When `name_prefix` is supplied, and `add_deployment_name_before_prefix` is set,
then resources are named by "<`deployment_name`>-<`name_prefix`>-<#>". | `string` | `null` | no | -| [network\_interfaces](#input\_network\_interfaces) | A list of network interfaces. The options match that of the terraform
network\_interface block of google\_compute\_instance. For descriptions of the
subfields or more information see the documentation:
https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance#nested_network_interface

**\_NOTE:\_** If `network_interfaces` are set, `network_self_link` and
`subnetwork_self_link` will be ignored, even if they are provided through
the `use` field. `bandwidth_tier` and `disable_public_ips` also do not apply
to network interfaces defined in this variable.

Subfields:
network (string, required if subnetwork is not supplied)
subnetwork (string, required if network is not supplied)
subnetwork\_project (string, optional)
network\_ip (string, optional)
nic\_type (string, optional, choose from ["GVNIC", "VIRTIO\_NET"])
stack\_type (string, optional, choose from ["IPV4\_ONLY", "IPV4\_IPV6"])
queue\_count (number, optional)
access\_config (object, optional)
ipv6\_access\_config (object, optional)
alias\_ip\_range (list(object), optional) |
list(object({
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
| `[]` | no | +| [name\_prefix](#input\_name\_prefix) | An optional name for all VM and disk resources.
If not supplied, `deployment_name` will be used.
When `name_prefix` is supplied, and `add_deployment_name_before_prefix` is set,
then resources are named by "<`deployment_name`>-<`name_prefix`>-<#>". | `string` | `null` | no | +| [network\_interfaces](#input\_network\_interfaces) | A list of network interfaces. The options match that of the terraform
network\_interface block of google\_compute\_instance. For descriptions of the
subfields or more information see the documentation:
https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_instance#nested_network_interface

**\_NOTE:\_** If `network_interfaces` are set, `network_self_link` and
`subnetwork_self_link` will be ignored, even if they are provided through
the `use` field. `bandwidth_tier` and `disable_public_ips` also do not apply
to network interfaces defined in this variable.

Subfields:
network (string, required if subnetwork is not supplied)
subnetwork (string, required if network is not supplied)
subnetwork\_project (string, optional)
network\_ip (string, optional)
nic\_type (string, optional, choose from ["GVNIC", "VIRTIO\_NET"])
stack\_type (string, optional, choose from ["IPV4\_ONLY", "IPV4\_IPV6"])
queue\_count (number, optional)
access\_config (object, optional)
ipv6\_access\_config (object, optional)
alias\_ip\_range (list(object), optional) |
list(object({
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
| `[]` | no | | [network\_self\_link](#input\_network\_self\_link) | The self link of the network to attach the VM. Can use "default" for the default network. | `string` | `null` | no | -| [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
| `[]` | no | +| [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Describes maintenance behavior for the instance. If left blank this will default to `MIGRATE` except for when `placement_policy`, spot provisioning, or GPUs require it to be `TERMINATE` | `string` | `null` | no | -| [placement\_policy](#input\_placement\_policy) | Control where your VM instances are physically located relative to each other within a zone.
See https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_resource_policy#nested_group_placement_policy | `any` | `null` | no | +| [placement\_policy](#input\_placement\_policy) | Control where your VM instances are physically located relative to each other within a zone.
See https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_resource_policy#nested_group_placement_policy | `any` | `null` | no | | [pre\_existing\_placement\_policy](#input\_pre\_existing\_placement\_policy) | Use pre-existing placement policy within the project specified | `string` | `null` | no | | [project\_id](#input\_project\_id) | Project in which the HPC deployment will be created | `string` | n/a | yes | | [region](#input\_region) | The region to deploy to | `string` | n/a | yes | -| [service\_account](#input\_service\_account) | DEPRECATED - Use `service_account_email` and `service_account_scopes` instead. |
object({
email = string,
scopes = set(string)
})
| `null` | no | +| [service\_account](#input\_service\_account) | DEPRECATED - Use `service_account_email` and `service_account_scopes` instead. |
object({
email = string,
scopes = set(string)
})
| `null` | no | | [service\_account\_email](#input\_service\_account\_email) | Service account e-mail address to use with the node pool | `string` | `null` | no | -| [service\_account\_scopes](#input\_service\_account\_scopes) | Scopes to to use with the node pool. | `set(string)` |
[
"https://www.googleapis.com/auth/cloud-platform"
]
| no | +| [service\_account\_scopes](#input\_service\_account\_scopes) | Scopes to use with the node pool. | `set(string)` |
[
"https://www.googleapis.com/auth/cloud-platform"
]
| no | | [spot](#input\_spot) | Provision VMs using discounted Spot pricing, allowing for preemption | `bool` | `false` | no | | [startup\_script](#input\_startup\_script) | Startup script used on the instance | `string` | `null` | no | | [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | The self link of the subnetwork to attach the VM. | `string` | `null` | no | | [tags](#input\_tags) | Network tags, provided as a list | `list(string)` | `[]` | no | -| [threads\_per\_core](#input\_threads\_per\_core) | Sets the number of threads per physical core. By setting threads\_per\_core
to 2, Simultaneous Multithreading (SMT) is enabled extending the total number
of virtual cores. For example, a machine of type c2-standard-60 will have 60
virtual cores with threads\_per\_core equal to 2. With threads\_per\_core equal
to 1 (SMT turned off), only the 30 physical cores will be available on the VM.

The default value of \"0\" will turn off SMT for supported machine types, and
will fall back to GCE defaults for unsupported machine types (t2d, shared-core
instances, or instances with less than 2 vCPU).

Disabling SMT can be more performant in many HPC workloads, therefore it is
disabled by default where compatible.

null = SMT configuration will use the GCE defaults for the machine type
0 = SMT will be disabled where compatible (default)
1 = SMT will always be disabled (will fail on incompatible machine types)
2 = SMT will always be enabled (will fail on incompatible machine types) | `number` | `0` | no | +| [threads\_per\_core](#input\_threads\_per\_core) | Sets the number of threads per physical core. By setting threads\_per\_core
to 2, Simultaneous Multithreading (SMT) is enabled extending the total number
of virtual cores. For example, a machine of type c2-standard-60 will have 60
virtual cores with threads\_per\_core equal to 2. With threads\_per\_core equal
to 1 (SMT turned off), only the 30 physical cores will be available on the VM.

The default value of \"0\" will turn off SMT for supported machine types, and
will fall back to GCE defaults for unsupported machine types (t2d, shared-core
instances, or instances with fewer than 2 vCPUs).

Disabling SMT can be more performant in many HPC workloads, therefore it is
disabled by default where compatible.

null = SMT configuration will use the GCE defaults for the machine type
0 = SMT will be disabled where compatible (default)
1 = SMT will always be disabled (will fail on incompatible machine types)
2 = SMT will always be enabled (will fail on incompatible machine types) | `number` | `0` | no | | [zone](#input\_zone) | Compute Platform zone | `string` | n/a | yes | ## Outputs diff --git a/modules/scheduler/gke-cluster/README.md b/modules/scheduler/gke-cluster/README.md index 3a72e1149b..b39e159e39 100644 --- a/modules/scheduler/gke-cluster/README.md +++ b/modules/scheduler/gke-cluster/README.md @@ -159,6 +159,7 @@ limitations under the License. | [enable\_private\_ipv6\_google\_access](#input\_enable\_private\_ipv6\_google\_access) | The private IPv6 google access type for the VMs in this subnet. | `bool` | `true` | no | | [enable\_private\_nodes](#input\_enable\_private\_nodes) | (Beta) Whether nodes have internal IP addresses only. | `bool` | `true` | no | | [gcp\_public\_cidrs\_access\_enabled](#input\_gcp\_public\_cidrs\_access\_enabled) | Whether the cluster master is accessible via all the Google Compute Engine Public IPs. To view this list of IP addresses look here https://cloud.google.com/compute/docs/faq#find_ip_range | `bool` | `false` | no | +| [is\_gke\_sandbox](#input\_is\_gke\_sandbox) | Temporary variable to identify the GKE sandbox environment | `bool` | `false` | no | | [labels](#input\_labels) | GCE resource labels to be applied to resources. Key-value pairs. | `map(string)` | n/a | yes | | [maintenance\_exclusions](#input\_maintenance\_exclusions) | List of maintenance exclusions. A cluster can have up to three. |
list(object({
name = string
start_time = string
end_time = string
exclusion_scope = string
}))
| `[]` | no | | [maintenance\_start\_time](#input\_maintenance\_start\_time) | Start time for daily maintenance operations. Specified in GMT with `HH:MM` format. | `string` | `"09:00"` | no | @@ -170,6 +171,7 @@ limitations under the License. | [pods\_ip\_range\_name](#input\_pods\_ip\_range\_name) | The name of the secondary subnet ip range to use for pods. | `string` | `"pods"` | no | | [prefix\_with\_deployment\_name](#input\_prefix\_with\_deployment\_name) | If true, cluster name will be prefixed by `deployment_name` (ex: -). | `bool` | `true` | no | | [project\_id](#input\_project\_id) | The project ID to host the cluster in. | `string` | n/a | yes | +| [rdma\_subnetwork\_name\_prefix](#input\_rdma\_subnetwork\_name\_prefix) | Prefix of the RDMA subnetwork names | `string` | `null` | no | | [region](#input\_region) | The region to host the cluster in. | `string` | n/a | yes | | [release\_channel](#input\_release\_channel) | The release channel of this cluster. Accepted values are `UNSPECIFIED`, `RAPID`, `REGULAR` and `STABLE`. | `string` | `"UNSPECIFIED"` | no | | [service\_account](#input\_service\_account) | DEPRECATED: use service\_account\_email and scopes. |
object({
email = string,
scopes = set(string)
})
| `null` | no | @@ -187,6 +189,7 @@ limitations under the License. | [system\_node\_pool\_taints](#input\_system\_node\_pool\_taints) | Taints to be applied to the system node pool. |
list(object({
key = string
value = any
effect = string
}))
|
[
{
"effect": "NO_SCHEDULE",
"key": "components.gke.io/gke-managed-components",
"value": true
}
]
| no | | [timeout\_create](#input\_timeout\_create) | Timeout for creating a node pool | `string` | `null` | no | | [timeout\_update](#input\_timeout\_update) | Timeout for updating a node pool | `string` | `null` | no | +| [zone](#input\_zone) | Zone | `string` | `null` | no | ## Outputs diff --git a/modules/scheduler/gke-cluster/main.tf b/modules/scheduler/gke-cluster/main.tf index 480d5b7d58..72514b2414 100644 --- a/modules/scheduler/gke-cluster/main.tf +++ b/modules/scheduler/gke-cluster/main.tf @@ -36,6 +36,44 @@ locals { # multi networking needs enabled Dataplane v2 derived_enable_dataplane_v2 = coalesce(var.enable_dataplane_v2, local.derived_enable_multi_networking) + + rdma_networks = [for network_info in var.additional_networks : network_info if strcontains(upper(network_info.nic_type), "RDMA")] + non_rdma_networks = [for network_info in var.additional_networks : network_info if !strcontains(upper(network_info.nic_type), "RDMA")] + apply_manifests_rdma_networks = flatten([ + for idx, network_info in local.rdma_networks : [ + { + source = "${path.module}/templates/gke-network-paramset.yaml.tftpl", + template_vars = { + name = "${var.rdma_subnetwork_name_prefix}-${idx}", + network_name = network_info.network + subnetwork_name = "${var.rdma_subnetwork_name_prefix}-${idx}", + device_mode = "RDMA" + } + }, + { + source = "${path.module}/templates/network-object.yaml.tftpl", + template_vars = { name = "${var.rdma_subnetwork_name_prefix}-${idx}" } + } + ] + ]) + + apply_manifests_non_rdma_networks = flatten([ + for idx, network_info in local.non_rdma_networks : [ + { + source = "${path.module}/templates/gke-network-paramset.yaml.tftpl", + template_vars = { + name = network_info.subnetwork + network_name = network_info.network + subnetwork_name = network_info.subnetwork + device_mode = "NetDevice" + } + }, + { + source = "${path.module}/templates/network-object.yaml.tftpl", + template_vars = { name = network_info.subnetwork } + } + ] + ]) } data 
"google_compute_default_service_account" "default_sa" { @@ -47,7 +85,7 @@ resource "google_container_cluster" "gke_cluster" { project = var.project_id name = local.name - location = var.region + location = var.is_gke_sandbox ? var.zone : var.region resource_labels = local.labels # decouple node pool lifecycle from cluster life cycle @@ -59,6 +97,10 @@ resource "google_container_cluster" "gke_cluster" { network = var.network_id subnetwork = var.subnetwork_self_link + # Note: Though the default value of VPC_NATIVE is sufficient to enable IP Aliasing, + # It makes sense to let that argument be explicit so that it remains in our consideration when upgrading the provider. + # Because, in the newer provider versions the default may change + networking_mode = "VPC_NATIVE" # Note: the existence of the "master_authorized_networks_config" block enables # the master authorized networks even if it's empty. @@ -196,9 +238,12 @@ resource "google_container_node_pool" "system_node_pools" { provider = google-beta count = var.system_node_pool_enabled ? 1 : 0 - project = var.project_id - name = var.system_node_pool_name - cluster = google_container_cluster.gke_cluster.self_link + project = var.project_id + name = var.system_node_pool_name + cluster = var.is_gke_sandbox ? google_container_cluster.gke_cluster.name : google_container_cluster.gke_cluster.self_link + version = var.min_master_version + location = var.is_gke_sandbox ? 
var.zone : null + autoscaling { total_min_node_count = var.system_node_pool_node_count.total_min_nodes total_max_node_count = var.system_node_pool_node_count.total_max_nodes @@ -338,20 +383,5 @@ module "kubectl_apply" { cluster_id = google_container_cluster.gke_cluster.id project_id = var.project_id - apply_manifests = flatten([ - for idx, network_info in var.additional_networks : [ - { - source = "${path.module}/templates/gke-network-paramset.yaml.tftpl", - template_vars = { - name = "vpc${idx + 1}", - network_name = network_info.network - subnetwork_name = network_info.subnetwork - } - }, - { - source = "${path.module}/templates/network-object.yaml.tftpl", - template_vars = { name = "vpc${idx + 1}" } - } - ] - ]) + apply_manifests = concat(local.apply_manifests_non_rdma_networks, local.apply_manifests_rdma_networks) } diff --git a/modules/scheduler/gke-cluster/templates/gke-network-paramset.yaml.tftpl b/modules/scheduler/gke-cluster/templates/gke-network-paramset.yaml.tftpl index fb7f0dba83..d376a1a760 100644 --- a/modules/scheduler/gke-cluster/templates/gke-network-paramset.yaml.tftpl +++ b/modules/scheduler/gke-cluster/templates/gke-network-paramset.yaml.tftpl @@ -6,4 +6,4 @@ metadata: spec: vpc: ${network_name} vpcSubnet: ${subnetwork_name} - deviceMode: NetDevice + deviceMode: ${device_mode} diff --git a/modules/scheduler/gke-cluster/variables.tf b/modules/scheduler/gke-cluster/variables.tf index e91be6b297..4088eae21c 100644 --- a/modules/scheduler/gke-cluster/variables.tf +++ b/modules/scheduler/gke-cluster/variables.tf @@ -327,3 +327,23 @@ variable "additional_networks" { })) })) } + +variable "rdma_subnetwork_name_prefix" { + description = "Prefix of the RDMA subnetwork names" + default = null + type = string +} + +# REMOVE_ME: It's a temporary variable used in internal testing +variable "is_gke_sandbox" { + description = "Temporary variable to identify the GKE sandbox environment" + default = false + type = bool +} + +# REMOVE_ME: It's a temporary 
variable used in internal testing +variable "zone" { + description = "Zone" + default = null + type = string +} diff --git a/modules/scheduler/pre-existing-gke-cluster/README.md b/modules/scheduler/pre-existing-gke-cluster/README.md index 519715480d..ada5676eb8 100644 --- a/modules/scheduler/pre-existing-gke-cluster/README.md +++ b/modules/scheduler/pre-existing-gke-cluster/README.md @@ -103,6 +103,7 @@ limitations under the License. | [additional\_networks](#input\_additional\_networks) | Additional network interface details for GKE, if any. Providing additional networks creates relevat network objects on the cluster. |
list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
}))
| `[]` | no | | [cluster\_name](#input\_cluster\_name) | Name of the existing cluster | `string` | n/a | yes | | [project\_id](#input\_project\_id) | Project that hosts the existing cluster | `string` | n/a | yes | +| [rdma\_subnetwork\_name\_prefix](#input\_rdma\_subnetwork\_name\_prefix) | Prefix of the RDMA subnetwork names | `string` | `null` | no | | [region](#input\_region) | Region in which to search for the cluster | `string` | n/a | yes | ## Outputs diff --git a/modules/scheduler/pre-existing-gke-cluster/main.tf b/modules/scheduler/pre-existing-gke-cluster/main.tf index 4b65ebe365..d8d3171361 100644 --- a/modules/scheduler/pre-existing-gke-cluster/main.tf +++ b/modules/scheduler/pre-existing-gke-cluster/main.tf @@ -20,26 +20,51 @@ data "google_container_cluster" "existing_gke_cluster" { location = var.region } -module "kubectl_apply" { - source = "../../management/kubectl-apply" # can point to github - - cluster_id = data.google_container_cluster.existing_gke_cluster.id - project_id = var.project_id +locals { + rdma_networks = [for network_info in var.additional_networks : network_info if strcontains(upper(network_info.nic_type), "RDMA")] + non_rdma_networks = [for network_info in var.additional_networks : network_info if !strcontains(upper(network_info.nic_type), "RDMA")] + apply_manifests_rdma_networks = flatten([ + for idx, network_info in local.rdma_networks : [ + { + source = "${path.module}/templates/gke-network-paramset.yaml.tftpl", + template_vars = { + name = "${var.rdma_subnetwork_name_prefix}-${idx}", + network_name = network_info.network + subnetwork_name = "${var.rdma_subnetwork_name_prefix}-${idx}", + device_mode = "RDMA" + } + }, + { + source = "${path.module}/templates/network-object.yaml.tftpl", + template_vars = { name = "${var.rdma_subnetwork_name_prefix}-${idx}" } + } + ] + ]) - apply_manifests = flatten([ - for idx, network_info in var.additional_networks : [ + apply_manifests_non_rdma_networks = flatten([ + for idx, network_info in 
local.non_rdma_networks : [ { source = "${path.module}/templates/gke-network-paramset.yaml.tftpl", template_vars = { - name = "vpc${idx + 1}", + name = network_info.subnetwork network_name = network_info.network subnetwork_name = network_info.subnetwork + device_mode = "NetDevice" } }, { source = "${path.module}/templates/network-object.yaml.tftpl", - template_vars = { name = "vpc${idx + 1}" } + template_vars = { name = network_info.subnetwork } } ] ]) } + +module "kubectl_apply" { + source = "../../management/kubectl-apply" # can point to github + + cluster_id = data.google_container_cluster.existing_gke_cluster.id + project_id = var.project_id + + apply_manifests = concat(local.apply_manifests_non_rdma_networks, local.apply_manifests_rdma_networks) +} diff --git a/modules/scheduler/pre-existing-gke-cluster/templates/gke-network-paramset.yaml.tftpl b/modules/scheduler/pre-existing-gke-cluster/templates/gke-network-paramset.yaml.tftpl index fb7f0dba83..d376a1a760 100644 --- a/modules/scheduler/pre-existing-gke-cluster/templates/gke-network-paramset.yaml.tftpl +++ b/modules/scheduler/pre-existing-gke-cluster/templates/gke-network-paramset.yaml.tftpl @@ -6,4 +6,4 @@ metadata: spec: vpc: ${network_name} vpcSubnet: ${subnetwork_name} - deviceMode: NetDevice + deviceMode: ${device_mode} diff --git a/modules/scheduler/pre-existing-gke-cluster/variables.tf b/modules/scheduler/pre-existing-gke-cluster/variables.tf index 67e7a24dca..9e9ed98ed3 100644 --- a/modules/scheduler/pre-existing-gke-cluster/variables.tf +++ b/modules/scheduler/pre-existing-gke-cluster/variables.tf @@ -53,3 +53,9 @@ variable "additional_networks" { })) })) } + +variable "rdma_subnetwork_name_prefix" { + description = "Prefix of the RDMA subnetwork names" + default = null + type = string +} From 5ca7d82e60599865b5d3490c6b317bb51a71d72c Mon Sep 17 00:00:00 2001 From: Atul Rajmane Date: Fri, 11 Oct 2024 13:41:50 +0000 Subject: [PATCH 10/26] Address Feedback --- community/modules/network/rdma-vpc/main.tf 
| 2 +- modules/compute/gke-node-pool/README.md | 1 - modules/compute/gke-node-pool/main.tf | 2 +- modules/compute/gke-node-pool/variables.tf | 8 -------- modules/scheduler/gke-cluster/README.md | 5 +++-- modules/scheduler/gke-cluster/main.tf | 19 ++++++++++++------- modules/scheduler/gke-cluster/variables.tf | 18 +++++++++++------- .../pre-existing-gke-cluster/main.tf | 2 +- 8 files changed, 29 insertions(+), 28 deletions(-) diff --git a/community/modules/network/rdma-vpc/main.tf b/community/modules/network/rdma-vpc/main.tf index a166599c58..85f2125209 100644 --- a/community/modules/network/rdma-vpc/main.tf +++ b/community/modules/network/rdma-vpc/main.tf @@ -132,7 +132,7 @@ locals { for subnet in module.vpc.subnets : { network = local.network_name subnetwork = subnet.name - subnetwork_project = null + subnetwork_project = var.project_id network_ip = "" nic_type = coalesce(var.nic_type, try(regex("IRDMA", local.profile_name), regex("MRDMA", local.profile_name), "RDMA")) stack_type = null diff --git a/modules/compute/gke-node-pool/README.md b/modules/compute/gke-node-pool/README.md index 21cb2f9daf..d487efc5cd 100644 --- a/modules/compute/gke-node-pool/README.md +++ b/modules/compute/gke-node-pool/README.md @@ -297,7 +297,6 @@ limitations under the License. | [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = optional(string)
count = optional(number, 0)
gpu_driver_installation_config = optional(list(object({
gpu_driver_version = string
})))
gpu_partition_size = optional(string)
gpu_sharing_config = optional(list(object({
gpu_sharing_strategy = optional(string)
max_shared_clients_per_gpu = optional(number)
})))
}))
| `null` | no | | [host\_maintenance\_interval](#input\_host\_maintenance\_interval) | Specifies the frequency of planned maintenance events. | `string` | `""` | no | | [image\_type](#input\_image\_type) | The default image type used by NAP once a new node pool is being created. Use either COS\_CONTAINERD or UBUNTU\_CONTAINERD. | `string` | `"COS_CONTAINERD"` | no | -| [is\_gke\_sandbox](#input\_is\_gke\_sandbox) | Temporary variable to identify the GKE sandbox environment | `bool` | `false` | no | | [kubernetes\_labels](#input\_kubernetes\_labels) | Kubernetes labels to be applied to each node in the node group. Key-value pairs.
(The `kubernetes.io/` and `k8s.io/` prefixes are reserved by Kubernetes Core components and cannot be specified) | `map(string)` | `null` | no | | [labels](#input\_labels) | GCE resource labels to be applied to resources. Key-value pairs. | `map(string)` | n/a | yes | | [local\_ssd\_count\_ephemeral\_storage](#input\_local\_ssd\_count\_ephemeral\_storage) | The number of local SSDs to attach to each node to back ephemeral storage.
Uses NVMe interfaces. Must be supported by `machine_type`.
When set to null, default value either is [set based on machine\_type](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) or GKE decides about default value.
[See above](#local-ssd-storage) for more info. | `number` | `null` | no | diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index 9c126dcc65..356377abea 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -44,7 +44,7 @@ resource "google_container_node_pool" "node_pool" { name = var.name == null ? var.machine_type : var.name cluster = var.cluster_id node_locations = var.zones - version = var.is_gke_sandbox ? var.node_version : null + version = var.node_version node_count = var.static_node_count dynamic "autoscaling" { diff --git a/modules/compute/gke-node-pool/variables.tf b/modules/compute/gke-node-pool/variables.tf index 877fae56a8..37c19ca201 100644 --- a/modules/compute/gke-node-pool/variables.tf +++ b/modules/compute/gke-node-pool/variables.tf @@ -355,14 +355,6 @@ variable "host_maintenance_interval" { } } -# REMOVE_ME: It's a temporary variable used in internal testing -variable "is_gke_sandbox" { - description = "Temporary variable to identify the GKE sandbox environment" - default = false - type = bool -} - -# REMOVE_ME: It's a temporary variable used in internal testing variable "node_version" { description = "Temporary variable to explicitly set the node version" type = string diff --git a/modules/scheduler/gke-cluster/README.md b/modules/scheduler/gke-cluster/README.md index b39e159e39..f338452460 100644 --- a/modules/scheduler/gke-cluster/README.md +++ b/modules/scheduler/gke-cluster/README.md @@ -147,6 +147,8 @@ limitations under the License. | [additional\_networks](#input\_additional\_networks) | Additional network interface details for GKE, if any. Providing additional networks enables multi networking and creates relevat network objects on the cluster. |
list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
}))
| `[]` | no | | [authenticator\_security\_group](#input\_authenticator\_security\_group) | The name of the RBAC security group for use with Google security groups in Kubernetes RBAC. Group name must be in format gke-security-groups@yourdomain.com | `string` | `null` | no | | [autoscaling\_profile](#input\_autoscaling\_profile) | (Beta) Optimize for utilization or availability when deciding to remove nodes. Can be BALANCED or OPTIMIZE\_UTILIZATION. | `string` | `"OPTIMIZE_UTILIZATION"` | no | +| [cluster\_availability\_type](#input\_cluster\_availability\_type) | Type of cluster availability. Possible values are: {REGIONAL, MULTI\_ZONAL} | `string` | `"REGIONAL"` | no | +| [cluster\_reference\_type](#input\_cluster\_reference\_type) | How the google\_container\_node\_pool.system\_node\_pools refers to the cluster. Possible values are: {SELF\_LINK, NAME} | `string` | `"SELF_LINK"` | no | | [configure\_workload\_identity\_sa](#input\_configure\_workload\_identity\_sa) | When true, a kubernetes service account will be created and bound using workload identity to the service account used to create the cluster. | `bool` | `false` | no | | [deployment\_name](#input\_deployment\_name) | Name of the HPC deployment. Used in the GKE cluster name by default and can be configured with `prefix_with_deployment_name`. | `string` | n/a | yes | | [enable\_dataplane\_v2](#input\_enable\_dataplane\_v2) | Enables [Dataplane v2](https://cloud.google.com/kubernetes-engine/docs/concepts/dataplane-v2). This setting is immutable on clusters. If null, will default to false unless using multi-networking, in which case it will default to true | `bool` | `null` | no | @@ -159,7 +161,6 @@ limitations under the License. | [enable\_private\_ipv6\_google\_access](#input\_enable\_private\_ipv6\_google\_access) | The private IPv6 google access type for the VMs in this subnet. 
| `bool` | `true` | no | | [enable\_private\_nodes](#input\_enable\_private\_nodes) | (Beta) Whether nodes have internal IP addresses only. | `bool` | `true` | no | | [gcp\_public\_cidrs\_access\_enabled](#input\_gcp\_public\_cidrs\_access\_enabled) | Whether the cluster master is accessible via all the Google Compute Engine Public IPs. To view this list of IP addresses look here https://cloud.google.com/compute/docs/faq#find_ip_range | `bool` | `false` | no | -| [is\_gke\_sandbox](#input\_is\_gke\_sandbox) | Temporary variable to identify the GKE sandbox environment | `bool` | `false` | no | | [labels](#input\_labels) | GCE resource labels to be applied to resources. Key-value pairs. | `map(string)` | n/a | yes | | [maintenance\_exclusions](#input\_maintenance\_exclusions) | List of maintenance exclusions. A cluster can have up to three. |
list(object({
name = string
start_time = string
end_time = string
exclusion_scope = string
}))
| `[]` | no | | [maintenance\_start\_time](#input\_maintenance\_start\_time) | Start time for daily maintenance operations. Specified in GMT with `HH:MM` format. | `string` | `"09:00"` | no | @@ -189,7 +190,7 @@ limitations under the License. | [system\_node\_pool\_taints](#input\_system\_node\_pool\_taints) | Taints to be applied to the system node pool. |
list(object({
key = string
value = any
effect = string
}))
|
[
{
"effect": "NO_SCHEDULE",
"key": "components.gke.io/gke-managed-components",
"value": true
}
]
| no | | [timeout\_create](#input\_timeout\_create) | Timeout for creating a node pool | `string` | `null` | no | | [timeout\_update](#input\_timeout\_update) | Timeout for updating a node pool | `string` | `null` | no | -| [zone](#input\_zone) | Zone | `string` | `null` | no | +| [zone](#input\_zone) | Zone for a zonal cluster | `string` | `null` | no | ## Outputs diff --git a/modules/scheduler/gke-cluster/main.tf b/modules/scheduler/gke-cluster/main.tf index 72514b2414..2a42cd909a 100644 --- a/modules/scheduler/gke-cluster/main.tf +++ b/modules/scheduler/gke-cluster/main.tf @@ -85,7 +85,7 @@ resource "google_container_cluster" "gke_cluster" { project = var.project_id name = local.name - location = var.is_gke_sandbox ? var.zone : var.region + location = var.cluster_availability_type == "MULTI_ZONAL" ? var.zone : var.region resource_labels = local.labels # decouple node pool lifecycle from cluster life cycle @@ -97,10 +97,6 @@ resource "google_container_cluster" "gke_cluster" { network = var.network_id subnetwork = var.subnetwork_self_link - # Note: Though the default value of VPC_NATIVE is sufficient to enable IP Aliasing, - # It makes sense to let that argument be explicit so that it remains in our consideration when upgrading the provider. - # Because, in the newer provider versions the default may change - networking_mode = "VPC_NATIVE" # Note: the existence of the "master_authorized_networks_config" block enables # the master authorized networks even if it's empty. @@ -226,6 +222,15 @@ resource "google_container_cluster" "gke_cluster" { condition = !(!coalesce(var.enable_multi_networking, true) && length(var.additional_networks) > 0) error_message = "'enable_multi_networking' cannot be false when using multivpc module, which passes additional_networks." 
} + precondition { + condition = contains(["REGIONAL", "MULTI_ZONAL"], var.cluster_availability_type) + error_message = "`cluster_availability_type` must be one of {REGIONAL, MULTI_ZONAL}" + } + precondition { + condition = contains(["SELF_LINK", "NAME"], var.cluster_reference_type) + error_message = "`cluster_reference_type` must be one of {SELF_LINK, NAME}" + } + } logging_service = "logging.googleapis.com/kubernetes" @@ -240,9 +245,9 @@ resource "google_container_node_pool" "system_node_pools" { project = var.project_id name = var.system_node_pool_name - cluster = var.is_gke_sandbox ? google_container_cluster.gke_cluster.name : google_container_cluster.gke_cluster.self_link + cluster = var.cluster_reference_type == "NAME" ? google_container_cluster.gke_cluster.name : google_container_cluster.gke_cluster.self_link version = var.min_master_version - location = var.is_gke_sandbox ? var.zone : null + location = var.cluster_availability_type == "MULTI_ZONAL" ? var.zone : null autoscaling { total_min_node_count = var.system_node_pool_node_count.total_min_nodes diff --git a/modules/scheduler/gke-cluster/variables.tf b/modules/scheduler/gke-cluster/variables.tf index 4088eae21c..bbaf07bd4a 100644 --- a/modules/scheduler/gke-cluster/variables.tf +++ b/modules/scheduler/gke-cluster/variables.tf @@ -334,16 +334,20 @@ variable "rdma_subnetwork_name_prefix" { type = string } -# REMOVE_ME: It's a temporary variable used in internal testing -variable "is_gke_sandbox" { - description = "Temporary variable to identify the GKE sandbox environment" - default = false - type = bool +variable "cluster_reference_type" { + description = "How the google_container_node_pool.system_node_pools refers to the cluster. Possible values are: {SELF_LINK, NAME}" + default = "SELF_LINK" + type = string +} + +variable "cluster_availability_type" { + description = "Type of cluster availability. 
Possible values are: {REGIONAL, MULTI_ZONAL}" + default = "REGIONAL" + type = string } -# REMOVE_ME: It's a temporary variable used in internal testing variable "zone" { - description = "Zone" + description = "Zone for a zonal cluster" default = null type = string } diff --git a/modules/scheduler/pre-existing-gke-cluster/main.tf b/modules/scheduler/pre-existing-gke-cluster/main.tf index d8d3171361..926d2be100 100644 --- a/modules/scheduler/pre-existing-gke-cluster/main.tf +++ b/modules/scheduler/pre-existing-gke-cluster/main.tf @@ -61,7 +61,7 @@ locals { } module "kubectl_apply" { - source = "../../management/kubectl-apply" # can point to github + source = "../../management/kubectl-apply" cluster_id = data.google_container_cluster.existing_gke_cluster.id project_id = var.project_id From 321e6b94e585d65800f96dbad849a0bc6488d95e Mon Sep 17 00:00:00 2001 From: Atul Rajmane Date: Wed, 16 Oct 2024 12:46:45 +0000 Subject: [PATCH 11/26] Use template_subnetworks to generate output_subnets_gke --- community/modules/network/rdma-vpc/main.tf | 6 +-- modules/scheduler/gke-cluster/README.md | 1 - modules/scheduler/gke-cluster/main.tf | 56 +++++++--------------- modules/scheduler/gke-cluster/variables.tf | 6 --- 4 files changed, 19 insertions(+), 50 deletions(-) diff --git a/community/modules/network/rdma-vpc/main.tf b/community/modules/network/rdma-vpc/main.tf index 85f2125209..b4532f36f3 100644 --- a/community/modules/network/rdma-vpc/main.tf +++ b/community/modules/network/rdma-vpc/main.tf @@ -126,12 +126,10 @@ locals { } ] - # FIX_ME(arajmane): There is a concern about this not working in a shared VPC environment. - # To unblock experimental testing, we decided to go ahead with this. 
output_subnets_gke = [ - for subnet in module.vpc.subnets : { + for i in range(length(module.vpc.subnets)) : { network = local.network_name - subnetwork = subnet.name + subnetwork = local.template_subnetworks[i].subnet_name subnetwork_project = var.project_id network_ip = "" nic_type = coalesce(var.nic_type, try(regex("IRDMA", local.profile_name), regex("MRDMA", local.profile_name), "RDMA")) diff --git a/modules/scheduler/gke-cluster/README.md b/modules/scheduler/gke-cluster/README.md index f338452460..d391e52cb1 100644 --- a/modules/scheduler/gke-cluster/README.md +++ b/modules/scheduler/gke-cluster/README.md @@ -172,7 +172,6 @@ limitations under the License. | [pods\_ip\_range\_name](#input\_pods\_ip\_range\_name) | The name of the secondary subnet ip range to use for pods. | `string` | `"pods"` | no | | [prefix\_with\_deployment\_name](#input\_prefix\_with\_deployment\_name) | If true, cluster name will be prefixed by `deployment_name` (ex: -). | `bool` | `true` | no | | [project\_id](#input\_project\_id) | The project ID to host the cluster in. | `string` | n/a | yes | -| [rdma\_subnetwork\_name\_prefix](#input\_rdma\_subnetwork\_name\_prefix) | Prefix of the RDMA subnetwork names | `string` | `null` | no | | [region](#input\_region) | The region to host the cluster in. | `string` | n/a | yes | | [release\_channel](#input\_release\_channel) | The release channel of this cluster. Accepted values are `UNSPECIFIED`, `RAPID`, `REGULAR` and `STABLE`. | `string` | `"UNSPECIFIED"` | no | | [service\_account](#input\_service\_account) | DEPRECATED: use service\_account\_email and scopes. |
object({
email = string,
scopes = set(string)
})
| `null` | no | diff --git a/modules/scheduler/gke-cluster/main.tf b/modules/scheduler/gke-cluster/main.tf index 2a42cd909a..865fdcf99b 100644 --- a/modules/scheduler/gke-cluster/main.tf +++ b/modules/scheduler/gke-cluster/main.tf @@ -36,44 +36,6 @@ locals { # multi networking needs enabled Dataplane v2 derived_enable_dataplane_v2 = coalesce(var.enable_dataplane_v2, local.derived_enable_multi_networking) - - rdma_networks = [for network_info in var.additional_networks : network_info if strcontains(upper(network_info.nic_type), "RDMA")] - non_rdma_networks = [for network_info in var.additional_networks : network_info if !strcontains(upper(network_info.nic_type), "RDMA")] - apply_manifests_rdma_networks = flatten([ - for idx, network_info in local.rdma_networks : [ - { - source = "${path.module}/templates/gke-network-paramset.yaml.tftpl", - template_vars = { - name = "${var.rdma_subnetwork_name_prefix}-${idx}", - network_name = network_info.network - subnetwork_name = "${var.rdma_subnetwork_name_prefix}-${idx}", - device_mode = "RDMA" - } - }, - { - source = "${path.module}/templates/network-object.yaml.tftpl", - template_vars = { name = "${var.rdma_subnetwork_name_prefix}-${idx}" } - } - ] - ]) - - apply_manifests_non_rdma_networks = flatten([ - for idx, network_info in local.non_rdma_networks : [ - { - source = "${path.module}/templates/gke-network-paramset.yaml.tftpl", - template_vars = { - name = network_info.subnetwork - network_name = network_info.network - subnetwork_name = network_info.subnetwork - device_mode = "NetDevice" - } - }, - { - source = "${path.module}/templates/network-object.yaml.tftpl", - template_vars = { name = network_info.subnetwork } - } - ] - ]) } data "google_compute_default_service_account" "default_sa" { @@ -388,5 +350,21 @@ module "kubectl_apply" { cluster_id = google_container_cluster.gke_cluster.id project_id = var.project_id - apply_manifests = concat(local.apply_manifests_non_rdma_networks, local.apply_manifests_rdma_networks) + 
apply_manifests = flatten([ + for idx, network_info in var.additional_networks : [ + { + source = "${path.module}/templates/gke-network-paramset.yaml.tftpl", + template_vars = { + name = network_info.subnetwork, + network_name = network_info.network + subnetwork_name = network_info.subnetwork, + device_mode = strcontains(upper(network_info.nic_type), "RDMA") ? "RDMA" : "NetDevice" + } + }, + { + source = "${path.module}/templates/network-object.yaml.tftpl", + template_vars = { name = network_info.subnetwork } + } + ] + ]) } diff --git a/modules/scheduler/gke-cluster/variables.tf b/modules/scheduler/gke-cluster/variables.tf index bbaf07bd4a..02e29db3fd 100644 --- a/modules/scheduler/gke-cluster/variables.tf +++ b/modules/scheduler/gke-cluster/variables.tf @@ -328,12 +328,6 @@ variable "additional_networks" { })) } -variable "rdma_subnetwork_name_prefix" { - description = "Prefix of the RDMA subnetwork names" - default = null - type = string -} - variable "cluster_reference_type" { description = "How the google_container_node_pool.system_node_pools refers to the cluster. 
Possible values are: {SELF_LINK, NAME}" default = "SELF_LINK" From 52ef9479cb0e77c250eba0d8ccf45d25704aeb80 Mon Sep 17 00:00:00 2001 From: annuay Date: Mon, 28 Oct 2024 09:35:40 +0000 Subject: [PATCH 12/26] Support NCCL and add blueprint --- .../htcondor-execute-point/gpu_definition.tf | 1 + .../gpu_definition.tf | 1 + .../gpu_definition.tf | 1 + .../gpu_definition.tf | 1 + .../gpu_definition.tf | 1 + .../gpu_definition.tf | 1 + .../gpu_definition.tf | 1 + .../gpu_definition.tf | 1 + examples/gke-a3-ultragpu.yaml | 207 ++++++++++++++++++ .../compute/gke-node-pool/gpu_definition.tf | 1 + modules/compute/vm-instance/gpu_definition.tf | 1 + 11 files changed, 217 insertions(+) create mode 100644 examples/gke-a3-ultragpu.yaml diff --git a/community/modules/compute/htcondor-execute-point/gpu_definition.tf b/community/modules/compute/htcondor-execute-point/gpu_definition.tf index 6c5d96d286..1c84a92721 100644 --- a/community/modules/compute/htcondor-execute-point/gpu_definition.tf +++ b/community/modules/compute/htcondor-execute-point/gpu_definition.tf @@ -38,6 +38,7 @@ locals { "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, + "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, "g2-standard-4" = { type = "nvidia-l4", count = 1 }, "g2-standard-8" = { type = "nvidia-l4", count = 1 }, "g2-standard-12" = { type = "nvidia-l4", count = 1 }, diff --git a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/gpu_definition.tf b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/gpu_definition.tf index 6c5d96d286..1c84a92721 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v5-node-group/gpu_definition.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v5-node-group/gpu_definition.tf @@ -38,6 +38,7 @@ locals { "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, "a3-highgpu-8g" = { type = 
"nvidia-h100-80gb", count = 8 }, "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, + "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, "g2-standard-4" = { type = "nvidia-l4", count = 1 }, "g2-standard-8" = { type = "nvidia-l4", count = 1 }, "g2-standard-12" = { type = "nvidia-l4", count = 1 }, diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/gpu_definition.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/gpu_definition.tf index 6c5d96d286..1c84a92721 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/gpu_definition.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/gpu_definition.tf @@ -38,6 +38,7 @@ locals { "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, + "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, "g2-standard-4" = { type = "nvidia-l4", count = 1 }, "g2-standard-8" = { type = "nvidia-l4", count = 1 }, "g2-standard-12" = { type = "nvidia-l4", count = 1 }, diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/gpu_definition.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/gpu_definition.tf index 6c5d96d286..1c84a92721 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/gpu_definition.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/gpu_definition.tf @@ -38,6 +38,7 @@ locals { "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, + "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, "g2-standard-4" = { type = "nvidia-l4", count = 1 }, "g2-standard-8" = { type = "nvidia-l4", count = 1 }, "g2-standard-12" = { type = "nvidia-l4", count = 1 }, diff --git 
a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/gpu_definition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/gpu_definition.tf index 6c5d96d286..1c84a92721 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/gpu_definition.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/gpu_definition.tf @@ -38,6 +38,7 @@ locals { "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, + "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, "g2-standard-4" = { type = "nvidia-l4", count = 1 }, "g2-standard-8" = { type = "nvidia-l4", count = 1 }, "g2-standard-12" = { type = "nvidia-l4", count = 1 }, diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/gpu_definition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/gpu_definition.tf index 6c5d96d286..1c84a92721 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v5-login/gpu_definition.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v5-login/gpu_definition.tf @@ -38,6 +38,7 @@ locals { "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, + "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, "g2-standard-4" = { type = "nvidia-l4", count = 1 }, "g2-standard-8" = { type = "nvidia-l4", count = 1 }, "g2-standard-12" = { type = "nvidia-l4", count = 1 }, diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/gpu_definition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/gpu_definition.tf index 6c5d96d286..1c84a92721 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/gpu_definition.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/gpu_definition.tf @@ -38,6 +38,7 @@ locals { 
"a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, + "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, "g2-standard-4" = { type = "nvidia-l4", count = 1 }, "g2-standard-8" = { type = "nvidia-l4", count = 1 }, "g2-standard-12" = { type = "nvidia-l4", count = 1 }, diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/gpu_definition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/gpu_definition.tf index 6c5d96d286..1c84a92721 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/gpu_definition.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/gpu_definition.tf @@ -38,6 +38,7 @@ locals { "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, + "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, "g2-standard-4" = { type = "nvidia-l4", count = 1 }, "g2-standard-8" = { type = "nvidia-l4", count = 1 }, "g2-standard-12" = { type = "nvidia-l4", count = 1 }, diff --git a/examples/gke-a3-ultragpu.yaml b/examples/gke-a3-ultragpu.yaml new file mode 100644 index 0000000000..59285836c3 --- /dev/null +++ b/examples/gke-a3-ultragpu.yaml @@ -0,0 +1,207 @@ +# Copyright 2024 "Google LLC" +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +blueprint_name: a3ultra-staging-gke + +vars: + project_id: hpc-toolkit-dev-staging + deployment_name: a3ultra-staging-gke + region: us-east5 + zone: us-east5-staginga + # Cidr block containing the IP of the machine calling terraform. + # The following line must be updated for this example to work. + + # For staging purposes authorized_cidr has been left completely open + # The value can be more specific if the IPs are known which will run kubectl + # e.g. the local system running Terraform or a remote node + authorized_cidr: 0.0.0.0/0 + nccl_installer_path: /home/user/nccl_installer.yaml + +validators: +- validator: test_zone_exists + inputs: {} + skip: true +- validator: test_zone_in_region + inputs: {} + skip: true +- validator: test_region_exists + inputs: {} + skip: true + +terraform_providers: + google: + source: hashicorp/google + version: 5.38.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + container_custom_endpoint: "https://hpc-toolkit-rdma-sandbox-1-test-container.sandbox.googleapis.com/" + compute_custom_endpoint: "https://www.googleapis.com/compute/staging_v1/" + + google-beta: + source: hashicorp/google-beta + version: 5.38.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + container_custom_endpoint: "https://hpc-toolkit-rdma-sandbox-1-test-container.sandbox.googleapis.com/" + compute_custom_endpoint: "https://www.googleapis.com/compute/staging_v1/" + + google-private: + source: hashicorp/google-private + version: 0.0.1962 # This version should not change - google-private is inherently brittle + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + container_custom_endpoint: "https://hpc-toolkit-rdma-sandbox-1-test-container.sandbox.googleapis.com/" + compute_custom_endpoint: "https://www.googleapis.com/compute/staging_alpha/" + +deployment_groups: +- group: primary + modules: + - id: a3ultra-gke-net-0 + source: 
modules/network/vpc + settings: + network_name: a3ultra-gke-net-0 + allowed_ssh_ip_ranges: [0.0.0.0/0] + subnetworks: + - subnet_name: a3ultra-gke-sub-0 + subnet_region: $(vars.region) + subnet_ip: 192.168.0.0/18 + secondary_ranges: + a3ultra-gke-sub-0: + - range_name: pods + ip_cidr_range: 10.4.0.0/14 + - range_name: services + ip_cidr_range: 10.0.32.0/20 + firewall_rules: + - name: a3ultra-gke-internal-0 + ranges: [192.168.0.0/16] + allow: + - protocol: tcp + ports: ["0-65535"] + - protocol: udp + ports: ["0-65535"] + - protocol: icmp + + - id: a3ultra-gke-net-1 + source: modules/network/vpc + settings: + network_name: a3ultra-gke-net-1 + subnetworks: + - subnet_name: a3ultra-gke-sub-1 + subnet_region: $(vars.region) + subnet_ip: 192.168.64.0/18 + firewall_rules: + - name: a3ultra-gke-internal-1 + ranges: [192.168.0.0/16] + allow: + - protocol: tcp + ports: ["0-65535"] + - protocol: udp + ports: ["0-65535"] + - protocol: icmp + + - id: a3ultra-gke-rdma-net + source: community/modules/network/rdma-vpc + settings: + network_name: a3ultra-gke-rdma-net + network_profile: https://www.googleapis.com/compute/staging_alpha/projects/$(vars.project_id)/global/networkProfiles/titanium-mrdma + network_routing_mode: REGIONAL + subnetworks_template: + name_prefix: a3ultra-gke-mrdma-sub + count: 8 + ip_range: 192.168.128.0/18 + region: $(vars.region) + + - id: a3-ultragpu-cluster + source: modules/scheduler/gke-cluster + use: [a3ultra-gke-net-0] + settings: + cluster_availability_type: MULTI_ZONAL + cluster_reference_type: NAME + min_master_version: 1.30.4-gke.1348000 + enable_private_endpoint: false # Allows for access from authorized public IPs + zone: $(vars.zone) # Temporarily used to target the GKE sandbox + master_authorized_networks: + - cidr_block: $(vars.authorized_cidr) # Allows your machine run kubectl command. It's required for the multi-network setup. 
+ display_name: "kubectl-access-network" + additional_networks: + $(concat( + [{ + network=a3ultra-gke-net-1.network_name, + subnetwork=a3ultra-gke-net-1.subnetwork_name, + subnetwork_project=vars.project_id, + nic_type="GVNIC", + queue_count=null, + network_ip=null, + stack_type=null, + access_config=[{nat_ip=null, public_ptr_domain_name=null, network_tier=null}], + ipv6_access_config=[], + alias_ip_range=[] + }], + a3ultra-gke-rdma-net.subnetwork_interfaces_gke + )) + outputs: [instructions] + + - id: nccl_installer + source: modules/management/kubectl-apply + use: [a3-ultragpu-cluster] + settings: + apply_manifests: + - source: $(vars.nccl_installer_path) + + - id: a3-ultragpu-pool + source: modules/compute/gke-node-pool + use: [a3-ultragpu-cluster] + settings: + node_version: 1.30.4-gke.1348000 # Temporarily used to target the GKE sandbox + machine_type: a3-ultragpu-8g + zones: [$(vars.zone)] + disk_type: hyperdisk-balanced + guest_accelerator: + - type: nvidia-h200-141gb + count: 8 + gpu_driver_installation_config: + - gpu_driver_version: "LATEST" + additional_networks: + $(concat( + [{ + network=a3ultra-gke-net-1.network_name, + subnetwork=a3ultra-gke-net-1.subnetwork_name, + subnetwork_project=vars.project_id, + nic_type="GVNIC", + queue_count=null, + network_ip=null, + stack_type=null, + access_config=[{nat_ip=null, public_ptr_domain_name=null, network_tier=null}], + ipv6_access_config=[], + alias_ip_range=[] + }], + a3ultra-gke-rdma-net.subnetwork_interfaces_gke + )) + outputs: [instructions] + + - id: job_template + source: modules/compute/gke-job-template + use: [a3-ultragpu-pool] + settings: + image: nvidia/cuda:11.0.3-runtime-ubuntu20.04 + command: + - nvidia-smi + node_count: 2 + outputs: [instructions] diff --git a/modules/compute/gke-node-pool/gpu_definition.tf b/modules/compute/gke-node-pool/gpu_definition.tf index 6c5d96d286..1c84a92721 100644 --- a/modules/compute/gke-node-pool/gpu_definition.tf +++ 
b/modules/compute/gke-node-pool/gpu_definition.tf @@ -38,6 +38,7 @@ locals { "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, + "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, "g2-standard-4" = { type = "nvidia-l4", count = 1 }, "g2-standard-8" = { type = "nvidia-l4", count = 1 }, "g2-standard-12" = { type = "nvidia-l4", count = 1 }, diff --git a/modules/compute/vm-instance/gpu_definition.tf b/modules/compute/vm-instance/gpu_definition.tf index 6c5d96d286..1c84a92721 100644 --- a/modules/compute/vm-instance/gpu_definition.tf +++ b/modules/compute/vm-instance/gpu_definition.tf @@ -38,6 +38,7 @@ locals { "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, + "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, "g2-standard-4" = { type = "nvidia-l4", count = 1 }, "g2-standard-8" = { type = "nvidia-l4", count = 1 }, "g2-standard-12" = { type = "nvidia-l4", count = 1 }, From 9c860116be6280fcf8f8b436093f0e3ee133a635 Mon Sep 17 00:00:00 2001 From: annuay Date: Mon, 28 Oct 2024 10:05:40 +0000 Subject: [PATCH 13/26] Delete blueprint --- examples/gke-a3-ultragpu.yaml | 207 ---------------------------------- 1 file changed, 207 deletions(-) delete mode 100644 examples/gke-a3-ultragpu.yaml diff --git a/examples/gke-a3-ultragpu.yaml b/examples/gke-a3-ultragpu.yaml deleted file mode 100644 index 59285836c3..0000000000 --- a/examples/gke-a3-ultragpu.yaml +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright 2024 "Google LLC" -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -blueprint_name: a3ultra-staging-gke - -vars: - project_id: hpc-toolkit-dev-staging - deployment_name: a3ultra-staging-gke - region: us-east5 - zone: us-east5-staginga - # Cidr block containing the IP of the machine calling terraform. - # The following line must be updated for this example to work. - - # For staging purposes authorized_cidr has been left completely open - # The value can be more specific if the IPs are known which will run kubectl - # e.g. the local system running Terraform or a remote node - authorized_cidr: 0.0.0.0/0 - nccl_installer_path: /home/user/nccl_installer.yaml - -validators: -- validator: test_zone_exists - inputs: {} - skip: true -- validator: test_zone_in_region - inputs: {} - skip: true -- validator: test_region_exists - inputs: {} - skip: true - -terraform_providers: - google: - source: hashicorp/google - version: 5.38.0 - configuration: - project: $(vars.project_id) - region: $(vars.region) - zone: $(vars.zone) - container_custom_endpoint: "https://hpc-toolkit-rdma-sandbox-1-test-container.sandbox.googleapis.com/" - compute_custom_endpoint: "https://www.googleapis.com/compute/staging_v1/" - - google-beta: - source: hashicorp/google-beta - version: 5.38.0 - configuration: - project: $(vars.project_id) - region: $(vars.region) - zone: $(vars.zone) - container_custom_endpoint: "https://hpc-toolkit-rdma-sandbox-1-test-container.sandbox.googleapis.com/" - compute_custom_endpoint: "https://www.googleapis.com/compute/staging_v1/" - - google-private: - source: hashicorp/google-private - version: 0.0.1962 # This version should not 
change - google-private is inherently brittle - configuration: - project: $(vars.project_id) - region: $(vars.region) - zone: $(vars.zone) - container_custom_endpoint: "https://hpc-toolkit-rdma-sandbox-1-test-container.sandbox.googleapis.com/" - compute_custom_endpoint: "https://www.googleapis.com/compute/staging_alpha/" - -deployment_groups: -- group: primary - modules: - - id: a3ultra-gke-net-0 - source: modules/network/vpc - settings: - network_name: a3ultra-gke-net-0 - allowed_ssh_ip_ranges: [0.0.0.0/0] - subnetworks: - - subnet_name: a3ultra-gke-sub-0 - subnet_region: $(vars.region) - subnet_ip: 192.168.0.0/18 - secondary_ranges: - a3ultra-gke-sub-0: - - range_name: pods - ip_cidr_range: 10.4.0.0/14 - - range_name: services - ip_cidr_range: 10.0.32.0/20 - firewall_rules: - - name: a3ultra-gke-internal-0 - ranges: [192.168.0.0/16] - allow: - - protocol: tcp - ports: ["0-65535"] - - protocol: udp - ports: ["0-65535"] - - protocol: icmp - - - id: a3ultra-gke-net-1 - source: modules/network/vpc - settings: - network_name: a3ultra-gke-net-1 - subnetworks: - - subnet_name: a3ultra-gke-sub-1 - subnet_region: $(vars.region) - subnet_ip: 192.168.64.0/18 - firewall_rules: - - name: a3ultra-gke-internal-1 - ranges: [192.168.0.0/16] - allow: - - protocol: tcp - ports: ["0-65535"] - - protocol: udp - ports: ["0-65535"] - - protocol: icmp - - - id: a3ultra-gke-rdma-net - source: community/modules/network/rdma-vpc - settings: - network_name: a3ultra-gke-rdma-net - network_profile: https://www.googleapis.com/compute/staging_alpha/projects/$(vars.project_id)/global/networkProfiles/titanium-mrdma - network_routing_mode: REGIONAL - subnetworks_template: - name_prefix: a3ultra-gke-mrdma-sub - count: 8 - ip_range: 192.168.128.0/18 - region: $(vars.region) - - - id: a3-ultragpu-cluster - source: modules/scheduler/gke-cluster - use: [a3ultra-gke-net-0] - settings: - cluster_availability_type: MULTI_ZONAL - cluster_reference_type: NAME - min_master_version: 1.30.4-gke.1348000 - 
enable_private_endpoint: false # Allows for access from authorized public IPs - zone: $(vars.zone) # Temporarily used to target the GKE sandbox - master_authorized_networks: - - cidr_block: $(vars.authorized_cidr) # Allows your machine run kubectl command. It's required for the multi-network setup. - display_name: "kubectl-access-network" - additional_networks: - $(concat( - [{ - network=a3ultra-gke-net-1.network_name, - subnetwork=a3ultra-gke-net-1.subnetwork_name, - subnetwork_project=vars.project_id, - nic_type="GVNIC", - queue_count=null, - network_ip=null, - stack_type=null, - access_config=[{nat_ip=null, public_ptr_domain_name=null, network_tier=null}], - ipv6_access_config=[], - alias_ip_range=[] - }], - a3ultra-gke-rdma-net.subnetwork_interfaces_gke - )) - outputs: [instructions] - - - id: nccl_installer - source: modules/management/kubectl-apply - use: [a3-ultragpu-cluster] - settings: - apply_manifests: - - source: $(vars.nccl_installer_path) - - - id: a3-ultragpu-pool - source: modules/compute/gke-node-pool - use: [a3-ultragpu-cluster] - settings: - node_version: 1.30.4-gke.1348000 # Temporarily used to target the GKE sandbox - machine_type: a3-ultragpu-8g - zones: [$(vars.zone)] - disk_type: hyperdisk-balanced - guest_accelerator: - - type: nvidia-h200-141gb - count: 8 - gpu_driver_installation_config: - - gpu_driver_version: "LATEST" - additional_networks: - $(concat( - [{ - network=a3ultra-gke-net-1.network_name, - subnetwork=a3ultra-gke-net-1.subnetwork_name, - subnetwork_project=vars.project_id, - nic_type="GVNIC", - queue_count=null, - network_ip=null, - stack_type=null, - access_config=[{nat_ip=null, public_ptr_domain_name=null, network_tier=null}], - ipv6_access_config=[], - alias_ip_range=[] - }], - a3ultra-gke-rdma-net.subnetwork_interfaces_gke - )) - outputs: [instructions] - - - id: job_template - source: modules/compute/gke-job-template - use: [a3-ultragpu-pool] - settings: - image: nvidia/cuda:11.0.3-runtime-ubuntu20.04 - command: - - 
nvidia-smi - node_count: 2 - outputs: [instructions] From 7ea3677da2b679a565e90c6ce4b7f3d50fcfed57 Mon Sep 17 00:00:00 2001 From: Carson Dunbar Date: Fri, 1 Nov 2024 14:12:21 +0000 Subject: [PATCH 14/26] Updating rdma-vpc to require users to enter a nic type to be associated with a VPC --- community/modules/network/rdma-vpc/README.md | 2 +- community/modules/network/rdma-vpc/main.tf | 2 +- community/modules/network/rdma-vpc/variables.tf | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/community/modules/network/rdma-vpc/README.md b/community/modules/network/rdma-vpc/README.md index d9ff0993f2..a534b362e0 100644 --- a/community/modules/network/rdma-vpc/README.md +++ b/community/modules/network/rdma-vpc/README.md @@ -61,7 +61,7 @@ No resources. | [network\_name](#input\_network\_name) | The name of the network to be created (if unsupplied, will default to "{deployment\_name}-net") | `string` | `null` | no | | [network\_profile](#input\_network\_profile) | Profile name for VPC configuration | `string` | `null` | no | | [network\_routing\_mode](#input\_network\_routing\_mode) | The network routing mode (default "GLOBAL") | `string` | `"GLOBAL"` | no | -| [nic\_type](#input\_nic\_type) | NIC type for use in modules that use the output | `string` | `null` | no | +| [nic\_type](#input\_nic\_type) | NIC type for use in modules that use the output | `string` | `"MRDMA"` | no | | [project\_id](#input\_project\_id) | Project in which the HPC deployment will be created | `string` | n/a | yes | | [region](#input\_region) | The default region for Cloud resources | `string` | n/a | yes | | [secondary\_ranges](#input\_secondary\_ranges) | Secondary ranges that will be used in some of the subnets. Please see https://goo.gle/hpc-toolkit-vpc-deprecation for migration instructions. 
| `map(list(object({ range_name = string, ip_cidr_range = string })))` | `{}` | no | diff --git a/community/modules/network/rdma-vpc/main.tf b/community/modules/network/rdma-vpc/main.tf index b4532f36f3..cf4d76fb73 100644 --- a/community/modules/network/rdma-vpc/main.tf +++ b/community/modules/network/rdma-vpc/main.tf @@ -117,7 +117,7 @@ locals { subnetwork = subnet.self_link subnetwork_project = null # will populate from subnetwork_self_link network_ip = "" - nic_type = coalesce(var.nic_type, try(regex("IRDMA", local.profile_name), regex("MRDMA", local.profile_name), "RDMA")) + nic_type = var.nic_type stack_type = null queue_count = null access_config = [] diff --git a/community/modules/network/rdma-vpc/variables.tf b/community/modules/network/rdma-vpc/variables.tf index 642f822131..a5ba44d455 100644 --- a/community/modules/network/rdma-vpc/variables.tf +++ b/community/modules/network/rdma-vpc/variables.tf @@ -178,5 +178,6 @@ variable "network_profile" { variable "nic_type" { description = "NIC type for use in modules that use the output" type = string - default = null + nullable = false + default = "MRDMA" } From 1d982d2139e0443ba6b486d0dffc35b07d71ad09 Mon Sep 17 00:00:00 2001 From: Atul Rajmane Date: Tue, 5 Nov 2024 10:38:30 +0000 Subject: [PATCH 15/26] Support Extended Reservations --- modules/compute/gke-node-pool/main.tf | 2 +- .../gke-node-pool/reservation_definitions.tf | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index 4d8d8d1536..1e4200b0bf 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -161,7 +161,7 @@ resource "google_container_node_pool" "node_pool" { reservation_affinity { consume_reservation_type = var.reservation_affinity.consume_reservation_type key = length(local.verified_specific_reservations) != 1 ? 
null : local.reservation_resource_api_label - values = length(local.verified_specific_reservations) != 1 ? null : [for r in local.verified_specific_reservations : "projects/${r.project}/reservations/${r.name}"] + values = length(local.verified_specific_reservations) != 1 ? null : [for i, r in local.verified_specific_reservations : "projects/${r.project}/reservations/${format("%s%s", r.name, local.input_reservation_suffixes[i])}"] } dynamic "host_maintenance_policy" { diff --git a/modules/compute/gke-node-pool/reservation_definitions.tf b/modules/compute/gke-node-pool/reservation_definitions.tf index d40cc5b01f..4a8530f975 100644 --- a/modules/compute/gke-node-pool/reservation_definitions.tf +++ b/modules/compute/gke-node-pool/reservation_definitions.tf @@ -14,6 +14,16 @@ * limitations under the License. */ +# Split the input into three different lists where the details of a given reservation are at the same index across these lists. +locals { + # Specific block of an extended reservation can be targeted with exr-one/reservationBlocks/exr-one-block-1 + # Data source needs to be queried with the reservation name only. So, we extract the reservation name + input_reservation_names = [for r in try(var.reservation_affinity.specific_reservations, []) : split("/", r.name)[0]] + input_reservation_projects = [for r in try(var.reservation_affinity.specific_reservations, []) : coalesce(r.project, var.project_id)] + # We, also, remember the suffix "/reservationBlocks/exr-one-block-1" for use elsewhere afterwards + input_reservation_suffixes = [for r in try(var.reservation_affinity.specific_reservations, []) : substr(r.name, length(split("/", r.name)[0]), -1)] +} + data "google_compute_reservation" "specific_reservations" { for_each = ( local.input_specific_reservations_count == 0 ? 
@@ -21,11 +31,11 @@ data "google_compute_reservation" "specific_reservations" { { for pair in flatten([ for zone in try(var.zones, []) : [ - for reservation in try(var.reservation_affinity.specific_reservations, []) : { - key : "${coalesce(reservation.project, var.project_id)}/${zone}/${reservation.name}" + for i, reservation_name in try(local.input_reservation_names, []) : { + key : "${local.input_reservation_projects[i]}/${zone}/${reservation_name}" zone : zone - reservation_name : reservation.name - project : reservation.project == null ? var.project_id : reservation.project + reservation_name : reservation_name + project : local.input_reservation_projects[i] } ] ]) : From b6ab698f1bd1859228a07b9b3294040dd9287058 Mon Sep 17 00:00:00 2001 From: annuay Date: Fri, 8 Nov 2024 10:35:01 +0000 Subject: [PATCH 16/26] delete terraform dir --- examples/gke-a3-ultragpu.yaml | 193 ++++++++++++++++++ .../compute/gke-node-pool/disk_definitions.tf | 5 +- 2 files changed, 196 insertions(+), 2 deletions(-) create mode 100644 examples/gke-a3-ultragpu.yaml diff --git a/examples/gke-a3-ultragpu.yaml b/examples/gke-a3-ultragpu.yaml new file mode 100644 index 0000000000..59fd40b32c --- /dev/null +++ b/examples/gke-a3-ultragpu.yaml @@ -0,0 +1,193 @@ +# Copyright 2024 "Google LLC" +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +blueprint_name: a3ultra-staging-gke + +vars: + project_id: gsc-demo-440819 + deployment_name: a3ultra-gke + region: europe-west1 + zone: europe-west1-b + # Cidr block containing the IP of the machine calling terraform. + # The following line must be updated for this example to work. + + # For staging purposes authorized_cidr has been left completely open + # The value can be more specific if the IPs are known which will run kubectl + # e.g. the local system running Terraform or a remote node + authorized_cidr: 0.0.0.0/0 + nccl_installer_path: /home/user/nccl_installer.yaml + +terraform_providers: + google: + source: hashicorp/google + version: 5.38.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + container_custom_endpoint: "https://gcp-prod-gke-staging-test-container.sandbox.googleapis.com/" + + google-beta: + source: hashicorp/google-beta + version: 5.38.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + container_custom_endpoint: "https://gcp-prod-gke-staging-test-container.sandbox.googleapis.com/" + + google-private: + source: hashicorp/google-private + version: 0.0.1962 # This version should not change - google-private is inherently brittle + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + container_custom_endpoint: "https://gcp-prod-gke-staging-test-container.sandbox.googleapis.com/" + +deployment_groups: +- group: primary + modules: + - id: a3ultra-gke-net-0 + source: modules/network/vpc + settings: + network_name: a3ultra-gke-net-0 + allowed_ssh_ip_ranges: [0.0.0.0/0] + subnetworks: + - subnet_name: a3ultra-gke-sub-0 + subnet_region: $(vars.region) + subnet_ip: 192.168.0.0/18 + secondary_ranges: + a3ultra-gke-sub-0: + - range_name: pods + ip_cidr_range: 10.4.0.0/14 + - range_name: services + ip_cidr_range: 10.0.32.0/20 + firewall_rules: + - name: a3ultra-gke-internal-0 + ranges: [192.168.0.0/16] + allow: + - protocol: 
tcp + ports: ["0-65535"] + - protocol: udp + ports: ["0-65535"] + - protocol: icmp + + - id: a3ultra-gke-net-1 + source: modules/network/vpc + settings: + network_name: a3ultra-gke-net-1 + subnetworks: + - subnet_name: a3ultra-gke-sub-1 + subnet_region: $(vars.region) + subnet_ip: 192.168.64.0/18 + firewall_rules: + - name: a3ultra-gke-internal-1 + ranges: [192.168.0.0/16] + allow: + - protocol: tcp + ports: ["0-65535"] + - protocol: udp + ports: ["0-65535"] + - protocol: icmp + + - id: a3ultra-gke-rdma-net + source: community/modules/network/rdma-vpc + settings: + network_name: a3ultra-gke-rdma-net + network_profile: https://www.googleapis.com/compute/alpha/projects/$(vars.project_id)/global/networkProfiles/$(vars.zone)-vpc-roce + network_routing_mode: REGIONAL + subnetworks_template: + name_prefix: a3ultra-gke-mrdma-sub + count: 8 + ip_range: 192.168.128.0/18 + region: $(vars.region) + + - id: a3-ultragpu-cluster + source: modules/scheduler/gke-cluster + use: [a3ultra-gke-net-0] + settings: + cluster_availability_type: MULTI_ZONAL + cluster_reference_type: NAME + min_master_version: 1.30.4-gke.1348000 + enable_private_endpoint: false # Allows for access from authorized public IPs + zone: $(vars.zone) # Temporarily used to target the GKE sandbox + master_authorized_networks: + - cidr_block: $(vars.authorized_cidr) # Allows your machine run kubectl command. It's required for the multi-network setup. 
+ display_name: "kubectl-access-network" + additional_networks: + $(concat( + [{ + network=a3ultra-gke-net-1.network_name, + subnetwork=a3ultra-gke-net-1.subnetwork_name, + subnetwork_project=vars.project_id, + nic_type="GVNIC", + queue_count=null, + network_ip=null, + stack_type=null, + access_config=[{nat_ip=null, public_ptr_domain_name=null, network_tier=null}], + ipv6_access_config=[], + alias_ip_range=[] + }], + a3ultra-gke-rdma-net.subnetwork_interfaces_gke + )) + outputs: [instructions] + + - id: nccl_installer + source: modules/management/kubectl-apply + use: [a3-ultragpu-cluster] + settings: + apply_manifests: + - source: $(vars.nccl_installer_path) + + - id: a3-ultragpu-pool + source: modules/compute/gke-node-pool + use: [a3-ultragpu-cluster] + settings: + node_version: 1.30.4-gke.1348000 # Temporarily used to target the GKE sandbox + machine_type: a3-ultragpu-8g + zones: [$(vars.zone)] + disk_type: hyperdisk-balanced + guest_accelerator: + - type: nvidia-h200-141gb + count: 8 + gpu_driver_installation_config: + - gpu_driver_version: "LATEST" + additional_networks: + $(concat( + [{ + network=a3ultra-gke-net-1.network_name, + subnetwork=a3ultra-gke-net-1.subnetwork_name, + subnetwork_project=vars.project_id, + nic_type="GVNIC", + queue_count=null, + network_ip=null, + stack_type=null, + access_config=[{nat_ip=null, public_ptr_domain_name=null, network_tier=null}], + ipv6_access_config=[], + alias_ip_range=[] + }], + a3ultra-gke-rdma-net.subnetwork_interfaces_gke + )) + outputs: [instructions] + + - id: job_template + source: modules/compute/gke-job-template + use: [a3-ultragpu-pool] + settings: + image: nvidia/cuda:11.0.3-runtime-ubuntu20.04 + command: + - nvidia-smi + node_count: 2 + outputs: [instructions] diff --git a/modules/compute/gke-node-pool/disk_definitions.tf b/modules/compute/gke-node-pool/disk_definitions.tf index f7dbebea0a..3d250ef768 100644 --- a/modules/compute/gke-node-pool/disk_definitions.tf +++ 
b/modules/compute/gke-node-pool/disk_definitions.tf @@ -22,8 +22,9 @@ locals { local_ssd_machines = { - "a3-highgpu-8g" = { local_ssd_count_ephemeral_storage = 16, local_ssd_count_nvme_block = null }, - "a3-megagpu-8g" = { local_ssd_count_ephemeral_storage = 16, local_ssd_count_nvme_block = null }, + "a3-highgpu-8g" = { local_ssd_count_ephemeral_storage = 16, local_ssd_count_nvme_block = null }, + "a3-megagpu-8g" = { local_ssd_count_ephemeral_storage = 16, local_ssd_count_nvme_block = null }, + "a3-ultragpu-8g" = { local_ssd_count_ephemeral_storage = 32, local_ssd_count_nvme_block = null }, } generated_local_ssd_config = lookup(local.local_ssd_machines, var.machine_type, { local_ssd_count_ephemeral_storage = null, local_ssd_count_nvme_block = null }) From 71b4b9e0d3652fa9337eaebb62431af6711a4885 Mon Sep 17 00:00:00 2001 From: annuay Date: Fri, 8 Nov 2024 10:36:19 +0000 Subject: [PATCH 17/26] add ssd config for a3u --- examples/gke-a3-ultragpu.yaml | 193 ---------------------------------- 1 file changed, 193 deletions(-) delete mode 100644 examples/gke-a3-ultragpu.yaml diff --git a/examples/gke-a3-ultragpu.yaml b/examples/gke-a3-ultragpu.yaml deleted file mode 100644 index 59fd40b32c..0000000000 --- a/examples/gke-a3-ultragpu.yaml +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright 2024 "Google LLC" -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -blueprint_name: a3ultra-staging-gke - -vars: - project_id: gsc-demo-440819 - deployment_name: a3ultra-gke - region: europe-west1 - zone: europe-west1-b - # Cidr block containing the IP of the machine calling terraform. - # The following line must be updated for this example to work. - - # For staging purposes authorized_cidr has been left completely open - # The value can be more specific if the IPs are known which will run kubectl - # e.g. the local system running Terraform or a remote node - authorized_cidr: 0.0.0.0/0 - nccl_installer_path: /home/user/nccl_installer.yaml - -terraform_providers: - google: - source: hashicorp/google - version: 5.38.0 - configuration: - project: $(vars.project_id) - region: $(vars.region) - zone: $(vars.zone) - container_custom_endpoint: "https://gcp-prod-gke-staging-test-container.sandbox.googleapis.com/" - - google-beta: - source: hashicorp/google-beta - version: 5.38.0 - configuration: - project: $(vars.project_id) - region: $(vars.region) - zone: $(vars.zone) - container_custom_endpoint: "https://gcp-prod-gke-staging-test-container.sandbox.googleapis.com/" - - google-private: - source: hashicorp/google-private - version: 0.0.1962 # This version should not change - google-private is inherently brittle - configuration: - project: $(vars.project_id) - region: $(vars.region) - zone: $(vars.zone) - container_custom_endpoint: "https://gcp-prod-gke-staging-test-container.sandbox.googleapis.com/" - -deployment_groups: -- group: primary - modules: - - id: a3ultra-gke-net-0 - source: modules/network/vpc - settings: - network_name: a3ultra-gke-net-0 - allowed_ssh_ip_ranges: [0.0.0.0/0] - subnetworks: - - subnet_name: a3ultra-gke-sub-0 - subnet_region: $(vars.region) - subnet_ip: 192.168.0.0/18 - secondary_ranges: - a3ultra-gke-sub-0: - - range_name: pods - ip_cidr_range: 10.4.0.0/14 - - range_name: services - ip_cidr_range: 10.0.32.0/20 - firewall_rules: - - name: a3ultra-gke-internal-0 - ranges: [192.168.0.0/16] - allow: - - protocol: 
tcp - ports: ["0-65535"] - - protocol: udp - ports: ["0-65535"] - - protocol: icmp - - - id: a3ultra-gke-net-1 - source: modules/network/vpc - settings: - network_name: a3ultra-gke-net-1 - subnetworks: - - subnet_name: a3ultra-gke-sub-1 - subnet_region: $(vars.region) - subnet_ip: 192.168.64.0/18 - firewall_rules: - - name: a3ultra-gke-internal-1 - ranges: [192.168.0.0/16] - allow: - - protocol: tcp - ports: ["0-65535"] - - protocol: udp - ports: ["0-65535"] - - protocol: icmp - - - id: a3ultra-gke-rdma-net - source: community/modules/network/rdma-vpc - settings: - network_name: a3ultra-gke-rdma-net - network_profile: https://www.googleapis.com/compute/alpha/projects/$(vars.project_id)/global/networkProfiles/$(vars.zone)-vpc-roce - network_routing_mode: REGIONAL - subnetworks_template: - name_prefix: a3ultra-gke-mrdma-sub - count: 8 - ip_range: 192.168.128.0/18 - region: $(vars.region) - - - id: a3-ultragpu-cluster - source: modules/scheduler/gke-cluster - use: [a3ultra-gke-net-0] - settings: - cluster_availability_type: MULTI_ZONAL - cluster_reference_type: NAME - min_master_version: 1.30.4-gke.1348000 - enable_private_endpoint: false # Allows for access from authorized public IPs - zone: $(vars.zone) # Temporarily used to target the GKE sandbox - master_authorized_networks: - - cidr_block: $(vars.authorized_cidr) # Allows your machine run kubectl command. It's required for the multi-network setup. 
- display_name: "kubectl-access-network" - additional_networks: - $(concat( - [{ - network=a3ultra-gke-net-1.network_name, - subnetwork=a3ultra-gke-net-1.subnetwork_name, - subnetwork_project=vars.project_id, - nic_type="GVNIC", - queue_count=null, - network_ip=null, - stack_type=null, - access_config=[{nat_ip=null, public_ptr_domain_name=null, network_tier=null}], - ipv6_access_config=[], - alias_ip_range=[] - }], - a3ultra-gke-rdma-net.subnetwork_interfaces_gke - )) - outputs: [instructions] - - - id: nccl_installer - source: modules/management/kubectl-apply - use: [a3-ultragpu-cluster] - settings: - apply_manifests: - - source: $(vars.nccl_installer_path) - - - id: a3-ultragpu-pool - source: modules/compute/gke-node-pool - use: [a3-ultragpu-cluster] - settings: - node_version: 1.30.4-gke.1348000 # Temporarily used to target the GKE sandbox - machine_type: a3-ultragpu-8g - zones: [$(vars.zone)] - disk_type: hyperdisk-balanced - guest_accelerator: - - type: nvidia-h200-141gb - count: 8 - gpu_driver_installation_config: - - gpu_driver_version: "LATEST" - additional_networks: - $(concat( - [{ - network=a3ultra-gke-net-1.network_name, - subnetwork=a3ultra-gke-net-1.subnetwork_name, - subnetwork_project=vars.project_id, - nic_type="GVNIC", - queue_count=null, - network_ip=null, - stack_type=null, - access_config=[{nat_ip=null, public_ptr_domain_name=null, network_tier=null}], - ipv6_access_config=[], - alias_ip_range=[] - }], - a3ultra-gke-rdma-net.subnetwork_interfaces_gke - )) - outputs: [instructions] - - - id: job_template - source: modules/compute/gke-job-template - use: [a3-ultragpu-pool] - settings: - image: nvidia/cuda:11.0.3-runtime-ubuntu20.04 - command: - - nvidia-smi - node_count: 2 - outputs: [instructions] From a7830eb693710df30985fbd0859b16bce968237f Mon Sep 17 00:00:00 2001 From: Atul Rajmane Date: Mon, 11 Nov 2024 12:10:58 +0000 Subject: [PATCH 18/26] GKE doesn't support shared extended reservations yet --- modules/compute/gke-node-pool/main.tf | 13 
++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index 1e4200b0bf..0eb360cd1c 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -161,7 +161,10 @@ resource "google_container_node_pool" "node_pool" { reservation_affinity { consume_reservation_type = var.reservation_affinity.consume_reservation_type key = length(local.verified_specific_reservations) != 1 ? null : local.reservation_resource_api_label - values = length(local.verified_specific_reservations) != 1 ? null : [for i, r in local.verified_specific_reservations : "projects/${r.project}/reservations/${format("%s%s", r.name, local.input_reservation_suffixes[i])}"] + values = length(local.verified_specific_reservations) != 1 ? null : [ + for i, r in local.verified_specific_reservations : + (length(local.input_reservation_suffixes[i]) > 0 ? format("%s%s", r.name, local.input_reservation_suffixes[i]) : "projects/${r.project}/reservations/${r.name}") + ] } dynamic "host_maintenance_policy" { @@ -231,6 +234,14 @@ resource "google_container_node_pool" "node_pool" { 3. Its VM Properties must match with those of the Node Pool; Machine type, Accelerators (GPU Type and count), Local SSD disk type and count EOT } + precondition { + condition = ( /* input_reservation_suffixes has one entry per reservation; an empty entry means "not an extended reservation", so test the entry's length, not the list's */ + (local.input_specific_reservations_count == 0) || + (local.input_specific_reservations_count == 1 && length(try(local.input_reservation_suffixes[0], "")) == 0) || + (local.input_specific_reservations_count == 1 && length(try(local.input_reservation_suffixes[0], "")) > 0 && try(local.input_reservation_projects[0], var.project_id) == var.project_id) + ) + error_message = "Shared Extended reservations are not supported by GKE."
+ } } } From 58aeb27ccbbb437be93183b85710d330c86725a9 Mon Sep 17 00:00:00 2001 From: annuay Date: Tue, 12 Nov 2024 09:37:11 +0000 Subject: [PATCH 19/26] Invert SSD and NVME counts --- modules/compute/gke-node-pool/disk_definitions.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/compute/gke-node-pool/disk_definitions.tf b/modules/compute/gke-node-pool/disk_definitions.tf index 44b7183959..3afefa9354 100644 --- a/modules/compute/gke-node-pool/disk_definitions.tf +++ b/modules/compute/gke-node-pool/disk_definitions.tf @@ -24,7 +24,7 @@ locals { local_ssd_machines = { "a3-highgpu-8g" = { local_ssd_count_ephemeral_storage = null, local_ssd_count_nvme_block = 16 }, "a3-megagpu-8g" = { local_ssd_count_ephemeral_storage = null, local_ssd_count_nvme_block = 16 }, - "a3-ultragpu-8g" = { local_ssd_count_ephemeral_storage = 32, local_ssd_count_nvme_block = null }, + "a3-ultragpu-8g" = { local_ssd_count_ephemeral_storage = null, local_ssd_count_nvme_block = 32 }, } generated_local_ssd_config = lookup(local.local_ssd_machines, var.machine_type, { local_ssd_count_ephemeral_storage = null, local_ssd_count_nvme_block = null }) From cd02ceaf2897448ad6331b229380a0cfc3605ac3 Mon Sep 17 00:00:00 2001 From: ighosh98 Date: Thu, 14 Nov 2024 18:37:13 +0000 Subject: [PATCH 20/26] upgrade kueue default version to v0.9.0 to support TAS --- modules/management/kubectl-apply/README.md | 2 +- .../kubectl-apply/manifests/kueue-v0.9.0.yaml | 13131 ++++++++++++++++ modules/management/kubectl-apply/variables.tf | 6 +- 3 files changed, 13135 insertions(+), 4 deletions(-) create mode 100644 modules/management/kubectl-apply/manifests/kueue-v0.9.0.yaml diff --git a/modules/management/kubectl-apply/README.md b/modules/management/kubectl-apply/README.md index 3e3bebecc0..3b70cd9864 100644 --- a/modules/management/kubectl-apply/README.md +++ b/modules/management/kubectl-apply/README.md @@ -134,7 +134,7 @@ limitations under the License. 
| [apply\_manifests](#input\_apply\_manifests) | A list of manifests to apply to GKE cluster using kubectl. For more details see [kubectl module's inputs](kubectl/README.md). |
list(object({
content = optional(string, null)
source = optional(string, null)
template_vars = optional(map(any), null)
server_side_apply = optional(bool, false)
wait_for_rollout = optional(bool, true)
}))
| `[]` | no | | [cluster\_id](#input\_cluster\_id) | An identifier for the gke cluster resource with format projects/<project\_id>/locations/<region>/clusters/<name>. | `string` | n/a | yes | | [jobset](#input\_jobset) | Install [Jobset](https://github.com/kubernetes-sigs/jobset) which manages a group of K8s [jobs](https://kubernetes.io/docs/concepts/workloads/controllers/job/) as a unit. |
object({
install = optional(bool, false)
version = optional(string, "v0.5.2")
})
| `{}` | no | -| [kueue](#input\_kueue) | Install and configure [Kueue](https://kueue.sigs.k8s.io/docs/overview/) workload scheduler. A configuration yaml/template file can be provided with config\_path to be applied right after kueue installation. If a template file provided, its variables can be set to config\_template\_vars. |
object({
install = optional(bool, false)
version = optional(string, "v0.8.1")
config_path = optional(string, null)
config_template_vars = optional(map(any), null)
})
| `{}` | no | +| [kueue](#input\_kueue) | Install and configure [Kueue](https://kueue.sigs.k8s.io/docs/overview/) workload scheduler. A configuration yaml/template file can be provided with config\_path to be applied right after kueue installation. If a template file provided, its variables can be set to config\_template\_vars. |
object({
install = optional(bool, false)
version = optional(string, "v0.9.0")
config_path = optional(string, null)
config_template_vars = optional(map(any), null)
})
| `{}` | no | | [project\_id](#input\_project\_id) | The project ID that hosts the gke cluster. | `string` | n/a | yes | ## Outputs diff --git a/modules/management/kubectl-apply/manifests/kueue-v0.9.0.yaml b/modules/management/kubectl-apply/manifests/kueue-v0.9.0.yaml new file mode 100644 index 0000000000..67166b222e --- /dev/null +++ b/modules/management/kubectl-apply/manifests/kueue-v0.9.0.yaml @@ -0,0 +1,13131 @@ +# Copyright 2024 "Google LLC" +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +kind: Namespace +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-system +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: admissionchecks.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: AdmissionCheck + listKind: AdmissionCheckList + plural: admissionchecks + singular: admissioncheck + scope: Cluster + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: AdmissionCheck is the Schema for the admissionchecks API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. 
+ Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: AdmissionCheckSpec defines the desired state of AdmissionCheck + properties: + controllerName: + description: |- + controllerName identifies the controller that processes the AdmissionCheck, + not necessarily a Kubernetes Pod or Deployment name. Cannot be empty. + type: string + x-kubernetes-validations: + - message: field is immutable + rule: self == oldSelf + parameters: + description: |- + Parameters identifies a configuration with additional parameters for the + check. + properties: + apiGroup: + description: ApiGroup is the group for the resource being referenced. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + description: Kind is the type of the resource being referenced. + maxLength: 63 + pattern: ^(?i)[a-z]([-a-z0-9]*[a-z0-9])?$ + type: string + name: + description: Name is the name of the resource being referenced. + maxLength: 63 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - apiGroup + - kind + - name + type: object + retryDelayMinutes: + default: 15 + description: |- + RetryDelayMinutes specifies how long to keep the workload suspended after + a failed check (after it transitioned to False). When the delay period has passed, the check + state goes to "Unknown". The default is 15 min. 
+ Deprecated: retryDelayMinutes has already been deprecated since v0.8 and will be removed in v1beta2. + format: int64 + type: integer + required: + - controllerName + type: object + status: + description: AdmissionCheckStatus defines the observed state of AdmissionCheck + properties: + conditions: + description: |- + conditions hold the latest available observations of the AdmissionCheck + current state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. 
+ enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: clusterqueues.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: ClusterQueue + listKind: ClusterQueueList + plural: clusterqueues + shortNames: + - cq + singular: clusterqueue + scope: Cluster + versions: + - additionalPrinterColumns: + - description: Cohort that this ClusterQueue belongs to + jsonPath: .spec.cohort + name: Cohort + type: string + - description: The queueing strategy used to prioritize workloads + jsonPath: .spec.queueingStrategy + name: Strategy + priority: 1 + type: string + - description: Number of pending workloads + jsonPath: .status.pendingWorkloads + name: Pending Workloads + type: integer + - description: Number of admitted workloads that haven't finished yet + jsonPath: .status.admittedWorkloads + name: Admitted Workloads + priority: 1 + type: integer + name: v1beta1 + schema: + openAPIV3Schema: + description: ClusterQueue is the Schema for the clusterQueue API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ClusterQueueSpec defines the desired state of ClusterQueue + properties: + admissionChecks: + description: |- + admissionChecks lists the AdmissionChecks required by this ClusterQueue. + Cannot be used along with AdmissionCheckStrategy. + items: + type: string + type: array + admissionChecksStrategy: + description: |- + admissionCheckStrategy defines a list of strategies to determine which ResourceFlavors require AdmissionChecks. + This property cannot be used in conjunction with the 'admissionChecks' property. + properties: + admissionChecks: + description: admissionChecks is a list of strategies for AdmissionChecks + items: + description: AdmissionCheckStrategyRule defines rules for a + single AdmissionCheck + properties: + name: + description: name is an AdmissionCheck's name. + type: string + onFlavors: + description: |- + onFlavors is a list of ResourceFlavors' names that this AdmissionCheck should run for. + If empty, the AdmissionCheck will run for all workloads submitted to the ClusterQueue. + items: + description: ResourceFlavorReference is the name of the + ResourceFlavor. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + type: array + required: + - name + type: object + type: array + type: object + cohort: + description: |- + cohort that this ClusterQueue belongs to. CQs that belong to the + same cohort can borrow unused resources from each other. 
+ + A CQ can be a member of a single borrowing cohort. A workload submitted + to a queue referencing this CQ can borrow quota from any CQ in the cohort. + Only quota for the [resource, flavor] pairs listed in the CQ can be + borrowed. + If empty, this ClusterQueue cannot borrow from any other ClusterQueue and + vice versa. + + A cohort is a name that links CQs together, but it doesn't reference any + object. + + Validation of a cohort name is equivalent to that of object names: + subdomain in DNS (RFC 1123). + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + fairSharing: + description: |- + fairSharing defines the properties of the ClusterQueue when participating in fair sharing. + The values are only relevant if fair sharing is enabled in the Kueue configuration. + properties: + weight: + anyOf: + - type: integer + - type: string + default: 1 + description: |- + weight gives a comparative advantage to this ClusterQueue when competing for unused + resources in the cohort against other ClusterQueues. + The share of a ClusterQueue is based on the dominant resource usage above nominal + quotas for each resource, divided by the weight. + Admission prioritizes scheduling workloads from ClusterQueues with the lowest share + and preempting workloads from the ClusterQueues with the highest share. + A zero weight implies infinite share value, meaning that this ClusterQueue will always + be at disadvantage against other ClusterQueues. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + flavorFungibility: + default: {} + description: |- + flavorFungibility defines whether a workload should try the next flavor + before borrowing or preempting in the flavor being evaluated. 
+ properties: + whenCanBorrow: + default: Borrow + description: |- + whenCanBorrow determines whether a workload should try the next flavor + before borrowing in current flavor. The possible values are: + + - `Borrow` (default): allocate in current flavor if borrowing + is possible. + - `TryNextFlavor`: try next flavor even if the current + flavor has enough resources to borrow. + enum: + - Borrow + - TryNextFlavor + type: string + whenCanPreempt: + default: TryNextFlavor + description: |- + whenCanPreempt determines whether a workload should try the next flavor + before borrowing in current flavor. The possible values are: + + - `Preempt`: allocate in current flavor if it's possible to preempt some workloads. + - `TryNextFlavor` (default): try next flavor even if there are enough + candidates for preemption in the current flavor. + enum: + - Preempt + - TryNextFlavor + type: string + type: object + namespaceSelector: + description: |- + namespaceSelector defines which namespaces are allowed to submit workloads to + this clusterQueue. Beyond this basic support for policy, a policy agent like + Gatekeeper should be used to enforce more advanced policies. + Defaults to null which is a nothing selector (no namespaces eligible). + If set to an empty selector `{}`, then all namespaces are eligible. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. 
If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + preemption: + default: {} + description: |- + preemption describes policies to preempt Workloads from this ClusterQueue + or the ClusterQueue's cohort. + + Preemption can happen in two scenarios: + + - When a Workload fits within the nominal quota of the ClusterQueue, but + the quota is currently borrowed by other ClusterQueues in the cohort. + Preempting Workloads in other ClusterQueues allows this ClusterQueue to + reclaim its nominal quota. + - When a Workload doesn't fit within the nominal quota of the ClusterQueue + and there are admitted Workloads in the ClusterQueue with lower priority. + + The preemption algorithm tries to find a minimal set of Workloads to + preempt to accommodate the pending Workload, preempting Workloads with + lower priority first. + properties: + borrowWithinCohort: + default: {} + description: |- + borrowWithinCohort provides configuration to allow preemption within + cohort while borrowing. + properties: + maxPriorityThreshold: + description: |- + maxPriorityThreshold allows to restrict the set of workloads which + might be preempted by a borrowing workload, to only workloads with + priority less than or equal to the specified threshold priority. 
+ When the threshold is not specified, then any workload satisfying the + policy can be preempted by the borrowing workload. + format: int32 + type: integer + policy: + default: Never + description: |- + policy determines the policy for preemption to reclaim quota within cohort while borrowing. + Possible values are: + - `Never` (default): do not allow for preemption, in other + ClusterQueues within the cohort, for a borrowing workload. + - `LowerPriority`: allow preemption, in other ClusterQueues + within the cohort, for a borrowing workload, but only if + the preempted workloads are of lower priority. + enum: + - Never + - LowerPriority + type: string + type: object + reclaimWithinCohort: + default: Never + description: |- + reclaimWithinCohort determines whether a pending Workload can preempt + Workloads from other ClusterQueues in the cohort that are using more than + their nominal quota. The possible values are: + + - `Never` (default): do not preempt Workloads in the cohort. + - `LowerPriority`: **Classic Preemption** if the pending Workload + fits within the nominal quota of its ClusterQueue, only preempt + Workloads in the cohort that have lower priority than the pending + Workload. **Fair Sharing** only preempt Workloads in the cohort that + have lower priority than the pending Workload and that satisfy the + fair sharing preemptionStrategies. + - `Any`: **Classic Preemption** if the pending Workload fits within + the nominal quota of its ClusterQueue, preempt any Workload in the + cohort, irrespective of priority. **Fair Sharing** preempt Workloads + in the cohort that satisfy the fair sharing preemptionStrategies. + enum: + - Never + - LowerPriority + - Any + type: string + withinClusterQueue: + default: Never + description: |- + withinClusterQueue determines whether a pending Workload that doesn't fit + within the nominal quota for its ClusterQueue, can preempt active Workloads in + the ClusterQueue. 
The possible values are: + + - `Never` (default): do not preempt Workloads in the ClusterQueue. + - `LowerPriority`: only preempt Workloads in the ClusterQueue that have + lower priority than the pending Workload. + - `LowerOrNewerEqualPriority`: only preempt Workloads in the ClusterQueue that + either have a lower priority than the pending workload or equal priority + and are newer than the pending workload. + enum: + - Never + - LowerPriority + - LowerOrNewerEqualPriority + type: string + type: object + x-kubernetes-validations: + - message: reclaimWithinCohort=Never and borrowWithinCohort.Policy!=Never + rule: '!(self.reclaimWithinCohort == ''Never'' && has(self.borrowWithinCohort) + && self.borrowWithinCohort.policy != ''Never'')' + queueingStrategy: + default: BestEffortFIFO + description: |- + QueueingStrategy indicates the queueing strategy of the workloads + across the queues in this ClusterQueue. + Current Supported Strategies: + + - StrictFIFO: workloads are ordered strictly by creation time. + Older workloads that can't be admitted will block admitting newer + workloads even if they fit available quota. + - BestEffortFIFO: workloads are ordered by creation time, + however older workloads that can't be admitted will not block + admitting newer workloads that fit existing quota. + enum: + - StrictFIFO + - BestEffortFIFO + type: string + resourceGroups: + description: |- + resourceGroups describes groups of resources. + Each resource group defines the list of resources and a list of flavors + that provide quotas for these resources. + Each resource and each flavor can only form part of one resource group. + resourceGroups can be up to 16. + items: + properties: + coveredResources: + description: |- + coveredResources is the list of resources covered by the flavors in this + group. + Examples: cpu, memory, vendor.com/gpu. + The list cannot be empty and it can contain up to 16 resources. 
+ items: + description: ResourceName is the name identifying various + resources in a ResourceList. + type: string + maxItems: 16 + minItems: 1 + type: array + flavors: + description: |- + flavors is the list of flavors that provide the resources of this group. + Typically, different flavors represent different hardware models + (e.g., gpu models, cpu architectures) or pricing models (on-demand vs spot + cpus). + Each flavor MUST list all the resources listed for this group in the same + order as the .resources field. + The list cannot be empty and it can contain up to 16 flavors. + items: + properties: + name: + description: |- + name of this flavor. The name should match the .metadata.name of a + ResourceFlavor. If a matching ResourceFlavor does not exist, the + ClusterQueue will have an Active condition set to False. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resources: + description: |- + resources is the list of quotas for this flavor per resource. + There could be up to 16 resources. + items: + properties: + borrowingLimit: + anyOf: + - type: integer + - type: string + description: |- + borrowingLimit is the maximum amount of quota for the [flavor, resource] + combination that this ClusterQueue is allowed to borrow from the unused + quota of other ClusterQueues in the same cohort. + In total, at a given time, Workloads in a ClusterQueue can consume a + quantity of quota equal to nominalQuota+borrowingLimit, assuming the other + ClusterQueues in the cohort have enough unused quota. + If null, it means that there is no borrowing limit. + If not null, it must be non-negative. + borrowingLimit must be null if spec.cohort is empty. 
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + lendingLimit: + anyOf: + - type: integer + - type: string + description: |- + lendingLimit is the maximum amount of unused quota for the [flavor, resource] + combination that this ClusterQueue can lend to other ClusterQueues in the same cohort. + In total, at a given time, ClusterQueue reserves for its exclusive use + a quantity of quota equals to nominalQuota - lendingLimit. + If null, it means that there is no lending limit, meaning that + all the nominalQuota can be borrowed by other clusterQueues in the cohort. + If not null, it must be non-negative. + lendingLimit must be null if spec.cohort is empty. + This field is in beta stage and is enabled by default. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + name: + description: name of this resource. + type: string + nominalQuota: + anyOf: + - type: integer + - type: string + description: |- + nominalQuota is the quantity of this resource that is available for + Workloads admitted by this ClusterQueue at a point in time. + The nominalQuota must be non-negative. + nominalQuota should represent the resources in the cluster available for + running jobs (after discounting resources consumed by system components + and pods not managed by kueue). In an autoscaled cluster, nominalQuota + should account for resources that can be provided by a component such as + Kubernetes cluster-autoscaler. + + If the ClusterQueue belongs to a cohort, the sum of the quotas for each + (flavor, resource) combination defines the maximum quantity that can be + allocated by a ClusterQueue in the cohort. 
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - name + - nominalQuota + type: object + maxItems: 16 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - name + - resources + type: object + maxItems: 16 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - coveredResources + - flavors + type: object + x-kubernetes-validations: + - message: flavors must have the same number of resources as the + coveredResources + rule: self.flavors.all(x, size(x.resources) == size(self.coveredResources)) + maxItems: 16 + type: array + x-kubernetes-list-type: atomic + stopPolicy: + default: None + description: |- + stopPolicy - if set to a value different from None, the ClusterQueue is considered Inactive, no new reservation being + made. + + Depending on its value, its associated workloads will: + + - None - Workloads are admitted + - HoldAndDrain - Admitted workloads are evicted and Reserving workloads will cancel the reservation. + - Hold - Admitted workloads will run to completion and Reserving workloads will cancel the reservation. + enum: + - None + - Hold + - HoldAndDrain + type: string + type: object + x-kubernetes-validations: + - message: borrowingLimit must be nil when cohort is empty + rule: '!has(self.cohort) && has(self.resourceGroups) ? self.resourceGroups.all(rg, + rg.flavors.all(f, f.resources.all(r, !has(r.borrowingLimit)))) : true' + status: + description: ClusterQueueStatus defines the observed state of ClusterQueue + properties: + admittedWorkloads: + description: |- + admittedWorkloads is the number of workloads currently admitted to this + clusterQueue and haven't finished yet. 
+ format: int32 + type: integer + conditions: + description: |- + conditions hold the latest available observations of the ClusterQueue + current state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + fairSharing: + description: FairSharing contains the information about the current + status of fair sharing. + properties: + weightedShare: + description: |- + WeightedShare represent the maximum of the ratios of usage above nominal + quota to the lendable resources in the cohort, among all the resources + provided by the ClusterQueue, and divided by the weight. + If zero, it means that the usage of the ClusterQueue is below the nominal quota. + If the ClusterQueue has a weight of zero, this will return 9223372036854775807, + the maximum possible share value. + format: int64 + type: integer + required: + - weightedShare + type: object + flavorsReservation: + description: |- + flavorsReservation are the reserved quotas, by flavor, currently in use by the + workloads assigned to this ClusterQueue. + items: + properties: + name: + description: name of the flavor. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resources: + description: resources lists the quota usage for the resources + in this flavor. + items: + properties: + borrowed: + anyOf: + - type: integer + - type: string + description: |- + Borrowed is quantity of quota that is borrowed from the cohort. In other + words, it's the used quota that is over the nominalQuota. 
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + name: + description: name of the resource + type: string + total: + anyOf: + - type: integer + - type: string + description: |- + total is the total quantity of used quota, including the amount borrowed + from the cohort. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - name + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - name + - resources + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + flavorsUsage: + description: |- + flavorsUsage are the used quotas, by flavor, currently in use by the + workloads admitted in this ClusterQueue. + items: + properties: + name: + description: name of the flavor. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resources: + description: resources lists the quota usage for the resources + in this flavor. + items: + properties: + borrowed: + anyOf: + - type: integer + - type: string + description: |- + Borrowed is quantity of quota that is borrowed from the cohort. In other + words, it's the used quota that is over the nominalQuota. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + name: + description: name of the resource + type: string + total: + anyOf: + - type: integer + - type: string + description: |- + total is the total quantity of used quota, including the amount borrowed + from the cohort. 
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - name + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - name + - resources + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + pendingWorkloads: + description: |- + pendingWorkloads is the number of workloads currently waiting to be + admitted to this clusterQueue. + format: int32 + type: integer + pendingWorkloadsStatus: + description: |- + PendingWorkloadsStatus contains the information exposed about the current + status of the pending workloads in the cluster queue. + Deprecated: This field will be removed on v1beta2, use VisibilityOnDemand + (https://kueue.sigs.k8s.io/docs/tasks/manage/monitor_pending_workloads/pending_workloads_on_demand/) + instead. + properties: + clusterQueuePendingWorkload: + description: Head contains the list of top pending workloads. + items: + description: |- + ClusterQueuePendingWorkload contains the information identifying a pending workload + in the cluster queue. + properties: + name: + description: Name indicates the name of the pending workload. + type: string + namespace: + description: Namespace indicates the namespace of the pending + workload. + type: string + required: + - name + - namespace + type: object + type: array + x-kubernetes-list-type: atomic + lastChangeTime: + description: LastChangeTime indicates the time of the last change + of the structure. + format: date-time + type: string + required: + - lastChangeTime + type: object + reservingWorkloads: + description: |- + reservingWorkloads is the number of workloads currently reserving quota in this + clusterQueue. 
+ format: int32 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: cohorts.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: Cohort + listKind: CohortList + plural: cohorts + singular: cohort + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + Cohort is the Schema for the cohorts API. Using Hierarchical + Cohorts (any Cohort which has a parent) with Fair Sharing + results in undefined behavior in 0.9 + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: CohortSpec defines the desired state of Cohort + properties: + parent: + description: |- + Parent references the name of the Cohort's parent, if + any. It satisfies one of three cases: + 1) Unset. This Cohort is the root of its Cohort tree. + 2) References a non-existent Cohort. We use default Cohort (no borrowing/lending limits). + 3) References an existent Cohort. 
+ + If a cycle is created, we disable all members of the + Cohort, including ClusterQueues, until the cycle is + removed. We prevent further admission while the cycle + exists. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resourceGroups: + description: |- + ResourceGroups describes groupings of Resources and + Flavors. Each ResourceGroup defines a list of Resources + and a list of Flavors which provide quotas for these + Resources. Each Resource and each Flavor may only form part + of one ResourceGroup. There may be up to 16 ResourceGroups + within a Cohort. + + BorrowingLimit limits how much members of this Cohort + subtree can borrow from the parent subtree. + + LendingLimit limits how much members of this Cohort subtree + can lend to the parent subtree. + + Borrowing and Lending limits must only be set when the + Cohort has a parent. Otherwise, the Cohort create/update + will be rejected by the webhook. + items: + properties: + coveredResources: + description: |- + coveredResources is the list of resources covered by the flavors in this + group. + Examples: cpu, memory, vendor.com/gpu. + The list cannot be empty and it can contain up to 16 resources. + items: + description: ResourceName is the name identifying various + resources in a ResourceList. + type: string + maxItems: 16 + minItems: 1 + type: array + flavors: + description: |- + flavors is the list of flavors that provide the resources of this group. + Typically, different flavors represent different hardware models + (e.g., gpu models, cpu architectures) or pricing models (on-demand vs spot + cpus). + Each flavor MUST list all the resources listed for this group in the same + order as the .resources field. + The list cannot be empty and it can contain up to 16 flavors. + items: + properties: + name: + description: |- + name of this flavor. The name should match the .metadata.name of a + ResourceFlavor. 
If a matching ResourceFlavor does not exist, the + ClusterQueue will have an Active condition set to False. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resources: + description: |- + resources is the list of quotas for this flavor per resource. + There could be up to 16 resources. + items: + properties: + borrowingLimit: + anyOf: + - type: integer + - type: string + description: |- + borrowingLimit is the maximum amount of quota for the [flavor, resource] + combination that this ClusterQueue is allowed to borrow from the unused + quota of other ClusterQueues in the same cohort. + In total, at a given time, Workloads in a ClusterQueue can consume a + quantity of quota equal to nominalQuota+borrowingLimit, assuming the other + ClusterQueues in the cohort have enough unused quota. + If null, it means that there is no borrowing limit. + If not null, it must be non-negative. + borrowingLimit must be null if spec.cohort is empty. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + lendingLimit: + anyOf: + - type: integer + - type: string + description: |- + lendingLimit is the maximum amount of unused quota for the [flavor, resource] + combination that this ClusterQueue can lend to other ClusterQueues in the same cohort. + In total, at a given time, ClusterQueue reserves for its exclusive use + a quantity of quota equals to nominalQuota - lendingLimit. + If null, it means that there is no lending limit, meaning that + all the nominalQuota can be borrowed by other clusterQueues in the cohort. + If not null, it must be non-negative. + lendingLimit must be null if spec.cohort is empty. + This field is in beta stage and is enabled by default. 
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + name: + description: name of this resource. + type: string + nominalQuota: + anyOf: + - type: integer + - type: string + description: |- + nominalQuota is the quantity of this resource that is available for + Workloads admitted by this ClusterQueue at a point in time. + The nominalQuota must be non-negative. + nominalQuota should represent the resources in the cluster available for + running jobs (after discounting resources consumed by system components + and pods not managed by kueue). In an autoscaled cluster, nominalQuota + should account for resources that can be provided by a component such as + Kubernetes cluster-autoscaler. + + If the ClusterQueue belongs to a cohort, the sum of the quotas for each + (flavor, resource) combination defines the maximum quantity that can be + allocated by a ClusterQueue in the cohort. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - name + - nominalQuota + type: object + maxItems: 16 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - name + - resources + type: object + maxItems: 16 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - coveredResources + - flavors + type: object + x-kubernetes-validations: + - message: flavors must have the same number of resources as the + coveredResources + rule: self.flavors.all(x, size(x.resources) == size(self.coveredResources)) + maxItems: 16 + type: array + x-kubernetes-list-type: atomic + type: object + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + cert-manager.io/inject-ca-from: 
$(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: localqueues.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: LocalQueue + listKind: LocalQueueList + plural: localqueues + shortNames: + - queue + - queues + - lq + singular: localqueue + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Backing ClusterQueue + jsonPath: .spec.clusterQueue + name: ClusterQueue + type: string + - description: Number of pending workloads + jsonPath: .status.pendingWorkloads + name: Pending Workloads + type: integer + - description: Number of admitted workloads that haven't finished yet. + jsonPath: .status.admittedWorkloads + name: Admitted Workloads + type: integer + name: v1beta1 + schema: + openAPIV3Schema: + description: LocalQueue is the Schema for the localQueues API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: LocalQueueSpec defines the desired state of LocalQueue + properties: + clusterQueue: + description: clusterQueue is a reference to a clusterQueue that backs + this localQueue. 
+ maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + x-kubernetes-validations: + - message: field is immutable + rule: self == oldSelf + stopPolicy: + default: None + description: |- + stopPolicy - if set to a value different from None, the LocalQueue is considered Inactive, + no new reservation being made. + + Depending on its value, its associated workloads will: + + - None - Workloads are admitted + - HoldAndDrain - Admitted workloads are evicted and Reserving workloads will cancel the reservation. + - Hold - Admitted workloads will run to completion and Reserving workloads will cancel the reservation. + enum: + - None + - Hold + - HoldAndDrain + type: string + type: object + status: + description: LocalQueueStatus defines the observed state of LocalQueue + properties: + admittedWorkloads: + description: |- + admittedWorkloads is the number of workloads in this LocalQueue + admitted to a ClusterQueue and that haven't finished yet. + format: int32 + type: integer + conditions: + description: |- + Conditions hold the latest available observations of the LocalQueue + current state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. 
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + flavorUsage: + description: |- + flavorsUsage are the used quotas, by flavor currently in use by the + workloads assigned to this LocalQueue. + items: + properties: + name: + description: name of the flavor. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resources: + description: resources lists the quota usage for the resources + in this flavor. + items: + properties: + name: + description: name of the resource. + type: string + total: + anyOf: + - type: integer + - type: string + description: total is the total quantity of used quota. 
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - name + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - name + - resources + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + flavors: + description: flavors lists all currently available ResourceFlavors + in specified ClusterQueue. + items: + properties: + name: + description: name of the flavor. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + nodeLabels: + additionalProperties: + type: string + description: |- + nodeLabels are labels that associate the ResourceFlavor with Nodes that + have the same labels. + maxProperties: 8 + type: object + x-kubernetes-map-type: atomic + nodeTaints: + description: |- + nodeTaints are taints that the nodes associated with this ResourceFlavor + have. + items: + description: |- + The node this Taint is attached to has the "effect" on + any pod that does not tolerate the Taint. + properties: + effect: + description: |- + Required. The effect of the taint on pods + that do not tolerate the taint. + Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Required. The taint key to be applied to + a node. + type: string + timeAdded: + description: |- + TimeAdded represents the time at which the taint was added. + It is only written for NoExecute taints. + format: date-time + type: string + value: + description: The taint value corresponding to the taint + key. + type: string + required: + - effect + - key + type: object + maxItems: 8 + type: array + x-kubernetes-list-type: atomic + resources: + description: resources used in the flavor. 
+ items: + description: ResourceName is the name identifying various + resources in a ResourceList. + type: string + maxItems: 16 + type: array + x-kubernetes-list-type: set + required: + - name + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + flavorsReservation: + description: |- + flavorsReservation are the reserved quotas, by flavor currently in use by the + workloads assigned to this LocalQueue. + items: + properties: + name: + description: name of the flavor. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resources: + description: resources lists the quota usage for the resources + in this flavor. + items: + properties: + name: + description: name of the resource. + type: string + total: + anyOf: + - type: integer + - type: string + description: total is the total quantity of used quota. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - name + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - name + - resources + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + pendingWorkloads: + description: PendingWorkloads is the number of Workloads in the LocalQueue + not yet admitted to a ClusterQueue + format: int32 + type: integer + reservingWorkloads: + description: |- + reservingWorkloads is the number of workloads in this LocalQueue + reserving quota in a ClusterQueue and that haven't finished yet. 
+ format: int32 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: multikueueclusters.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: MultiKueueCluster + listKind: MultiKueueClusterList + plural: multikueueclusters + singular: multikueuecluster + scope: Cluster + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: MultiKueueCluster is the Schema for the multikueue API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + kubeConfig: + description: Information how to connect to the cluster. + properties: + location: + description: |- + Location of the KubeConfig. + + If LocationType is Secret then Location is the name of the secret inside the namespace in + which the kueue controller manager is running. The config should be stored in the "kubeconfig" key. + type: string + locationType: + default: Secret + description: Type of the KubeConfig location. 
+ enum: + - Secret + - Path + type: string + required: + - location + - locationType + type: object + required: + - kubeConfig + type: object + status: + properties: + conditions: + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: multikueueconfigs.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: MultiKueueConfig + listKind: MultiKueueConfigList + plural: multikueueconfigs + singular: multikueueconfig + scope: Cluster + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: MultiKueueConfig is the Schema for the multikueue API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: MultiKueueConfigSpec defines the desired state of MultiKueueConfig + properties: + clusters: + description: List of MultiKueueClusters names where the workloads + from the ClusterQueue should be distributed. 
+ items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + required: + - clusters + type: object + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: provisioningrequestconfigs.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: ProvisioningRequestConfig + listKind: ProvisioningRequestConfigList + plural: provisioningrequestconfigs + singular: provisioningrequestconfig + scope: Cluster + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: ProvisioningRequestConfig is the Schema for the provisioningrequestconfig + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ProvisioningRequestConfigSpec defines the desired state of + ProvisioningRequestConfig + properties: + managedResources: + description: |- + managedResources contains the list of resources managed by the autoscaling. + + If empty, all resources are considered managed. + + If not empty, the ProvisioningRequest will contain only the podsets that are + requesting at least one of them. 
+ + If none of the workloads podsets is requesting at least a managed resource, + the workload is considered ready. + items: + description: ResourceName is the name identifying various resources + in a ResourceList. + type: string + maxItems: 100 + type: array + x-kubernetes-list-type: set + parameters: + additionalProperties: + description: Parameter is limited to 255 characters. + maxLength: 255 + type: string + description: Parameters contains all other parameters classes may + require. + maxProperties: 100 + type: object + provisioningClassName: + description: |- + ProvisioningClassName describes the different modes of provisioning the resources. + Check autoscaling.x-k8s.io ProvisioningRequestSpec.ProvisioningClassName for details. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + retryStrategy: + default: + backoffBaseSeconds: 60 + backoffLimitCount: 3 + backoffMaxSeconds: 1800 + description: |- + retryStrategy defines strategy for retrying ProvisioningRequest. + If null, then the default configuration is applied with the following parameter values: + backoffLimitCount: 3 + backoffBaseSeconds: 60 - 1 min + backoffMaxSeconds: 1800 - 30 mins + + To switch off retry mechanism + set retryStrategy.backoffLimitCount to 0. + properties: + backoffBaseSeconds: + default: 60 + description: |- + BackoffBaseSeconds defines the base for the exponential backoff for + re-queuing an evicted workload. + + Defaults to 60. + format: int32 + type: integer + backoffLimitCount: + default: 3 + description: |- + BackoffLimitCount defines the maximum number of re-queuing retries. + Once the number is reached, the workload is deactivated (`.spec.activate`=`false`). + + Every backoff duration is about "b*2^(n-1)+Rand" where: + - "b" represents the base set by "BackoffBaseSeconds" parameter, + - "n" represents the "workloadStatus.requeueState.count", + - "Rand" represents the random jitter. 
+ During this time, the workload is taken as an inadmissible and + other workloads will have a chance to be admitted. + By default, the consecutive requeue delays are around: (60s, 120s, 240s, ...). + + Defaults to 3. + format: int32 + type: integer + backoffMaxSeconds: + default: 1800 + description: |- + BackoffMaxSeconds defines the maximum backoff time to re-queue an evicted workload. + + Defaults to 1800. + format: int32 + type: integer + type: object + required: + - provisioningClassName + type: object + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: resourceflavors.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: ResourceFlavor + listKind: ResourceFlavorList + plural: resourceflavors + shortNames: + - flavor + - flavors + - rf + singular: resourceflavor + scope: Cluster + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: ResourceFlavor is the Schema for the resourceflavors API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ResourceFlavorSpec defines the desired state of the ResourceFlavor + properties: + nodeLabels: + additionalProperties: + type: string + description: |- + nodeLabels are labels that associate the ResourceFlavor with Nodes that + have the same labels. + When a Workload is admitted, its podsets can only get assigned + ResourceFlavors whose nodeLabels match the nodeSelector and nodeAffinity + fields. + Once a ResourceFlavor is assigned to a podSet, the ResourceFlavor's + nodeLabels should be injected into the pods of the Workload by the + controller that integrates with the Workload object. + + nodeLabels can be up to 8 elements. + maxProperties: 8 + type: object + x-kubernetes-map-type: atomic + nodeTaints: + description: |- + nodeTaints are taints that the nodes associated with this ResourceFlavor + have. + Workloads' podsets must have tolerations for these nodeTaints in order to + get assigned this ResourceFlavor during admission. + + An example of a nodeTaint is + cloud.provider.com/preemptible="true":NoSchedule + + nodeTaints can be up to 8 elements. + items: + description: |- + The node this Taint is attached to has the "effect" on + any pod that does not tolerate the Taint. + properties: + effect: + description: |- + Required. The effect of the taint on pods + that do not tolerate the taint. + Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Required. The taint key to be applied to a node. + type: string + timeAdded: + description: |- + TimeAdded represents the time at which the taint was added. + It is only written for NoExecute taints. + format: date-time + type: string + value: + description: The taint value corresponding to the taint key. 
+ type: string + required: + - effect + - key + type: object + maxItems: 8 + type: array + x-kubernetes-list-type: atomic + x-kubernetes-validations: + - message: 'supported taint effect values: ''NoSchedule'', ''PreferNoSchedule'', + ''NoExecute''' + rule: self.all(x, x.effect in ['NoSchedule', 'PreferNoSchedule', + 'NoExecute']) + tolerations: + description: |- + tolerations are extra tolerations that will be added to the pods admitted in + the quota associated with this resource flavor. + + An example of a toleration is + cloud.provider.com/preemptible="true":NoSchedule + + tolerations can be up to 8 elements. + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. 
+ If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + maxItems: 8 + type: array + x-kubernetes-list-type: atomic + x-kubernetes-validations: + - message: operator must be Exists when 'key' is empty, which means + 'match all values and all keys' + rule: 'self.all(x, !has(x.key) ? x.operator == ''Exists'' : true)' + - message: effect must be 'NoExecute' when 'tolerationSeconds' is + set + rule: 'self.all(x, has(x.tolerationSeconds) ? x.effect == ''NoExecute'' + : true)' + - message: 'supported toleration values: ''Equal''(default), ''Exists''' + rule: self.all(x, !has(x.operator) || x.operator in ['Equal', 'Exists']) + - message: a value must be empty when 'operator' is 'Exists' + rule: 'self.all(x, has(x.operator) && x.operator == ''Exists'' ? + !has(x.value) : true)' + - message: 'supported taint effect values: ''NoSchedule'', ''PreferNoSchedule'', + ''NoExecute''' + rule: self.all(x, !has(x.effect) || x.effect in ['NoSchedule', 'PreferNoSchedule', + 'NoExecute']) + topologyName: + description: |- + topologyName indicates topology for the TAS ResourceFlavor. + When specified, it enables scraping of the topology information from the + nodes matching to the Resource Flavor node labels. 
+ maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + type: object + x-kubernetes-validations: + - message: at least one nodeLabel is required when topology is set + rule: '!has(self.topologyName) || self.nodeLabels.size() >= 1' + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: topologies.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: Topology + listKind: TopologyList + plural: topologies + singular: topology + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: Topology is the Schema for the topology API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: TopologySpec defines the desired state of Topology + properties: + levels: + description: levels define the levels of topology. + items: + description: TopologyLevel defines the desired state of TopologyLevel + properties: + nodeLabel: + description: |- + nodeLabel indicates the name of the node label for a specific topology + level. 
+ + Examples: + - cloud.provider.com/topology-block + - cloud.provider.com/topology-rack + maxLength: 316 + minLength: 1 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - nodeLabel + type: object + maxItems: 8 + minItems: 1 + type: array + x-kubernetes-list-type: atomic + required: + - levels + type: object + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: workloadpriorityclasses.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: WorkloadPriorityClass + listKind: WorkloadPriorityClassList + plural: workloadpriorityclasses + singular: workloadpriorityclass + scope: Cluster + versions: + - additionalPrinterColumns: + - description: Value of workloadPriorityClass's Priority + jsonPath: .value + name: Value + type: integer + name: v1beta1 + schema: + openAPIV3Schema: + description: WorkloadPriorityClass is the Schema for the workloadPriorityClass + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + description: + description: |- + description is an arbitrary string that usually provides guidelines on + when this workloadPriorityClass should be used. + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + value: + description: |- + value represents the integer value of this workloadPriorityClass. This is the actual priority that workloads + receive when jobs have the name of this class in their workloadPriorityClass label. + Changing the value of workloadPriorityClass doesn't affect the priority of workloads that were already created. + format: int32 + type: integer + required: + - value + type: object + served: true + storage: true + subresources: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: workloads.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: Workload + listKind: WorkloadList + plural: workloads + shortNames: + - wl + singular: workload + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Name of the queue this workload was submitted to + jsonPath: .spec.queueName + name: Queue + type: string + - description: Name of the ClusterQueue where the workload is reserving quota + jsonPath: .status.admission.clusterQueue + name: Reserved in + type: string + - description: Admission status + jsonPath: .status.conditions[?(@.type=='Admitted')].status + name: Admitted + type: string + - description: Workload finished + jsonPath: .status.conditions[?(@.type=='Finished')].status + name: Finished + type: string + - description: Time this workload was created + jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: Workload is the Schema for the workloads API + properties: + apiVersion: + description: |- + 
APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: WorkloadSpec defines the desired state of Workload + properties: + active: + default: true + description: |- + Active determines if a workload can be admitted into a queue. + Changing active from true to false will evict any running workloads. + Possible values are: + + - false: indicates that a workload should never be admitted and evicts running workloads + - true: indicates that a workload can be evaluated for admission into it's respective queue. + + Defaults to true + type: boolean + maximumExecutionTimeSeconds: + description: |- + maximumExecutionTimeSeconds if provided, determines the maximum time, in seconds, + the workload can be admitted before it's automatically deactivated. + + If unspecified, no execution time limit is enforced on the Workload. + format: int32 + minimum: 1 + type: integer + podSets: + description: |- + podSets is a list of sets of homogeneous pods, each described by a Pod spec + and a count. + There must be at least one element and at most 8. + podSets cannot be changed. + items: + properties: + count: + default: 1 + description: count is the number of pods for the spec. 
+ format: int32 + minimum: 0 + type: integer + minCount: + description: |- + minCount is the minimum number of pods for the spec acceptable + if the workload supports partial admission. + + If not provided, partial admission for the current PodSet is not + enabled. + + Only one podSet within the workload can use this. + + This is an alpha field and requires enabling PartialAdmission feature gate. + format: int32 + minimum: 1 + type: integer + name: + default: main + description: name is the PodSet name. + maxLength: 63 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + template: + description: |- + template is the Pod template. + + The only allowed fields in template.metadata are labels and annotations. + + If requests are omitted for a container or initContainer, + they default to the limits if they are explicitly specified for the + container or initContainer. + + During admission, the rules in nodeSelector and + nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution that match + the keys in the nodeLabels from the ResourceFlavors considered for this + Workload are used to filter the ResourceFlavors that can be assigned to + this podSet. + properties: + metadata: + description: |- + Standard object's metadata. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + properties: + annotations: + additionalProperties: + type: string + type: object + finalizers: + items: + type: string + type: array + labels: + additionalProperties: + type: string + type: object + name: + type: string + namespace: + type: string + type: object + spec: + description: |- + Specification of the desired behavior of the pod. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status + properties: + activeDeadlineSeconds: + description: |- + Optional duration in seconds the pod may be active on the node relative to + StartTime before the system will actively try to mark it failed and kill associated containers. + Value must be a positive integer. + format: int64 + type: integer + affinity: + description: If specified, the pod's scheduling constraints + properties: + nodeAffinity: + description: Describes node affinity scheduling + rules for the pod. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). + properties: + preference: + description: A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. 
+ type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: Weight associated with matching + the corresponding nodeSelectorTerm, + in the range 1-100. 
+ format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: Required. A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + description: Describes pod affinity scheduling rules + (e.g. co-locate this pod in the same node, zone, + etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. 
+ for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added + per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity + term, associated with the corresponding + weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions + is a list of label selector + requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the + label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions + is a list of label selector + requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the + label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. 
+ The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. 
+ items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a + list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. 
+ type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. 
+ null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a + list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + description: Describes pod anti-affinity scheduling + rules (e.g. avoid putting this pod in the same + node, zone, etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the anti-affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added + per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity + term, associated with the corresponding + weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. 
+ properties: + matchExpressions: + description: matchExpressions + is a list of label selector + requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the + label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. 
+ Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions + is a list of label selector + requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the + label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. 
+ Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. 
+ format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the anti-affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the anti-affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a + list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. 
If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. 
Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a + list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + automountServiceAccountToken: + description: AutomountServiceAccountToken indicates + whether a service account token should be automatically + mounted. + type: boolean + containers: + description: |- + List of containers belonging to the pod. + Containers cannot currently be added or removed. + There must be at least one container in a Pod. + Cannot be updated. + items: + description: A single application container that you + want to run within a pod. + properties: + args: + description: |- + Arguments to the entrypoint. + The container image's CMD is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. 
Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + command: + description: |- + Entrypoint array. Not executed within a shell. + The container image's ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: + description: |- + List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment + variable present in a Container. + properties: + name: + description: Name of the environment variable. + Must be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. 
+ "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment + variable's value. Cannot be used if value + is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the + ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema + the FieldPath is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to + select in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
+ properties: + containerName: + description: 'Container name: required + for volumes, optional for env + vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output + format of the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource + to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret + in the pod's namespace + properties: + key: + description: The key of the secret + to select from. Must be a valid + secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the + Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + description: |- + List of sources to populate environment variables in the container. + The keys defined within a source must be a C_IDENTIFIER. All invalid keys + will be reported as an event when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take precedence. + Values defined by an Env with a duplicate key will take precedence. + Cannot be updated. 
+ items: + description: EnvFromSource represents the source + of a set of ConfigMaps + properties: + configMapRef: + description: The ConfigMap to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + description: An optional identifier to prepend + to each key in the ConfigMap. Must be + a C_IDENTIFIER. + type: string + secretRef: + description: The Secret to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + image: + description: |- + Container image name. + More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management to default or override + container images in workload controllers like Deployments and StatefulSets. + type: string + imagePullPolicy: + description: |- + Image pull policy. + One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + Cannot be updated. 
+ More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + type: string + lifecycle: + description: |- + Actions that the management system should take in response to container lifecycle events. + Cannot be updated. + properties: + postStart: + description: |- + PostStart is called immediately after a container is created. If the handler fails, + the container is terminated and restarted according to its restart policy. + Other management of the container blocks until the hook completes. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action + to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set + in the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in + HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. 
+ type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the + HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number + of seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and is kept + for backward compatibility. There is no validation of this field, and + lifecycle hooks will fail at runtime when a tcp handler is specified. + properties: + host: + description: 'Optional: Host name + to connect to, defaults to the pod + IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + description: |- + PreStop is called immediately before a container is terminated due to an + API request or management event such as liveness/startup probe failure, + preemption, resource contention, etc. The handler is not called if the + container crashes or exits. The Pod's termination grace period countdown begins before the + PreStop hook is executed. 
Regardless of the outcome of the handler, the + container will eventually terminate within the Pod's termination grace + period (unless delayed by finalizers). Other management of the container blocks until the hook completes + or until the termination grace period is reached. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action + to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set + in the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in + HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the + HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. 
+ Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number + of seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and is kept + for backward compatibility. There is no validation of this field, and + lifecycle hooks will fail at runtime when a tcp handler is specified. + properties: + host: + description: 'Optional: Host name + to connect to, defaults to the pod + IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: + description: |- + Periodic probe of container liveness. + Container will be restarted if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. 
+ type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. 
spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + name: + description: |- + Name of the container specified as a DNS_LABEL. + Each container in a pod must have a unique name (DNS_LABEL). + Cannot be updated. + type: string + ports: + description: |- + List of ports to expose from the container. Not specifying a port here + DOES NOT prevent that port from being exposed. Any port which is + listening on the default "0.0.0.0" address inside a container will be + accessible from the network. + Modifying this array with strategic merge patch may corrupt the data. + For more information, see https://github.com/kubernetes/kubernetes/issues/108255. + Cannot be updated. + items: + description: ContainerPort represents a network + port in a single container. + properties: + containerPort: + description: |- + Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external + port to. + type: string + hostPort: + description: |- + Number of port to expose on the host. + If specified, this must be a valid port number, 0 < x < 65536. + If HostNetwork is specified, this must match ContainerPort. + Most containers do not need this. + format: int32 + type: integer + name: + description: |- + If specified, this must be an IANA_SVC_NAME and unique within the pod. Each + named port in a pod must have a unique name. Name for the port that can be + referred to by services. + type: string + protocol: + default: TCP + description: |- + Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". 
+ type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + description: |- + Periodic probe of container service readiness. + Container will be removed from service endpoints if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. 
You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' 
+ type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + resizePolicy: + description: Resources resize policy for the container. + items: + description: ContainerResizePolicy represents + resource resize policy for the container. + properties: + resourceName: + description: |- + Name of the resource to which this resource resize policy applies. + Supported values: cpu, memory. + type: string + restartPolicy: + description: |- + Restart policy to apply when specified resource is resized. + If not specified, it defaults to NotRequired. 
+ type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + description: |- + Compute Resources required by this container. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one + entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. 
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + restartPolicy: + description: |- + RestartPolicy defines the restart behavior of individual containers in a pod. + This field may only be set for init containers, and the only allowed value is "Always". + For non-init containers or when this field is not specified, + the restart behavior is defined by the Pod's restart policy and the container type. + Setting the RestartPolicy as "Always" for the init container will have the following effect: + this init container will be continually restarted on + exit until all regular containers have terminated. Once all regular + containers have completed, all init containers with restartPolicy "Always" + will be shut down. This lifecycle differs from normal init containers and + is often referred to as a "sidecar" container. Although this init + container still starts in the init container sequence, it does not wait + for the container to complete before proceeding to the next init + container. Instead, the next init container starts immediately after this + init container is started, or after any startupProbe has successfully + completed. + type: string + securityContext: + description: |- + SecurityContext defines the security options the container should be run with. 
+ If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. + More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. 
+ properties: + add: + description: Added capabilities + items: + description: Capability represents a POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represents a POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default value is Default which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. 
If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label + that applies to the container. + type: string + role: + description: Role is a SELinux role label + that applies to the container. + type: string + type: + description: Type is a SELinux type label + that applies to the container. + type: string + user: + description: User is a SELinux user label + that applies to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". 
Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is + the name of the GMSA credential spec + to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. 
+ type: string + type: object + type: object + startupProbe: + description: |- + StartupProbe indicates that the Pod has successfully initialized. + If specified, no other probes are executed until this completes successfully. + If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. + This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, + when it might take a long time to load data or warm a cache, than during steady-state operation. + This cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. 
+ type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. 
+ format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + stdin: + description: |- + Whether this container should allocate a buffer for stdin in the container runtime. If this + is not set, reads from stdin in the container will always result in EOF. + Default is false. 
+ type: boolean + stdinOnce: + description: |- + Whether the container runtime should close the stdin channel after it has been opened by + a single attach. When stdin is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the + first client attaches to stdin, and then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container is restarted. If this + flag is false, a container processes that reads from stdin will never receive an EOF. + Default is false + type: boolean + terminationMessagePath: + description: |- + Optional: Path at which the file to which the container's termination message + will be written is mounted into the container's filesystem. + Message written is intended to be brief final status, such as an assertion failure message. + Will be truncated by the node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. + Defaults to /dev/termination-log. + Cannot be updated. + type: string + terminationMessagePolicy: + description: |- + Indicate how the termination message should be populated. File will use the contents of + terminationMessagePath to populate the container status message on both success and failure. + FallbackToLogsOnError will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever is smaller. + Defaults to File. + Cannot be updated. + type: string + tty: + description: |- + Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. + Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block + devices to be used by the container. 
+ items: + description: volumeDevice describes a mapping + of a raw block device within a container. + properties: + devicePath: + description: devicePath is the path inside + of the container that the device will + be mapped to. + type: string + name: + description: name must match the name of + a persistentVolumeClaim in the pod + type: string + required: + - devicePath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map + volumeMounts: + description: |- + Pod volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting + of a Volume within a container. + properties: + mountPath: + description: |- + Path within the container at which the volume should be mounted. Must + not contain ':'. + type: string + mountPropagation: + description: |- + mountPropagation determines how mounts are propagated from the host + to container and the other way around. + When not set, MountPropagationNone is used. + This field is beta in 1.10. + When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified + (which defaults to None). + type: string + name: + description: This must match the Name of + a Volume. + type: string + readOnly: + description: |- + Mounted read-only if true, read-write otherwise (false or unspecified). + Defaults to false. + type: boolean + recursiveReadOnly: + description: |- + RecursiveReadOnly specifies whether read-only mounts should be handled + recursively. + + If ReadOnly is false, this field has no meaning and must be unspecified. + + If ReadOnly is true, and this field is set to Disabled, the mount is not made + recursively read-only. If this field is set to IfPossible, the mount is made + recursively read-only, if it is supported by the container runtime. 
If this + field is set to Enabled, the mount is made recursively read-only if it is + supported by the container runtime, otherwise the pod will not be started and + an error will be generated to indicate the reason. + + If this field is set to IfPossible or Enabled, MountPropagation must be set to + None (or be unspecified, which defaults to None). + + If this field is not specified, it is treated as an equivalent of Disabled. + type: string + subPath: + description: |- + Path within the volume from which the container's volume should be mounted. + Defaults to "" (volume's root). + type: string + subPathExpr: + description: |- + Expanded path within the volume from which the container's volume should be mounted. + Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. + Defaults to "" (volume's root). + SubPathExpr and SubPath are mutually exclusive. + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + workingDir: + description: |- + Container's working directory. + If not specified, the container runtime's default will be used, which + might be configured in the container image. + Cannot be updated. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + dnsConfig: + description: |- + Specifies the DNS parameters of a pod. + Parameters specified here will be merged to the generated DNS + configuration based on DNSPolicy. + properties: + nameservers: + description: |- + A list of DNS name server IP addresses. + This will be appended to the base nameservers generated from DNSPolicy. + Duplicated nameservers will be removed. + items: + type: string + type: array + x-kubernetes-list-type: atomic + options: + description: |- + A list of DNS resolver options. 
+ This will be merged with the base options generated from DNSPolicy. + Duplicated entries will be removed. Resolution options given in Options + will override those that appear in the base DNSPolicy. + items: + description: PodDNSConfigOption defines DNS resolver + options of a pod. + properties: + name: + description: Required. + type: string + value: + type: string + type: object + type: array + x-kubernetes-list-type: atomic + searches: + description: |- + A list of DNS search domains for host-name lookup. + This will be appended to the base search paths generated from DNSPolicy. + Duplicated search paths will be removed. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + dnsPolicy: + description: |- + Set DNS policy for the pod. + Defaults to "ClusterFirst". + Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'. + DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy. + To have DNS options set along with hostNetwork, you have to specify DNS policy + explicitly to 'ClusterFirstWithHostNet'. + type: string + enableServiceLinks: + description: |- + EnableServiceLinks indicates whether information about services should be injected into pod's + environment variables, matching the syntax of Docker links. + Optional: Defaults to true. + type: boolean + ephemeralContainers: + description: |- + List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing + pod to perform user-initiated actions such as debugging. This list cannot be specified when + creating a pod, and it cannot be modified by updating the pod spec. In order to add an + ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource. + items: + description: |- + An EphemeralContainer is a temporary container that you may add to an existing Pod for + user-initiated activities such as debugging. 
Ephemeral containers have no resource or + scheduling guarantees, and they will not be restarted when they exit or when a Pod is + removed or restarted. The kubelet may evict a Pod if an ephemeral container causes the + Pod to exceed its resource allocation. + + To add an ephemeral container, use the ephemeralcontainers subresource of an existing + Pod. Ephemeral containers may not be removed or restarted. + properties: + args: + description: |- + Arguments to the entrypoint. + The image's CMD is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + command: + description: |- + Entrypoint array. Not executed within a shell. + The image's ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. 
+ More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: + description: |- + List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment + variable present in a Container. + properties: + name: + description: Name of the environment variable. + Must be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment + variable's value. Cannot be used if value + is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the + ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema + the FieldPath is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to + select in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env + vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output + format of the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource + to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret + in the pod's namespace + properties: + key: + description: The key of the secret + to select from. Must be a valid + secret key. + type: string + name: + default: "" + description: |- + Name of the referent. 
+ This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the + Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + description: |- + List of sources to populate environment variables in the container. + The keys defined within a source must be a C_IDENTIFIER. All invalid keys + will be reported as an event when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take precedence. + Values defined by an Env with a duplicate key will take precedence. + Cannot be updated. + items: + description: EnvFromSource represents the source + of a set of ConfigMaps + properties: + configMapRef: + description: The ConfigMap to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + description: An optional identifier to prepend + to each key in the ConfigMap. Must be + a C_IDENTIFIER. + type: string + secretRef: + description: The Secret to select from + properties: + name: + default: "" + description: |- + Name of the referent. 
+ This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + image: + description: |- + Container image name. + More info: https://kubernetes.io/docs/concepts/containers/images + type: string + imagePullPolicy: + description: |- + Image pull policy. + One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + type: string + lifecycle: + description: Lifecycle is not allowed for ephemeral + containers. + properties: + postStart: + description: |- + PostStart is called immediately after a container is created. If the handler fails, + the container is terminated and restarted according to its restart policy. + Other management of the container blocks until the hook completes. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action + to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set + in the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in + HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the + HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number + of seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name + to connect to, defaults to the pod + IP.' 
+ type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + description: |- + PreStop is called immediately before a container is terminated due to an + API request or management event such as liveness/startup probe failure, + preemption, resource contention, etc. The handler is not called if the + container crashes or exits. The Pod's termination grace period countdown begins before the + PreStop hook is executed. Regardless of the outcome of the handler, the + container will eventually terminate within the Pod's termination grace + period (unless delayed by finalizers). Other management of the container blocks until the hook completes + or until the termination grace period is reached. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action + to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set + in the request. 
HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in + HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the + HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number + of seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name + to connect to, defaults to the pod + IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: + description: Probes are not allowed for ephemeral + containers. 
+ properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. 
+ type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
+ The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + name: + description: |- + Name of the ephemeral container specified as a DNS_LABEL. + This name must be unique among all containers, init containers and ephemeral containers. + type: string + ports: + description: Ports are not allowed for ephemeral + containers. + items: + description: ContainerPort represents a network + port in a single container. + properties: + containerPort: + description: |- + Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external + port to. + type: string + hostPort: + description: |- + Number of port to expose on the host. + If specified, this must be a valid port number, 0 < x < 65536. + If HostNetwork is specified, this must match ContainerPort. + Most containers do not need this. + format: int32 + type: integer + name: + description: |- + If specified, this must be an IANA_SVC_NAME and unique within the pod. 
Each + named port in a pod must have a unique name. Name for the port that can be + referred to by services. + type: string + protocol: + default: TCP + description: |- + Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + description: Probes are not allowed for ephemeral + containers. + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. 
You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' 
+ type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + resizePolicy: + description: Resources resize policy for the container. + items: + description: ContainerResizePolicy represents + resource resize policy for the container. + properties: + resourceName: + description: |- + Name of the resource to which this resource resize policy applies. + Supported values: cpu, memory. + type: string + restartPolicy: + description: |- + Restart policy to apply when specified resource is resized. + If not specified, it defaults to NotRequired. 
+ type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + description: |- + Resources are not allowed for ephemeral containers. Ephemeral containers use spare resources + already allocated to the pod. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one + entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. 
+ If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + restartPolicy: + description: |- + Restart policy for the container to manage the restart behavior of each + container within a pod. + This may only be set for init containers. You cannot set this field on + ephemeral containers. + type: string + securityContext: + description: |- + Optional: SecurityContext defines the security options the ephemeral container should be run with. + If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. 
+ Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default value is Default which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. 
+ If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label + that applies to the container. + type: string + role: + description: Role is a SELinux role label + that applies to the container. + type: string + type: + description: Type is a SELinux type label + that applies to the container. + type: string + user: + description: User is a SELinux user label + that applies to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. 
+ properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is + the name of the GMSA credential spec + to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. 
+ Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + startupProbe: + description: Probes are not allowed for ephemeral + containers. + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. 
HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. 
+ x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + stdin: + description: |- + Whether this container should allocate a buffer for stdin in the container runtime. If this + is not set, reads from stdin in the container will always result in EOF. + Default is false. + type: boolean + stdinOnce: + description: |- + Whether the container runtime should close the stdin channel after it has been opened by + a single attach. When stdin is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the + first client attaches to stdin, and then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container is restarted. 
If this + flag is false, a container processes that reads from stdin will never receive an EOF. + Default is false + type: boolean + targetContainerName: + description: |- + If set, the name of the container from PodSpec that this ephemeral container targets. + The ephemeral container will be run in the namespaces (IPC, PID, etc) of this container. + If not set then the ephemeral container uses the namespaces configured in the Pod spec. + + The container runtime must implement support for this feature. If the runtime does not + support namespace targeting then the result of setting this field is undefined. + type: string + terminationMessagePath: + description: |- + Optional: Path at which the file to which the container's termination message + will be written is mounted into the container's filesystem. + Message written is intended to be brief final status, such as an assertion failure message. + Will be truncated by the node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. + Defaults to /dev/termination-log. + Cannot be updated. + type: string + terminationMessagePolicy: + description: |- + Indicate how the termination message should be populated. File will use the contents of + terminationMessagePath to populate the container status message on both success and failure. + FallbackToLogsOnError will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever is smaller. + Defaults to File. + Cannot be updated. + type: string + tty: + description: |- + Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. + Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block + devices to be used by the container. + items: + description: volumeDevice describes a mapping + of a raw block device within a container. 
+ properties: + devicePath: + description: devicePath is the path inside + of the container that the device will + be mapped to. + type: string + name: + description: name must match the name of + a persistentVolumeClaim in the pod + type: string + required: + - devicePath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map + volumeMounts: + description: |- + Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers. + Cannot be updated. + items: + description: VolumeMount describes a mounting + of a Volume within a container. + properties: + mountPath: + description: |- + Path within the container at which the volume should be mounted. Must + not contain ':'. + type: string + mountPropagation: + description: |- + mountPropagation determines how mounts are propagated from the host + to container and the other way around. + When not set, MountPropagationNone is used. + This field is beta in 1.10. + When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified + (which defaults to None). + type: string + name: + description: This must match the Name of + a Volume. + type: string + readOnly: + description: |- + Mounted read-only if true, read-write otherwise (false or unspecified). + Defaults to false. + type: boolean + recursiveReadOnly: + description: |- + RecursiveReadOnly specifies whether read-only mounts should be handled + recursively. + + If ReadOnly is false, this field has no meaning and must be unspecified. + + If ReadOnly is true, and this field is set to Disabled, the mount is not made + recursively read-only. If this field is set to IfPossible, the mount is made + recursively read-only, if it is supported by the container runtime. 
If this + field is set to Enabled, the mount is made recursively read-only if it is + supported by the container runtime, otherwise the pod will not be started and + an error will be generated to indicate the reason. + + If this field is set to IfPossible or Enabled, MountPropagation must be set to + None (or be unspecified, which defaults to None). + + If this field is not specified, it is treated as an equivalent of Disabled. + type: string + subPath: + description: |- + Path within the volume from which the container's volume should be mounted. + Defaults to "" (volume's root). + type: string + subPathExpr: + description: |- + Expanded path within the volume from which the container's volume should be mounted. + Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. + Defaults to "" (volume's root). + SubPathExpr and SubPath are mutually exclusive. + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + workingDir: + description: |- + Container's working directory. + If not specified, the container runtime's default will be used, which + might be configured in the container image. + Cannot be updated. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + hostAliases: + description: |- + HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts + file if specified. + items: + description: |- + HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the + pod's hosts file. + properties: + hostnames: + description: Hostnames for the above IP address. + items: + type: string + type: array + x-kubernetes-list-type: atomic + ip: + description: IP address of the host file entry. 
+ type: string + required: + - ip + type: object + type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map + hostIPC: + description: |- + Use the host's ipc namespace. + Optional: Default to false. + type: boolean + hostNetwork: + description: |- + Host networking requested for this pod. Use the host's network namespace. + If this option is set, the ports that will be used must be specified. + Default to false. + type: boolean + hostPID: + description: |- + Use the host's pid namespace. + Optional: Default to false. + type: boolean + hostUsers: + description: |- + Use the host's user namespace. + Optional: Default to true. + If set to true or not present, the pod will be run in the host user namespace, useful + for when the pod needs a feature only available to the host user namespace, such as + loading a kernel module with CAP_SYS_MODULE. + When set to false, a new userns is created for the pod. Setting false is useful for + mitigating container breakout vulnerabilities even allowing users to run their + containers as root without actually having root privileges on the host. + This field is alpha-level and is only honored by servers that enable the UserNamespacesSupport feature. + type: boolean + hostname: + description: |- + Specifies the hostname of the Pod + If not specified, the pod's hostname will be set to a system-defined value. + type: string + imagePullSecrets: + description: |- + ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. + If specified, these secrets will be passed to individual puller implementations for them to use. + More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod + items: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. 
+ properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + initContainers: + description: |- + List of initialization containers belonging to the pod. + Init containers are executed in order prior to containers being started. If any + init container fails, the pod is considered to have failed and is handled according + to its restartPolicy. The name for an init container or normal container must be + unique among all containers. + Init containers may not have Lifecycle actions, Readiness probes, Liveness probes, or Startup probes. + The resourceRequirements of an init container are taken into account during scheduling + by finding the highest request/limit for each resource type, and then using the max of + of that value or the sum of the normal containers. Limits are applied to init containers + in a similar fashion. + Init containers cannot currently be added or removed. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/ + items: + description: A single application container that you + want to run within a pod. + properties: + args: + description: |- + Arguments to the entrypoint. + The container image's CMD is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". 
Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + command: + description: |- + Entrypoint array. Not executed within a shell. + The container image's ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: + description: |- + List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment + variable present in a Container. + properties: + name: + description: Name of the environment variable. + Must be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". 
+ type: string + valueFrom: + description: Source for the environment + variable's value. Cannot be used if value + is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the + ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema + the FieldPath is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to + select in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. 
+ properties: + containerName: + description: 'Container name: required + for volumes, optional for env + vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output + format of the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource + to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret + in the pod's namespace + properties: + key: + description: The key of the secret + to select from. Must be a valid + secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the + Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + description: |- + List of sources to populate environment variables in the container. + The keys defined within a source must be a C_IDENTIFIER. All invalid keys + will be reported as an event when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take precedence. + Values defined by an Env with a duplicate key will take precedence. + Cannot be updated. 
+ items: + description: EnvFromSource represents the source + of a set of ConfigMaps + properties: + configMapRef: + description: The ConfigMap to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + description: An optional identifier to prepend + to each key in the ConfigMap. Must be + a C_IDENTIFIER. + type: string + secretRef: + description: The Secret to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + image: + description: |- + Container image name. + More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management to default or override + container images in workload controllers like Deployments and StatefulSets. + type: string + imagePullPolicy: + description: |- + Image pull policy. + One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + Cannot be updated. 
+ More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + type: string + lifecycle: + description: |- + Actions that the management system should take in response to container lifecycle events. + Cannot be updated. + properties: + postStart: + description: |- + PostStart is called immediately after a container is created. If the handler fails, + the container is terminated and restarted according to its restart policy. + Other management of the container blocks until the hook completes. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action + to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set + in the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in + HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. 
+ type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the + HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number + of seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name + to connect to, defaults to the pod + IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + description: |- + PreStop is called immediately before a container is terminated due to an + API request or management event such as liveness/startup probe failure, + preemption, resource contention, etc. The handler is not called if the + container crashes or exits. The Pod's termination grace period countdown begins before the + PreStop hook is executed. 
Regardless of the outcome of the handler, the + container will eventually terminate within the Pod's termination grace + period (unless delayed by finalizers). Other management of the container blocks until the hook completes + or until the termination grace period is reached. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action + to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set + in the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in + HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the + HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. 
+ Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number + of seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name + to connect to, defaults to the pod + IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: + description: |- + Periodic probe of container liveness. + Container will be restarted if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. 
+ type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. 
spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + name: + description: |- + Name of the container specified as a DNS_LABEL. + Each container in a pod must have a unique name (DNS_LABEL). + Cannot be updated. + type: string + ports: + description: |- + List of ports to expose from the container. Not specifying a port here + DOES NOT prevent that port from being exposed. Any port which is + listening on the default "0.0.0.0" address inside a container will be + accessible from the network. + Modifying this array with strategic merge patch may corrupt the data. + For more information See https://github.com/kubernetes/kubernetes/issues/108255. + Cannot be updated. + items: + description: ContainerPort represents a network + port in a single container. + properties: + containerPort: + description: |- + Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external + port to. + type: string + hostPort: + description: |- + Number of port to expose on the host. + If specified, this must be a valid port number, 0 < x < 65536. + If HostNetwork is specified, this must match ContainerPort. + Most containers do not need this. + format: int32 + type: integer + name: + description: |- + If specified, this must be an IANA_SVC_NAME and unique within the pod. Each + named port in a pod must have a unique name. Name for the port that can be + referred to by services. + type: string + protocol: + default: TCP + description: |- + Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". 
+ type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + description: |- + Periodic probe of container service readiness. + Container will be removed from service endpoints if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. 
You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' 
+ type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + resizePolicy: + description: Resources resize policy for the container. + items: + description: ContainerResizePolicy represents + resource resize policy for the container. + properties: + resourceName: + description: |- + Name of the resource to which this resource resize policy applies. + Supported values: cpu, memory. + type: string + restartPolicy: + description: |- + Restart policy to apply when specified resource is resized. + If not specified, it defaults to NotRequired. 
+ type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + description: |- + Compute Resources required by this container. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one + entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. 
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + restartPolicy: + description: |- + RestartPolicy defines the restart behavior of individual containers in a pod. + This field may only be set for init containers, and the only allowed value is "Always". + For non-init containers or when this field is not specified, + the restart behavior is defined by the Pod's restart policy and the container type. + Setting the RestartPolicy as "Always" for the init container will have the following effect: + this init container will be continually restarted on + exit until all regular containers have terminated. Once all regular + containers have completed, all init containers with restartPolicy "Always" + will be shut down. This lifecycle differs from normal init containers and + is often referred to as a "sidecar" container. Although this init + container still starts in the init container sequence, it does not wait + for the container to complete before proceeding to the next init + container. Instead, the next init container starts immediately after this + init container is started, or after any startupProbe has successfully + completed. + type: string + securityContext: + description: |- + SecurityContext defines the security options the container should be run with. 
+ If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. + More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. 
+ properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default value is Default which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. 
If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label + that applies to the container. + type: string + role: + description: Role is a SELinux role label + that applies to the container. + type: string + type: + description: Type is a SELinux type label + that applies to the container. + type: string + user: + description: User is a SELinux user label + that applies to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". 
Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is + the name of the GMSA credential spec + to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. 
+ type: string + type: object + type: object + startupProbe: + description: |- + StartupProbe indicates that the Pod has successfully initialized. + If specified, no other probes are executed until this completes successfully. + If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. + This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, + when it might take a long time to load data or warm a cache, than during steady-state operation. + This cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. 
+ type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. 
+ format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + stdin: + description: |- + Whether this container should allocate a buffer for stdin in the container runtime. If this + is not set, reads from stdin in the container will always result in EOF. + Default is false. 
+ type: boolean + stdinOnce: + description: |- + Whether the container runtime should close the stdin channel after it has been opened by + a single attach. When stdin is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the + first client attaches to stdin, and then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container is restarted. If this + flag is false, a container processes that reads from stdin will never receive an EOF. + Default is false + type: boolean + terminationMessagePath: + description: |- + Optional: Path at which the file to which the container's termination message + will be written is mounted into the container's filesystem. + Message written is intended to be brief final status, such as an assertion failure message. + Will be truncated by the node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. + Defaults to /dev/termination-log. + Cannot be updated. + type: string + terminationMessagePolicy: + description: |- + Indicate how the termination message should be populated. File will use the contents of + terminationMessagePath to populate the container status message on both success and failure. + FallbackToLogsOnError will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever is smaller. + Defaults to File. + Cannot be updated. + type: string + tty: + description: |- + Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. + Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block + devices to be used by the container. 
+ items: + description: volumeDevice describes a mapping + of a raw block device within a container. + properties: + devicePath: + description: devicePath is the path inside + of the container that the device will + be mapped to. + type: string + name: + description: name must match the name of + a persistentVolumeClaim in the pod + type: string + required: + - devicePath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map + volumeMounts: + description: |- + Pod volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting + of a Volume within a container. + properties: + mountPath: + description: |- + Path within the container at which the volume should be mounted. Must + not contain ':'. + type: string + mountPropagation: + description: |- + mountPropagation determines how mounts are propagated from the host + to container and the other way around. + When not set, MountPropagationNone is used. + This field is beta in 1.10. + When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified + (which defaults to None). + type: string + name: + description: This must match the Name of + a Volume. + type: string + readOnly: + description: |- + Mounted read-only if true, read-write otherwise (false or unspecified). + Defaults to false. + type: boolean + recursiveReadOnly: + description: |- + RecursiveReadOnly specifies whether read-only mounts should be handled + recursively. + + If ReadOnly is false, this field has no meaning and must be unspecified. + + If ReadOnly is true, and this field is set to Disabled, the mount is not made + recursively read-only. If this field is set to IfPossible, the mount is made + recursively read-only, if it is supported by the container runtime. 
If this + field is set to Enabled, the mount is made recursively read-only if it is + supported by the container runtime, otherwise the pod will not be started and + an error will be generated to indicate the reason. + + If this field is set to IfPossible or Enabled, MountPropagation must be set to + None (or be unspecified, which defaults to None). + + If this field is not specified, it is treated as an equivalent of Disabled. + type: string + subPath: + description: |- + Path within the volume from which the container's volume should be mounted. + Defaults to "" (volume's root). + type: string + subPathExpr: + description: |- + Expanded path within the volume from which the container's volume should be mounted. + Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. + Defaults to "" (volume's root). + SubPathExpr and SubPath are mutually exclusive. + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + workingDir: + description: |- + Container's working directory. + If not specified, the container runtime's default will be used, which + might be configured in the container image. + Cannot be updated. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + nodeName: + description: |- + NodeName indicates in which node this pod is scheduled. + If empty, this pod is a candidate for scheduling by the scheduler defined in schedulerName. + Once this field is set, the kubelet for this node becomes responsible for the lifecycle of this pod. + This field should not be used to express a desire for the pod to be scheduled on a specific node. 
+ https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodename + type: string + nodeSelector: + additionalProperties: + type: string + description: |- + NodeSelector is a selector which must be true for the pod to fit on a node. + Selector which must match a node's labels for the pod to be scheduled on that node. + More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ + type: object + x-kubernetes-map-type: atomic + os: + description: |- + Specifies the OS of the containers in the pod. + Some pod and container fields are restricted if this is set. + + If the OS field is set to linux, the following fields must be unset: + -securityContext.windowsOptions + + If the OS field is set to windows, following fields must be unset: + - spec.hostPID + - spec.hostIPC + - spec.hostUsers + - spec.securityContext.appArmorProfile + - spec.securityContext.seLinuxOptions + - spec.securityContext.seccompProfile + - spec.securityContext.fsGroup + - spec.securityContext.fsGroupChangePolicy + - spec.securityContext.sysctls + - spec.shareProcessNamespace + - spec.securityContext.runAsUser + - spec.securityContext.runAsGroup + - spec.securityContext.supplementalGroups + - spec.securityContext.supplementalGroupsPolicy + - spec.containers[*].securityContext.appArmorProfile + - spec.containers[*].securityContext.seLinuxOptions + - spec.containers[*].securityContext.seccompProfile + - spec.containers[*].securityContext.capabilities + - spec.containers[*].securityContext.readOnlyRootFilesystem + - spec.containers[*].securityContext.privileged + - spec.containers[*].securityContext.allowPrivilegeEscalation + - spec.containers[*].securityContext.procMount + - spec.containers[*].securityContext.runAsUser + - spec.containers[*].securityContext.runAsGroup + properties: + name: + description: |- + Name is the name of the operating system. The currently supported values are linux and windows. 
+ Additional value may be defined in future and can be one of: + https://github.com/opencontainers/runtime-spec/blob/master/config.md#platform-specific-configuration + Clients should expect to handle additional values and treat unrecognized values in this field as os: null + type: string + required: + - name + type: object + overhead: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Overhead represents the resource overhead associated with running a pod for a given RuntimeClass. + This field will be autopopulated at admission time by the RuntimeClass admission controller. If + the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests. + The RuntimeClass admission controller will reject Pod create requests which have the overhead already + set. If RuntimeClass is configured and selected in the PodSpec, Overhead will be set to the value + defined in the corresponding RuntimeClass, otherwise it will remain unset and treated as zero. + More info: https://git.k8s.io/enhancements/keps/sig-node/688-pod-overhead/README.md + type: object + preemptionPolicy: + description: |- + PreemptionPolicy is the Policy for preempting pods with lower priority. + One of Never, PreemptLowerPriority. + Defaults to PreemptLowerPriority if unset. + type: string + priority: + description: |- + The priority value. Various system components use this field to find the + priority of the pod. When Priority Admission Controller is enabled, it + prevents users from setting this field. The admission controller populates + this field from PriorityClassName. + The higher the value, the higher the priority. + format: int32 + type: integer + priorityClassName: + description: |- + If specified, indicates the pod's priority. 
"system-node-critical" and + "system-cluster-critical" are two special keywords which indicate the + highest priorities with the former being the highest priority. Any other + name must be defined by creating a PriorityClass object with that name. + If not specified, the pod priority will be default or zero if there is no + default. + type: string + readinessGates: + description: |- + If specified, all readiness gates will be evaluated for pod readiness. + A pod is ready when all its containers are ready AND + all conditions specified in the readiness gates have status equal to "True" + More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates + items: + description: PodReadinessGate contains the reference + to a pod condition + properties: + conditionType: + description: ConditionType refers to a condition + in the pod's condition list with matching type. + type: string + required: + - conditionType + type: object + type: array + x-kubernetes-list-type: atomic + resourceClaims: + description: |- + ResourceClaims defines which ResourceClaims must be allocated + and reserved before the Pod is allowed to start. The resources + will be made available to those containers which consume them + by name. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. + items: + description: |- + PodResourceClaim references exactly one ResourceClaim, either directly + or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim + for the pod. + + It adds a name to it that uniquely identifies the ResourceClaim inside the Pod. + Containers that need access to the ResourceClaim reference it with this name. + properties: + name: + description: |- + Name uniquely identifies this resource claim inside the pod. + This must be a DNS_LABEL. 
+ type: string + resourceClaimName: + description: |- + ResourceClaimName is the name of a ResourceClaim object in the same + namespace as this pod. + + Exactly one of ResourceClaimName and ResourceClaimTemplateName must + be set. + type: string + resourceClaimTemplateName: + description: |- + ResourceClaimTemplateName is the name of a ResourceClaimTemplate + object in the same namespace as this pod. + + The template will be used to create a new ResourceClaim, which will + be bound to this pod. When this pod is deleted, the ResourceClaim + will also be deleted. The pod name and resource name, along with a + generated component, will be used to form a unique name for the + ResourceClaim, which will be recorded in pod.status.resourceClaimStatuses. + + This field is immutable and no changes will be made to the + corresponding ResourceClaim by the control plane after creating the + ResourceClaim. + + Exactly one of ResourceClaimName and ResourceClaimTemplateName must + be set. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + restartPolicy: + description: |- + Restart policy for all containers within the pod. + One of Always, OnFailure, Never. In some contexts, only a subset of those values may be permitted. + Default to Always. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy + type: string + runtimeClassName: + description: |- + RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used + to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. + If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an + empty definition that uses the default runtime handler. 
+ More info: https://git.k8s.io/enhancements/keps/sig-node/585-runtime-class + type: string + schedulerName: + description: |- + If specified, the pod will be dispatched by specified scheduler. + If not specified, the pod will be dispatched by default scheduler. + type: string + schedulingGates: + description: |- + SchedulingGates is an opaque list of values that if specified will block scheduling the pod. + If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the + scheduler will not attempt to schedule the pod. + + SchedulingGates can only be set at pod creation time, and be removed only afterwards. + items: + description: PodSchedulingGate is associated to a + Pod to guard its scheduling. + properties: + name: + description: |- + Name of the scheduling gate. + Each scheduling gate must have a unique name field. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + securityContext: + description: |- + SecurityContext holds pod-level security attributes and common container settings. + Optional: Defaults to empty. See type description for default values of each field. + properties: + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. 
+ type: string + required: + - type + type: object + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. + Some volume types allow the Kubelet to change the ownership of that volume + to be owned by the pod: + + 1. The owning GID will be the FSGroup + 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) + 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + fsGroupChangePolicy: + description: |- + fsGroupChangePolicy defines behavior of changing ownership and permission of the volume + before being exposed inside Pod. This field will only apply to + volume types which support fsGroup based ownership(and permissions). + It will have no effect on ephemeral volume types such as: secret, configmaps + and emptydir. + Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. + Note that this field cannot be set when spec.os.name is windows. + type: string + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. 
+ type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to all containers. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in SecurityContext. If set in + both SecurityContext and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that + applies to the container. + type: string + role: + description: Role is a SELinux role label that + applies to the container. + type: string + type: + description: Type is a SELinux type label that + applies to the container. + type: string + user: + description: User is a SELinux user label that + applies to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. 
+ Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + supplementalGroups: + description: |- + A list of groups applied to the first process run in each container, in + addition to the container's primary GID and fsGroup (if specified). If + the SupplementalGroupsPolicy feature is enabled, the + supplementalGroupsPolicy field determines whether these are in addition + to or instead of any group memberships defined in the container image. + If unspecified, no additional groups are added, though group memberships + defined in the container image may still be used, depending on the + supplementalGroupsPolicy field. + Note that this field cannot be set when spec.os.name is windows. + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + supplementalGroupsPolicy: + description: |- + Defines how supplemental groups of the first container processes are calculated. + Valid values are "Merge" and "Strict". If not specified, "Merge" is used. + (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled + and the container runtime must implement support for this feature. + Note that this field cannot be set when spec.os.name is windows. + type: string + sysctls: + description: |- + Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + sysctls (by the container runtime) might fail to launch. + Note that this field cannot be set when spec.os.name is windows. 
+ items: + description: Sysctl defines a kernel parameter + to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options within a container's SecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name + of the GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + serviceAccount: + description: |- + DeprecatedServiceAccount is a deprecated alias for ServiceAccountName. + Deprecated: Use serviceAccountName instead. 
+ type: string + serviceAccountName: + description: |- + ServiceAccountName is the name of the ServiceAccount to use to run this pod. + More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + type: string + setHostnameAsFQDN: + description: |- + If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default). + In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname). + In Windows containers, this means setting the registry value of hostname for the registry key HKEY_LOCAL_MACHINE\\SYSTEM\\CurrentControlSet\\Services\\Tcpip\\Parameters to FQDN. + If a pod does not have FQDN, this has no effect. + Default to false. + type: boolean + shareProcessNamespace: + description: |- + Share a single process namespace between all of the containers in a pod. + When this is set containers will be able to view and signal processes from other containers + in the same pod, and the first process in each container will not be assigned PID 1. + HostPID and ShareProcessNamespace cannot both be set. + Optional: Default to false. + type: boolean + subdomain: + description: |- + If specified, the fully qualified Pod hostname will be "<hostname>.<subdomain>.<pod namespace>.svc.<cluster domain>". + If not specified, the pod will not have a domainname at all. + type: string + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + If this value is nil, the default grace period will be used instead. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process.
+ Defaults to 30 seconds. + format: int64 + type: integer + tolerations: + description: If specified, the pod's tolerations. + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple <key,value,effect> using the matching operator <operator>. + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + x-kubernetes-list-type: atomic + topologySpreadConstraints: + description: |- + TopologySpreadConstraints describes how a group of pods ought to spread across topology + domains. Scheduler will schedule pods in a way which abides by the constraints. + All topologySpreadConstraints are ANDed.
+ items: + description: TopologySpreadConstraint specifies how + to spread matching pods among the given topology. + properties: + labelSelector: + description: |- + LabelSelector is used to find matching pods. + Pods that match this label selector are counted to determine the number of pods + in their corresponding topology domain. + properties: + matchExpressions: + description: matchExpressions is a list of + label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that + the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select the pods over which + spreading will be calculated. 
The keys are used to lookup values from the + incoming pod labels, those key-value labels are ANDed with labelSelector + to select the group of existing pods over which spreading will be calculated + for the incoming pod. The same key is forbidden to exist in both MatchLabelKeys and LabelSelector. + MatchLabelKeys cannot be set when LabelSelector isn't set. + Keys that don't exist in the incoming pod labels will + be ignored. A null or empty list means only match against labelSelector. + + This is a beta field and requires the MatchLabelKeysInPodTopologySpread feature gate to be enabled (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + maxSkew: + description: |- + MaxSkew describes the degree to which pods may be unevenly distributed. + When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference + between the number of matching pods in the target topology and the global minimum. + The global minimum is the minimum number of matching pods in an eligible domain + or zero if the number of eligible domains is less than MinDomains. + For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same + labelSelector spread as 2/2/1: + In this case, the global minimum is 1. + | zone1 | zone2 | zone3 | + | P P | P P | P | + - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2; + scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) + violate MaxSkew(1). + - if MaxSkew is 2, incoming pod can be scheduled onto any zone. + When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence + to topologies that satisfy it. + It's a required field. Default value is 1 and 0 is not allowed. + format: int32 + type: integer + minDomains: + description: |- + MinDomains indicates a minimum number of eligible domains. 
+ When the number of eligible domains with matching topology keys is less than minDomains, + Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed. + And when the number of eligible domains with matching topology keys equals or greater than minDomains, + this value has no effect on scheduling. + As a result, when the number of eligible domains is less than minDomains, + scheduler won't schedule more than maxSkew Pods to those domains. + If value is nil, the constraint behaves as if MinDomains is equal to 1. + Valid values are integers greater than 0. + When value is not nil, WhenUnsatisfiable must be DoNotSchedule. + + For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same + labelSelector spread as 2/2/2: + | zone1 | zone2 | zone3 | + | P P | P P | P P | + The number of domains is less than 5(MinDomains), so "global minimum" is treated as 0. + In this situation, new pod with the same labelSelector cannot be scheduled, + because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones, + it will violate MaxSkew. + format: int32 + type: integer + nodeAffinityPolicy: + description: |- + NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector + when calculating pod topology spread skew. Options are: + - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. + - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. + + If this value is nil, the behavior is equivalent to the Honor policy. + This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag. + type: string + nodeTaintsPolicy: + description: |- + NodeTaintsPolicy indicates how we will treat node taints when calculating + pod topology spread skew. 
Options are: + - Honor: nodes without taints, along with tainted nodes for which the incoming pod + has a toleration, are included. + - Ignore: node taints are ignored. All nodes are included. + + If this value is nil, the behavior is equivalent to the Ignore policy. + This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag. + type: string + topologyKey: + description: |- + TopologyKey is the key of node labels. Nodes that have a label with this key + and identical values are considered to be in the same topology. + We consider each <key, value> as a "bucket", and try to put balanced number + of pods into each bucket. + We define a domain as a particular instance of a topology. + Also, we define an eligible domain as a domain whose nodes meet the requirements of + nodeAffinityPolicy and nodeTaintsPolicy. + e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology. + And, if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology. + It's a required field. + type: string + whenUnsatisfiable: + description: |- + WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy + the spread constraint. + - DoNotSchedule (default) tells the scheduler not to schedule it. + - ScheduleAnyway tells the scheduler to schedule the pod in any location, + but giving higher precedence to topologies that would help reduce the + skew. + A constraint is considered "Unsatisfiable" for an incoming pod + if and only if every possible node assignment for that pod would violate + "MaxSkew" on some topology. + For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same + labelSelector spread as 3/1/1: + | zone1 | zone2 | zone3 | + | P P P | P | P | + If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled + to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies + MaxSkew(1).
In other words, the cluster can still be imbalanced, but scheduler + won't make it *more* imbalanced. + It's a required field. + type: string + required: + - maxSkew + - topologyKey + - whenUnsatisfiable + type: object + type: array + x-kubernetes-list-map-keys: + - topologyKey + - whenUnsatisfiable + x-kubernetes-list-type: map + volumes: + description: |- + List of volumes that can be mounted by containers belonging to the pod. + More info: https://kubernetes.io/docs/concepts/storage/volumes + items: + description: Volume represents a named volume in a + pod that may be accessed by any container in the + pod. + properties: + awsElasticBlockStore: + description: |- + awsElasticBlockStore represents an AWS Disk resource that is attached to a + kubelet's host machine and then exposed to the pod. + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + properties: + fsType: + description: |- + fsType is the filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + type: string + partition: + description: |- + partition is the partition in the volume that you want to mount. + If omitted, the default is to mount by volume name. + Examples: For volume /dev/sda1, you specify the partition as "1". + Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). + format: int32 + type: integer + readOnly: + description: |- + readOnly value true will force the readOnly setting in VolumeMounts. + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + type: boolean + volumeID: + description: |- + volumeID is unique ID of the persistent disk resource in AWS (Amazon EBS volume). 
+ More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + type: string + required: + - volumeID + type: object + azureDisk: + description: azureDisk represents an Azure Data + Disk mount on the host and bind mount to the + pod. + properties: + cachingMode: + description: 'cachingMode is the Host Caching + mode: None, Read Only, Read Write.' + type: string + diskName: + description: diskName is the Name of the data + disk in the blob storage + type: string + diskURI: + description: diskURI is the URI of data disk + in the blob storage + type: string + fsType: + default: ext4 + description: |- + fsType is Filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + kind: + description: 'kind expected values are Shared: + multiple blob disks per storage account Dedicated: + single blob disk per storage account Managed: + azure managed data disk (only in managed + availability set). defaults to shared' + type: string + readOnly: + default: false + description: |- + readOnly Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + required: + - diskName + - diskURI + type: object + azureFile: + description: azureFile represents an Azure File + Service mount on the host and bind mount to + the pod. + properties: + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. 
+ type: boolean + secretName: + description: secretName is the name of secret + that contains Azure Storage Account Name + and Key + type: string + shareName: + description: shareName is the azure share + Name + type: string + required: + - secretName + - shareName + type: object + cephfs: + description: cephFS represents a Ceph FS mount + on the host that shares a pod's lifetime + properties: + monitors: + description: |- + monitors is Required: Monitors is a collection of Ceph monitors + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + items: + type: string + type: array + x-kubernetes-list-type: atomic + path: + description: 'path is Optional: Used as the + mounted root, rather than the full Ceph + tree, default is /' + type: string + readOnly: + description: |- + readOnly is Optional: Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + type: boolean + secretFile: + description: |- + secretFile is Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + type: string + secretRef: + description: |- + secretRef is Optional: SecretRef is reference to the authentication secret for User, default is empty. + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + user: + description: |- + user is optional: User is the rados user name, default is admin + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + type: string + required: + - monitors + type: object + cinder: + description: |- + cinder represents a cinder volume attached and mounted on kubelets host machine. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + type: boolean + secretRef: + description: |- + secretRef is optional: points to a secret object containing parameters used to connect + to OpenStack. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + volumeID: + description: |- + volumeID used to identify the volume in cinder. 
+ More info: https://examples.k8s.io/mysql-cinder-pd/README.md + type: string + required: + - volumeID + type: object + configMap: + description: configMap represents a configMap + that should populate this volume + properties: + defaultMode: + description: |- + defaultMode is optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Defaults to 0644. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + ConfigMap will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the ConfigMap, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path + within a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. 
+ format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: optional specify whether the + ConfigMap or its keys must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + csi: + description: csi (Container Storage Interface) + represents ephemeral storage that is handled + by certain external CSI drivers (Beta feature). + properties: + driver: + description: |- + driver is the name of the CSI driver that handles this volume. + Consult with your admin for the correct name as registered in the cluster. + type: string + fsType: + description: |- + fsType to mount. Ex. "ext4", "xfs", "ntfs". + If not provided, the empty value is passed to the associated CSI driver + which will determine the default filesystem to apply. + type: string + nodePublishSecretRef: + description: |- + nodePublishSecretRef is a reference to the secret object containing + sensitive information to pass to the CSI driver to complete the CSI + NodePublishVolume and NodeUnpublishVolume calls. + This field is optional, and may be empty if no secret is required. If the + secret object contains more than one secret, all secret references are passed. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. 
Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + readOnly: + description: |- + readOnly specifies a read-only configuration for the volume. + Defaults to false (read/write). + type: boolean + volumeAttributes: + additionalProperties: + type: string + description: |- + volumeAttributes stores driver-specific properties that are passed to the CSI + driver. Consult your driver's documentation for supported values. + type: object + required: + - driver + type: object + downwardAPI: + description: downwardAPI represents downward API + about the pod that should populate this volume + properties: + defaultMode: + description: |- + Optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Defaults to 0644. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: Items is a list of downward API + volume file + items: + description: DownwardAPIVolumeFile represents + information to create the file containing + the pod field + properties: + fieldRef: + description: 'Required: Selects a field + of the pod: only annotations, labels, + name, namespace and uid are supported.' + properties: + apiVersion: + description: Version of the schema + the FieldPath is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to + select in the specified API version.
+ type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + mode: + description: |- + Optional: mode bits used to set permissions on this file, must be an octal value + between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: 'Required: Path is the + relative path name of the file to + be created. Must not be absolute or + contain the ''..'' path. Must be utf-8 + encoded. The first item of the relative + path must not start with ''..''' + type: string + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env + vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output + format of the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource + to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + required: + - path + type: object + type: array + x-kubernetes-list-type: atomic + type: object + emptyDir: + description: |- + emptyDir represents a temporary directory that shares a pod's lifetime. + More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir + properties: + medium: + description: |- + medium represents what type of storage medium should back this directory. 
+ The default is "" which means to use the node's default medium. + Must be an empty string (default) or Memory. + More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir + type: string + sizeLimit: + anyOf: + - type: integer + - type: string + description: |- + sizeLimit is the total amount of local storage required for this EmptyDir volume. + The size limit is also applicable for memory medium. + The maximum usage on memory medium EmptyDir would be the minimum value between + the SizeLimit specified here and the sum of memory limits of all containers in a pod. + The default is nil which means that the limit is undefined. + More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + ephemeral: + description: |- + ephemeral represents a volume that is handled by a cluster storage driver. + The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts, + and deleted when the pod is removed. + + Use this if: + a) the volume is only needed while the pod runs, + b) features of normal volumes like restoring from snapshot or capacity + tracking are needed, + c) the storage driver is specified through a storage class, and + d) the storage driver supports dynamic volume provisioning through + a PersistentVolumeClaim (see EphemeralVolumeSource for more + information on the connection between this volume type + and PersistentVolumeClaim). + + Use PersistentVolumeClaim or one of the vendor-specific + APIs for volumes that persist for longer than the lifecycle + of an individual pod. + + Use CSI for light-weight local ephemeral volumes if the CSI driver is meant to + be used that way - see the documentation of the driver for + more information. + + A pod can use both types of ephemeral volumes and + persistent volumes at the same time. 
+ properties: + volumeClaimTemplate: + description: |- + Will be used to create a stand-alone PVC to provision the volume. + The pod in which this EphemeralVolumeSource is embedded will be the + owner of the PVC, i.e. the PVC will be deleted together with the + pod. The name of the PVC will be `-` where + `` is the name from the `PodSpec.Volumes` array + entry. Pod validation will reject the pod if the concatenated name + is not valid for a PVC (for example, too long). + + An existing PVC with that name that is not owned by the pod + will *not* be used for the pod to avoid using an unrelated + volume by mistake. Starting the pod is then blocked until + the unrelated PVC is removed. If such a pre-created PVC is + meant to be used by the pod, the PVC has to updated with an + owner reference to the pod once the pod exists. Normally + this should not be necessary, but it may be useful when + manually reconstructing a broken cluster. + + This field is read-only and no changes will be made by Kubernetes + to the PVC after it has been created. + + Required, must not be nil. + properties: + metadata: + description: |- + May contain labels and annotations that will be copied into the PVC + when creating it. No other fields are allowed and will be rejected during + validation. + properties: + annotations: + additionalProperties: + type: string + type: object + finalizers: + items: + type: string + type: array + labels: + additionalProperties: + type: string + type: object + name: + type: string + namespace: + type: string + type: object + spec: + description: |- + The specification for the PersistentVolumeClaim. The entire content is + copied unchanged into the PVC that gets created from this + template. The same fields as in a PersistentVolumeClaim + are also valid here. + properties: + accessModes: + description: |- + accessModes contains the desired access modes the volume should have. 
+ More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 + items: + type: string + type: array + x-kubernetes-list-type: atomic + dataSource: + description: |- + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type + of resource being referenced + type: string + name: + description: Name is the name + of resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. + When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. 
For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. + When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. + (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type + of resource being referenced + type: string + name: + description: Name is the name + of resource being referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. 
+ type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query + over volumes to consider for binding. + properties: + matchExpressions: + description: matchExpressions + is a list of label selector + requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. 
+ properties: + key: + description: key is the + label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. + If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, + it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass + will be applied to the claim but it's not allowed to reset this field to empty string once it is set. 
+ If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass + will be set by the persistentvolume controller if it exists. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ + (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding + reference to the PersistentVolume + backing this claim. + type: string + type: object + required: + - spec + type: object + type: object + fc: + description: fc represents a Fibre Channel resource + that is attached to a kubelet's host machine + and then exposed to the pod. + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + lun: + description: 'lun is Optional: FC target lun + number' + format: int32 + type: integer + readOnly: + description: |- + readOnly is Optional: Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + targetWWNs: + description: 'targetWWNs is Optional: FC target + worldwide names (WWNs)' + items: + type: string + type: array + x-kubernetes-list-type: atomic + wwids: + description: |- + wwids Optional: FC volume world wide identifiers (wwids) + Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + flexVolume: + description: |- + flexVolume represents a generic volume resource that is + provisioned/attached using an exec based plugin. + properties: + driver: + description: driver is the name of the driver + to use for this volume. + type: string + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script. + type: string + options: + additionalProperties: + type: string + description: 'options is Optional: this field + holds extra command options if any.' + type: object + readOnly: + description: |- + readOnly is Optional: defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef is Optional: secretRef is reference to the secret object containing + sensitive information to pass to the plugin scripts. This may be + empty if no secret object is specified. If the secret object + contains more than one secret, all secrets are passed to the plugin + scripts. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + required: + - driver + type: object + flocker: + description: flocker represents a Flocker volume + attached to a kubelet's host machine. 
This depends + on the Flocker control service being running + properties: + datasetName: + description: |- + datasetName is Name of the dataset stored as metadata -> name on the dataset for Flocker + should be considered as deprecated + type: string + datasetUUID: + description: datasetUUID is the UUID of the + dataset. This is unique identifier of a + Flocker dataset + type: string + type: object + gcePersistentDisk: + description: |- + gcePersistentDisk represents a GCE Disk resource that is attached to a + kubelet's host machine and then exposed to the pod. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + properties: + fsType: + description: |- + fsType is filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + type: string + partition: + description: |- + partition is the partition in the volume that you want to mount. + If omitted, the default is to mount by volume name. + Examples: For volume /dev/sda1, you specify the partition as "1". + Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + format: int32 + type: integer + pdName: + description: |- + pdName is unique name of the PD resource in GCE. Used to identify the disk in GCE. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + type: string + readOnly: + description: |- + readOnly here will force the ReadOnly setting in VolumeMounts. + Defaults to false. 
+ More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + type: boolean + required: + - pdName + type: object + gitRepo: + description: |- + gitRepo represents a git repository at a particular revision. + DEPRECATED: GitRepo is deprecated. To provision a container with a git repo, mount an + EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir + into the Pod's container. + properties: + directory: + description: |- + directory is the target directory name. + Must not contain or start with '..'. If '.' is supplied, the volume directory will be the + git repository. Otherwise, if specified, the volume will contain the git repository in + the subdirectory with the given name. + type: string + repository: + description: repository is the URL + type: string + revision: + description: revision is the commit hash for + the specified revision. + type: string + required: + - repository + type: object + glusterfs: + description: |- + glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. + More info: https://examples.k8s.io/volumes/glusterfs/README.md + properties: + endpoints: + description: |- + endpoints is the endpoint name that details Glusterfs topology. + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: string + path: + description: |- + path is the Glusterfs volume path. + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: string + readOnly: + description: |- + readOnly here will force the Glusterfs volume to be mounted with read-only permissions. + Defaults to false. + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: boolean + required: + - endpoints + - path + type: object + hostPath: + description: |- + hostPath represents a pre-existing file or directory on the host + machine that is directly exposed to the container. 
This is generally + used for system agents or other privileged things that are allowed + to see the host machine. Most containers will NOT need this. + More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath + properties: + path: + description: |- + path of the directory on the host. + If the path is a symlink, it will follow the link to the real path. + More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath + type: string + type: + description: |- + type for HostPath Volume + Defaults to "" + More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath + type: string + required: + - path + type: object + image: + description: |- + image represents an OCI object (a container image or artifact) pulled and mounted on the kubelet's host machine. + The volume is resolved at pod startup depending on which PullPolicy value is provided: + + - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. + - Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present. + - IfNotPresent: the kubelet pulls if the reference isn't already present on disk. Container creation will fail if the reference isn't present and the pull fails. + + The volume gets re-resolved if the pod gets deleted and recreated, which means that new remote content will become available on pod recreation. + A failure to resolve or pull the image during pod startup will block containers from starting and may add significant latency. Failures will be retried using normal volume backoff and will be reported on the pod reason and message. + The types of objects that may be mounted by this volume are defined by the container runtime implementation on a host machine and at minimum must include all valid types supported by the container image field. 
+ The OCI object gets mounted in a single directory (spec.containers[*].volumeMounts.mountPath) by merging the manifest layers in the same way as for container images. + The volume will be mounted read-only (ro) and non-executable files (noexec). + Sub path mounts for containers are not supported (spec.containers[*].volumeMounts.subpath). + The field spec.securityContext.fsGroupChangePolicy has no effect on this volume type. + properties: + pullPolicy: + description: |- + Policy for pulling OCI objects. Possible values are: + Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. + Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present. + IfNotPresent: the kubelet pulls if the reference isn't already present on disk. Container creation will fail if the reference isn't present and the pull fails. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + type: string + reference: + description: |- + Required: Image or artifact reference to be used. + Behaves in the same way as pod.spec.containers[*].image. + Pull secrets will be assembled in the same way as for the container image by looking up node credentials, SA image pull secrets, and pod spec image pull secrets. + More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management to default or override + container images in workload controllers like Deployments and StatefulSets. + type: string + type: object + iscsi: + description: |- + iscsi represents an ISCSI Disk resource that is attached to a + kubelet's host machine and then exposed to the pod. 
+ More info: https://examples.k8s.io/volumes/iscsi/README.md + properties: + chapAuthDiscovery: + description: chapAuthDiscovery defines whether + support iSCSI Discovery CHAP authentication + type: boolean + chapAuthSession: + description: chapAuthSession defines whether + support iSCSI Session CHAP authentication + type: boolean + fsType: + description: |- + fsType is the filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi + type: string + initiatorName: + description: |- + initiatorName is the custom iSCSI Initiator Name. + If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface + : will be created for the connection. + type: string + iqn: + description: iqn is the target iSCSI Qualified + Name. + type: string + iscsiInterface: + default: default + description: |- + iscsiInterface is the interface Name that uses an iSCSI transport. + Defaults to 'default' (tcp). + type: string + lun: + description: lun represents iSCSI Target Lun + number. + format: int32 + type: integer + portals: + description: |- + portals is the iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port + is other than default (typically TCP ports 860 and 3260). + items: + type: string + type: array + x-kubernetes-list-type: atomic + readOnly: + description: |- + readOnly here will force the ReadOnly setting in VolumeMounts. + Defaults to false. + type: boolean + secretRef: + description: secretRef is the CHAP Secret + for iSCSI target and initiator authentication + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. 
Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + targetPortal: + description: |- + targetPortal is iSCSI Target Portal. The Portal is either an IP or ip_addr:port if the port + is other than default (typically TCP ports 860 and 3260). + type: string + required: + - iqn + - lun + - targetPortal + type: object + name: + description: |- + name of the volume. + Must be a DNS_LABEL and unique within the pod. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + nfs: + description: |- + nfs represents an NFS mount on the host that shares a pod's lifetime + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + properties: + path: + description: |- + path that is exported by the NFS server. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: string + readOnly: + description: |- + readOnly here will force the NFS export to be mounted with read-only permissions. + Defaults to false. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: boolean + server: + description: |- + server is the hostname or IP address of the NFS server. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: string + required: + - path + - server + type: object + persistentVolumeClaim: + description: |- + persistentVolumeClaimVolumeSource represents a reference to a + PersistentVolumeClaim in the same namespace. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims + properties: + claimName: + description: |- + claimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume. 
+ More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims + type: string + readOnly: + description: |- + readOnly Will force the ReadOnly setting in VolumeMounts. + Default false. + type: boolean + required: + - claimName + type: object + photonPersistentDisk: + description: photonPersistentDisk represents a + PhotonController persistent disk attached and + mounted on kubelets host machine + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + pdID: + description: pdID is the ID that identifies + Photon Controller persistent disk + type: string + required: + - pdID + type: object + portworxVolume: + description: portworxVolume represents a portworx + volume attached and mounted on kubelets host + machine + properties: + fsType: + description: |- + fSType represents the filesystem type to mount + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified. + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + volumeID: + description: volumeID uniquely identifies + a Portworx volume + type: string + required: + - volumeID + type: object + projected: + description: projected items for all in one resources + secrets, configmaps, and downward API + properties: + defaultMode: + description: |- + defaultMode are the mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Directories within the path are not affected by this setting. 
+ This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + sources: + description: |- + sources is the list of volume projections. Each entry in this list + handles one source. + items: + description: |- + Projection that may be projected along with other supported volume types. + Exactly one of these fields must be set. + properties: + clusterTrustBundle: + description: |- + ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field + of ClusterTrustBundle objects in an auto-updating file. + + Alpha, gated by the ClusterTrustBundleProjection feature gate. + + ClusterTrustBundle objects can either be selected by name, or by the + combination of signer name and a label selector. + + Kubelet performs aggressive normalization of the PEM contents written + into the pod filesystem. Esoteric PEM features such as inter-block + comments and block headers are stripped. Certificates are deduplicated. + The ordering of certificates within the file is arbitrary, and Kubelet + may change the order over time. + properties: + labelSelector: + description: |- + Select all ClusterTrustBundles that match this label selector. Only has + effect if signerName is set. Mutually-exclusive with name. If unset, + interpreted as "match nothing". If set but empty, interpreted as "match + everything". + properties: + matchExpressions: + description: matchExpressions + is a list of label selector + requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the + label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. 
+ type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + name: + description: |- + Select a single ClusterTrustBundle by object name. Mutually-exclusive + with signerName and labelSelector. + type: string + optional: + description: |- + If true, don't block pod startup if the referenced ClusterTrustBundle(s) + aren't available. If using name, then the named ClusterTrustBundle is + allowed not to exist. If using signerName, then the combination of + signerName and labelSelector is allowed to match zero + ClusterTrustBundles. + type: boolean + path: + description: Relative path from + the volume root to write the bundle. + type: string + signerName: + description: |- + Select all ClusterTrustBundles that match this signer name. + Mutually-exclusive with name. The contents of all selected + ClusterTrustBundles will be unified and deduplicated. 
+ type: string + required: + - path + type: object + configMap: + description: configMap information about + the configMap data to project + properties: + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + ConfigMap will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the ConfigMap, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key + to a path within a volume. + properties: + key: + description: key is the key + to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: optional specify whether + the ConfigMap or its keys must + be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + downwardAPI: + description: downwardAPI information + about the downwardAPI data to project + properties: + items: + description: Items is a list of + DownwardAPIVolume file + items: + description: DownwardAPIVolumeFile + represents information to create + the file containing the pod + field + properties: + fieldRef: + description: 'Required: Selects + a field of the pod: only + annotations, labels, name, + namespace and uid are supported.' + properties: + apiVersion: + description: Version of + the schema the FieldPath + is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the + field to select in the + specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + mode: + description: |- + Optional: mode bits used to set permissions on this file, must be an octal value + between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: 'Required: Path + is the relative path name + of the file to be created. + Must not be absolute or + contain the ''..'' path. + Must be utf-8 encoded. The + first item of the relative + path must not start with + ''..''' + type: string + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. 
+ properties: + containerName: + description: 'Container + name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies + the output format of + the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: + resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + required: + - path + type: object + type: array + x-kubernetes-list-type: atomic + type: object + secret: + description: secret information about + the secret data to project + properties: + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + Secret will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the Secret, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key + to a path within a volume. + properties: + key: + description: key is the key + to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. 
+ format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: optional field specify + whether the Secret or its key + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + serviceAccountToken: + description: serviceAccountToken is + information about the serviceAccountToken + data to project + properties: + audience: + description: |- + audience is the intended audience of the token. A recipient of a token + must identify itself with an identifier specified in the audience of the + token, and otherwise should reject the token. The audience defaults to the + identifier of the apiserver. + type: string + expirationSeconds: + description: |- + expirationSeconds is the requested duration of validity of the service + account token. As the token approaches expiration, the kubelet volume + plugin will proactively rotate the service account token. The kubelet will + start trying to rotate the token if the token is older than 80 percent of + its time to live or if the token is older than 24 hours.Defaults to 1 hour + and must be at least 10 minutes. + format: int64 + type: integer + path: + description: |- + path is the path relative to the mount point of the file to project the + token into. 
+ type: string + required: + - path + type: object + type: object + type: array + x-kubernetes-list-type: atomic + type: object + quobyte: + description: quobyte represents a Quobyte mount + on the host that shares a pod's lifetime + properties: + group: + description: |- + group to map volume access to + Default is no group + type: string + readOnly: + description: |- + readOnly here will force the Quobyte volume to be mounted with read-only permissions. + Defaults to false. + type: boolean + registry: + description: |- + registry represents a single or multiple Quobyte Registry services + specified as a string as host:port pair (multiple entries are separated with commas) + which acts as the central registry for volumes + type: string + tenant: + description: |- + tenant owning the given Quobyte volume in the Backend + Used with dynamically provisioned Quobyte volumes, value is set by the plugin + type: string + user: + description: |- + user to map volume access to + Defaults to serivceaccount user + type: string + volume: + description: volume is a string that references + an already created Quobyte volume by name. + type: string + required: + - registry + - volume + type: object + rbd: + description: |- + rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. + More info: https://examples.k8s.io/volumes/rbd/README.md + properties: + fsType: + description: |- + fsType is the filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd + type: string + image: + description: |- + image is the rados image name. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + keyring: + default: /etc/ceph/keyring + description: |- + keyring is the path to key ring for RBDUser. 
+ Default is /etc/ceph/keyring. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + monitors: + description: |- + monitors is a collection of Ceph monitors. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + items: + type: string + type: array + x-kubernetes-list-type: atomic + pool: + default: rbd + description: |- + pool is the rados pool name. + Default is rbd. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + readOnly: + description: |- + readOnly here will force the ReadOnly setting in VolumeMounts. + Defaults to false. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: boolean + secretRef: + description: |- + secretRef is name of the authentication secret for RBDUser. If provided + overrides keyring. + Default is nil. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + user: + default: admin + description: |- + user is the rados user name. + Default is admin. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + required: + - image + - monitors + type: object + scaleIO: + description: scaleIO represents a ScaleIO persistent + volume attached and mounted on Kubernetes nodes. + properties: + fsType: + default: xfs + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". + Default is "xfs". 
+ type: string + gateway: + description: gateway is the host address of + the ScaleIO API Gateway. + type: string + protectionDomain: + description: protectionDomain is the name + of the ScaleIO Protection Domain for the + configured storage. + type: string + readOnly: + description: |- + readOnly Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef references to the secret for ScaleIO user and other + sensitive information. If this is not provided, Login operation will fail. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + sslEnabled: + description: sslEnabled Flag enable/disable + SSL communication with Gateway, default + false + type: boolean + storageMode: + default: ThinProvisioned + description: |- + storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. + Default is ThinProvisioned. + type: string + storagePool: + description: storagePool is the ScaleIO Storage + Pool associated with the protection domain. + type: string + system: + description: system is the name of the storage + system as configured in ScaleIO. + type: string + volumeName: + description: |- + volumeName is the name of a volume already created in the ScaleIO system + that is associated with this volume source. + type: string + required: + - gateway + - secretRef + - system + type: object + secret: + description: |- + secret represents a secret that should populate this volume. 
+ More info: https://kubernetes.io/docs/concepts/storage/volumes#secret + properties: + defaultMode: + description: |- + defaultMode is Optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values + for mode bits. Defaults to 0644. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: |- + items If unspecified, each key-value pair in the Data field of the referenced + Secret will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the Secret, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path + within a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. 
+ type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + optional: + description: optional field specify whether + the Secret or its keys must be defined + type: boolean + secretName: + description: |- + secretName is the name of the secret in the pod's namespace to use. + More info: https://kubernetes.io/docs/concepts/storage/volumes#secret + type: string + type: object + storageos: + description: storageOS represents a StorageOS + volume attached and mounted on Kubernetes nodes. + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef specifies the secret to use for obtaining the StorageOS API + credentials. If not specified, default values will be attempted. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + volumeName: + description: |- + volumeName is the human-readable name of the StorageOS volume. Volume + names are only unique within a namespace. + type: string + volumeNamespace: + description: |- + volumeNamespace specifies the scope of the volume within StorageOS. If no + namespace is specified then the Pod's namespace will be used. This allows the + Kubernetes name scoping to be mirrored within StorageOS for tighter integration. 
+ Set VolumeName to any name to override the default behaviour. + Set to "default" if you are not using namespaces within StorageOS. + Namespaces that do not pre-exist within StorageOS will be created. + type: string + type: object + vsphereVolume: + description: vsphereVolume represents a vSphere + volume attached and mounted on kubelets host + machine + properties: + fsType: + description: |- + fsType is filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + storagePolicyID: + description: storagePolicyID is the storage + Policy Based Management (SPBM) profile ID + associated with the StoragePolicyName. + type: string + storagePolicyName: + description: storagePolicyName is the storage + Policy Based Management (SPBM) profile name. + type: string + volumePath: + description: volumePath is the path that identifies + vSphere volume vmdk + type: string + required: + - volumePath + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - containers + type: object + type: object + topologyRequest: + description: topologyRequest defines the topology request for + the PodSet. + properties: + preferred: + description: |- + preferred indicates the topology level preferred by the PodSet, as + indicated by the `kueue.x-k8s.io/podset-preferred-topology` PodSet + annotation. + type: string + required: + description: |- + required indicates the topology level required by the PodSet, as + indicated by the `kueue.x-k8s.io/podset-required-topology` PodSet + annotation. + type: string + type: object + required: + - count + - template + type: object + x-kubernetes-validations: + - message: minCount should be positive and less or equal to count + rule: 'has(self.minCount) ? 
self.minCount <= self.count : true' + maxItems: 8 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + priority: + description: |- + Priority determines the order of access to the resources managed by the + ClusterQueue where the workload is queued. + The priority value is populated from PriorityClassName. + The higher the value, the higher the priority. + If priorityClassName is specified, priority must not be null. + format: int32 + type: integer + priorityClassName: + description: |- + If specified, indicates the workload's priority. + "system-node-critical" and "system-cluster-critical" are two special + keywords which indicate the highest priorities with the former being + the highest priority. Any other name must be defined by creating a + PriorityClass object with that name. If not specified, the workload + priority will be default or zero if there is no default. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + priorityClassSource: + default: "" + description: |- + priorityClassSource determines whether the priorityClass field refers to a pod PriorityClass or kueue.x-k8s.io/workloadpriorityclass. + Workload's PriorityClass can accept the name of a pod priorityClass or a workloadPriorityClass. + When using pod PriorityClass, a priorityClassSource field has the scheduling.k8s.io/priorityclass value. + enum: + - kueue.x-k8s.io/workloadpriorityclass + - scheduling.k8s.io/priorityclass + - "" + type: string + queueName: + description: |- + queueName is the name of the LocalQueue the Workload is associated with. + queueName cannot be changed while .status.admission is not null. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + required: + - podSets + type: object + x-kubernetes-validations: + - message: priority should not be nil when priorityClassName is set + rule: 'has(self.priorityClassName) ? 
has(self.priority) : true' + status: + description: WorkloadStatus defines the observed state of Workload + properties: + accumulatedPastExexcutionTimeSeconds: + description: |- + accumulatedPastExexcutionTimeSeconds holds the total time, in seconds, the workload spent + in Admitted state, in the previous `Admit` - `Evict` cycles. + format: int32 + type: integer + admission: + description: |- + admission holds the parameters of the admission of the workload by a + ClusterQueue. admission can be set back to null, but its fields cannot be + changed once set. + properties: + clusterQueue: + description: clusterQueue is the name of the ClusterQueue that + admitted this workload. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + podSetAssignments: + description: PodSetAssignments hold the admission results for + each of the .spec.podSets entries. + items: + properties: + count: + description: |- + count is the number of pods taken into account at admission time. + This field will not change in case of quota reclaim. + Value could be missing for Workloads created before this field was added, + in that case spec.podSets[*].count value will be used. + format: int32 + minimum: 0 + type: integer + flavors: + additionalProperties: + description: ResourceFlavorReference is the name of the + ResourceFlavor. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + description: Flavors are the flavors assigned to the workload + for each resource. + type: object + name: + default: main + description: Name is the name of the podSet. It should match + one of the names in .spec.podSets. 
+ maxLength: 63 + pattern: ^(?i)[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + resourceUsage: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + resourceUsage keeps track of the total resources all the pods in the podset need to run. + + Beside what is provided in podSet's specs, this calculation takes into account + the LimitRange defaults and RuntimeClass overheads at the moment of admission. + This field will not change in case of quota reclaim. + type: object + topologyAssignment: + description: |- + topologyAssignment indicates the topology assignment divided into + topology domains corresponding to the lowest level of the topology. + The assignment specifies the number of Pods to be scheduled per topology + domain and specifies the node selectors for each topology domain, in the + following way: the node selector keys are specified by the levels field + (same for all domains), and the corresponding node selector value is + specified by the domains.values subfield. + + Example: + + topologyAssignment: + levels: + - cloud.provider.com/topology-block + - cloud.provider.com/topology-rack + domains: + - values: [block-1, rack-1] + count: 4 + - values: [block-1, rack-2] + count: 2 + + Here: + - 4 Pods are to be scheduled on nodes matching the node selector: + cloud.provider.com/topology-block: block-1 + cloud.provider.com/topology-rack: rack-1 + - 2 Pods are to be scheduled on nodes matching the node selector: + cloud.provider.com/topology-block: block-1 + cloud.provider.com/topology-rack: rack-2 + properties: + domains: + description: |- + domains is a list of topology assignments split by topology domains at + the lowest level of the topology. 
+ items: + properties: + count: + description: |- + count indicates the number of Pods to be scheduled in the topology + domain indicated by the values field. + format: int32 + minimum: 1 + type: integer + values: + description: |- + values is an ordered list of node selector values describing a topology + domain. The values correspond to the consecutive topology levels, from + the highest to the lowest. + items: + type: string + maxItems: 8 + minItems: 1 + type: array + x-kubernetes-list-type: atomic + required: + - count + - values + type: object + type: array + levels: + description: |- + levels is an ordered list of keys denoting the levels of the assigned + topology (i.e. node label keys), from the highest to the lowest level of + the topology. + items: + type: string + maxItems: 8 + minItems: 1 + type: array + x-kubernetes-list-type: atomic + required: + - domains + - levels + type: object + required: + - name + type: object + maxItems: 8 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - clusterQueue + - podSetAssignments + type: object + admissionChecks: + description: admissionChecks list all the admission checks required + by the workload and the current status + items: + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + name: + description: name identifies the admission check. + maxLength: 316 + type: string + podSetUpdates: + items: + description: |- + PodSetUpdate contains a list of pod set modifications suggested by AdmissionChecks. 
+ The modifications should be additive only - modifications of already existing keys + or having the same key provided by multiple AdmissionChecks is not allowed and will + result in failure during workload admission. + properties: + annotations: + additionalProperties: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + name: + description: Name of the PodSet to modify. Should match + to one of the Workload's PodSets. + type: string + nodeSelector: + additionalProperties: + type: string + type: object + tolerations: + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. 
+ If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + maxItems: 8 + type: array + x-kubernetes-validations: + - message: operator must be Exists when 'key' is empty, + which means 'match all values and all keys' + rule: 'self.all(x, !has(x.key) ? x.operator == ''Exists'' + : true)' + - message: effect must be 'NoExecute' when 'tolerationSeconds' + is set + rule: 'self.all(x, has(x.tolerationSeconds) ? x.effect + == ''NoExecute'' : true)' + - message: 'supported toleration values: ''Equal''(default), + ''Exists''' + rule: self.all(x, !has(x.operator) || x.operator in + ['Equal', 'Exists']) + - message: a value must be empty when 'operator' is 'Exists' + rule: 'self.all(x, has(x.operator) && x.operator == + ''Exists'' ? !has(x.value) : true)' + - message: 'supported taint effect values: ''NoSchedule'', + ''PreferNoSchedule'', ''NoExecute''' + rule: self.all(x, !has(x.effect) || x.effect in ['NoSchedule', + 'PreferNoSchedule', 'NoExecute']) + required: + - name + type: object + maxItems: 8 + type: array + x-kubernetes-list-type: atomic + state: + description: state of the admissionCheck, one of Pending, Ready, + Retry, Rejected + enum: + - Pending + - Ready + - Retry + - Rejected + type: string + required: + - lastTransitionTime + - message + - name + - state + type: object + maxItems: 8 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + conditions: + description: |- + conditions hold the latest available observations of the Workload + current state. + + The type of the condition could be: + + - Admitted: the Workload was admitted through a ClusterQueue. + - Finished: the associated workload finished running (failed or succeeded). + - PodsReady: at least `.spec.podSets[*].count` Pods are ready or have + succeeded. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. 
+ properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + reclaimablePods: + description: |- + reclaimablePods keeps track of the number pods within a podset for which + the resource reservation is no longer needed. + items: + properties: + count: + description: count is the number of pods for which the requested + resources are no longer needed. + format: int32 + minimum: 0 + type: integer + name: + description: name is the PodSet name. + type: string + required: + - count + - name + type: object + maxItems: 8 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + requeueState: + description: |- + requeueState holds the re-queue state + when a workload meets Eviction with PodsReadyTimeout reason. + properties: + count: + description: |- + count records the number of times a workload has been re-queued + When a deactivated (`.spec.activate`=`false`) workload is reactivated (`.spec.activate`=`true`), + this count would be reset to null. + format: int32 + minimum: 0 + type: integer + requeueAt: + description: |- + requeueAt records the time when a workload will be re-queued. + When a deactivated (`.spec.activate`=`false`) workload is reactivated (`.spec.activate`=`true`), + this time would be reset to null. + format: date-time + type: string + type: object + resourceRequests: + description: |- + resourceRequests provides a detailed view of the resources that were + requested by a non-admitted workload when it was considered for admission. + If admission is non-null, resourceRequests will be empty because + admission.resourceUsage contains the detailed information. + items: + properties: + name: + default: main + description: name is the name of the podSet. 
It should match + one of the names in .spec.podSets. + maxLength: 63 + pattern: ^(?i)[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + resources: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + resources is the total resources all the pods in the podset need to run. + + Beside what is provided in podSet's specs, this value also takes into account + the LimitRange defaults and RuntimeClass overheads at the moment of consideration + and the application of resource.excludeResourcePrefixes and resource.transformations. + type: object + required: + - name + type: object + maxItems: 8 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + type: object + x-kubernetes-validations: + - message: podSetAssignments must have the same number of podSets as the spec + rule: 'has(self.status) && has(self.status.conditions) && self.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'') && has(self.status.admission) + ? size(self.spec.podSets) == size(self.status.admission.podSetAssignments) + : true' + - message: field is immutable + rule: '(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'')) ? (oldSelf.spec.priorityClassSource + == self.spec.priorityClassSource) : true' + - message: field is immutable + rule: '(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'') && has(oldSelf.spec.priorityClassName) + && has(self.spec.priorityClassName)) ? 
(oldSelf.spec.priorityClassName + == self.spec.priorityClassName) : true' + - message: field is immutable + rule: '(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'')) && (has(self.status) + && has(self.status.conditions) && self.status.conditions.exists(c, c.type + == ''QuotaReserved'' && c.status == ''True'')) && has(oldSelf.spec.queueName) + && has(self.spec.queueName) ? oldSelf.spec.queueName == self.spec.queueName + : true' + - message: maximumExecutionTimeSeconds is immutable while admitted + rule: ((has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == 'Admitted' && c.status == 'True')) && (has(self.status) && has(self.status.conditions) + && self.status.conditions.exists(c, c.type == 'Admitted' && c.status == + 'True')))?((has(oldSelf.spec.maximumExecutionTimeSeconds)?oldSelf.spec.maximumExecutionTimeSeconds:0) + == (has(self.spec.maximumExecutionTimeSeconds)?self.spec.maximumExecutionTimeSeconds:0)):true + served: true + storage: true + subresources: + status: {} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-controller-manager + namespace: kueue-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-leader-election-role + namespace: kueue-system +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +--- +aggregationRule: + 
clusterRoleSelectors: + - matchLabels: + rbac.kueue.x-k8s.io/batch-admin: "true" +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-batch-admin-role +--- +aggregationRule: + clusterRoleSelectors: + - matchLabels: + rbac.kueue.x-k8s.io/batch-user: "true" +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-batch-user-role +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-clusterqueue-editor-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - clusterqueues + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - clusterqueues/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-clusterqueue-viewer-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - clusterqueues + verbs: + - get + - list + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - clusterqueues/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-job-editor-role +rules: +- apiGroups: + - batch + resources: + - jobs + verbs: + - 
create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - batch + resources: + - jobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-job-viewer-role +rules: +- apiGroups: + - batch + resources: + - jobs + verbs: + - get + - list + - watch +- apiGroups: + - batch + resources: + - jobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-jobset-editor-role +rules: +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-jobset-viewer-role +rules: +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets + verbs: + - get + - list + - watch +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-localqueue-editor-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - localqueues + 
verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - localqueues/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-localqueue-viewer-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - localqueues + verbs: + - get + - list + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - localqueues/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-manager-role +rules: +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update + - watch +- apiGroups: + - "" + resources: + - limitranges + - namespaces + - nodes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + verbs: + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - pods/finalizers + verbs: + - get + - update +- apiGroups: + - "" + resources: + - pods/status + verbs: + - get + - patch +- apiGroups: + - "" + resources: + - podtemplates + verbs: + - create + - delete + - get + - list + - update + - watch +- apiGroups: + - "" + resources: + - secrets + verbs: + - get + - list + - update + - watch +- apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + - validatingwebhookconfigurations + verbs: + - get + - list + - update + - watch +- apiGroups: + - admissionregistration.k8s.io + resources: + - validatingadmissionpolicies + - validatingadmissionpolicybindings + verbs: + - get + - list + - watch +- apiGroups: + - autoscaling.x-k8s.io + resources: + - 
provisioningrequests + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - autoscaling.x-k8s.io + resources: + - provisioningrequests/status + verbs: + - get +- apiGroups: + - batch + resources: + - jobs + verbs: + - get + - list + - patch + - update + - watch +- apiGroups: + - batch + resources: + - jobs/finalizers + - jobs/status + verbs: + - get + - patch + - update +- apiGroups: + - flowcontrol.apiserver.k8s.io + resources: + - flowschemas + - prioritylevelconfigurations + verbs: + - list + - watch +- apiGroups: + - flowcontrol.apiserver.k8s.io + resources: + - flowschemas/status + verbs: + - patch +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets + verbs: + - get + - list + - patch + - update + - watch +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets/finalizers + verbs: + - get + - update +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets/status + verbs: + - get + - patch + - update +- apiGroups: + - kubeflow.org + resources: + - mpijobs + - mxjobs + - paddlejobs + - pytorchjobs + - tfjobs + - xgboostjobs + verbs: + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - mpijobs/finalizers + - mxjobs/finalizers + - mxjobs/status + - paddlejobs/finalizers + - pytorchjobs/finalizers + - tfjobs/finalizers + - xgboostjobs/finalizers + verbs: + - get + - update +- apiGroups: + - kubeflow.org + resources: + - mpijobs/status + - paddlejobs/status + - pytorchjobs/status + - tfjobs/status + - xgboostjobs/status + verbs: + - get + - patch + - update +- apiGroups: + - kueue.x-k8s.io + resources: + - admissionchecks + - clusterqueues + - cohorts + - localqueues + - workloads + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - admissionchecks/finalizers + - clusterqueues/finalizers + - localqueues/finalizers + - resourceflavors/finalizers + - workloads/finalizers + verbs: + - update +- 
apiGroups: + - kueue.x-k8s.io + resources: + - admissionchecks/status + - clusterqueues/status + - localqueues/status + - multikueueclusters/status + - workloads/status + verbs: + - get + - patch + - update +- apiGroups: + - kueue.x-k8s.io + resources: + - multikueueclusters + - multikueueconfigs + - provisioningrequestconfigs + - topologies + - workloadpriorityclasses + verbs: + - get + - list + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - resourceflavors + verbs: + - delete + - get + - list + - update + - watch +- apiGroups: + - node.k8s.io + resources: + - runtimeclasses + verbs: + - get + - list + - watch +- apiGroups: + - ray.io + resources: + - rayclusters + - rayjobs + verbs: + - get + - list + - patch + - update + - watch +- apiGroups: + - ray.io + resources: + - rayclusters/finalizers + - rayclusters/status + - rayjobs/finalizers + - rayjobs/status + verbs: + - get + - update +- apiGroups: + - scheduling.k8s.io + resources: + - priorityclasses + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-metrics-reader +rules: +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-mpijob-editor-role +rules: +- apiGroups: + - kubeflow.org + resources: + - mpijobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - mpijobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + 
control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-mpijob-viewer-role +rules: +- apiGroups: + - kubeflow.org + resources: + - mpijobs + verbs: + - get + - list + - watch +- apiGroups: + - kubeflow.org + resources: + - mpijobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-mxjob-editor-role +rules: +- apiGroups: + - kubeflow.org + resources: + - mxjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - mxjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-mxjob-viewer-role +rules: +- apiGroups: + - kubeflow.org + resources: + - mxjobs + verbs: + - get + - list + - watch +- apiGroups: + - kubeflow.org + resources: + - mxjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-paddlejob-editor-role +rules: +- apiGroups: + - kubeflow.org + resources: + - paddlejobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - paddlejobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + 
app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-paddlejob-viewer-role +rules: +- apiGroups: + - kubeflow.org + resources: + - paddlejobs + verbs: + - get + - list + - watch +- apiGroups: + - kubeflow.org + resources: + - paddlejobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-pending-workloads-cq-viewer-role +rules: +- apiGroups: + - visibility.kueue.x-k8s.io + resources: + - clusterqueues/pendingworkloads + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-pending-workloads-lq-viewer-role +rules: +- apiGroups: + - visibility.kueue.x-k8s.io + resources: + - localqueues/pendingworkloads + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-proxy-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + 
name: kueue-pytorchjob-editor-role +rules: +- apiGroups: + - kubeflow.org + resources: + - pytorchjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - pytorchjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-pytorchjob-viewer-role +rules: +- apiGroups: + - kubeflow.org + resources: + - pytorchjobs + verbs: + - get + - list + - watch +- apiGroups: + - kubeflow.org + resources: + - pytorchjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-raycluster-editor-role +rules: +- apiGroups: + - ray.io + resources: + - rayclusters + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - ray.io + resources: + - rayclusters/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-raycluster-viewer-role +rules: +- apiGroups: + - ray.io + resources: + - rayclusters + verbs: + - get + - list + - watch +- apiGroups: + - ray.io + resources: + - rayclusters/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + 
rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-rayjob-editor-role +rules: +- apiGroups: + - ray.io + resources: + - rayjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - ray.io + resources: + - rayjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-rayjob-viewer-role +rules: +- apiGroups: + - ray.io + resources: + - rayjobs + verbs: + - get + - list + - watch +- apiGroups: + - ray.io + resources: + - rayjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-resourceflavor-editor-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - resourceflavors + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-resourceflavor-viewer-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - resourceflavors + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-tfjob-editor-role +rules: +- apiGroups: + - kubeflow.org + resources: + - tfjobs + verbs: + - create + - delete + - 
get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - tfjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-tfjob-viewer-role +rules: +- apiGroups: + - kubeflow.org + resources: + - tfjobs + verbs: + - get + - list + - watch +- apiGroups: + - kubeflow.org + resources: + - tfjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-workload-editor-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - workloads + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - workloads/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-workload-viewer-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - workloads + verbs: + - get + - list + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - workloads/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-xgboostjob-editor-role +rules: +- apiGroups: + - kubeflow.org + resources: + - 
xgboostjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - xgboostjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-xgboostjob-viewer-role +rules: +- apiGroups: + - kubeflow.org + resources: + - xgboostjobs + verbs: + - get + - list + - watch +- apiGroups: + - kubeflow.org + resources: + - xgboostjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-visibility-server-auth-reader + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: extension-apiserver-authentication-reader +subjects: +- kind: ServiceAccount + name: kueue-controller-manager + namespace: kueue-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-leader-election-rolebinding + namespace: kueue-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: kueue-leader-election-role +subjects: +- kind: ServiceAccount + name: kueue-controller-manager + namespace: kueue-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kueue-manager-role +subjects: +- kind: ServiceAccount + name: 
kueue-controller-manager + namespace: kueue-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-proxy-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kueue-proxy-role +subjects: +- kind: ServiceAccount + name: kueue-controller-manager + namespace: kueue-system +--- +apiVersion: v1 +data: + controller_manager_config.yaml: | + apiVersion: config.kueue.x-k8s.io/v1beta1 + kind: Configuration + health: + healthProbeBindAddress: :8081 + metrics: + bindAddress: :8080 + # enableClusterQueueResources: true + webhook: + port: 9443 + leaderElection: + leaderElect: true + resourceName: c1f6bfd2.kueue.x-k8s.io + controller: + groupKindConcurrency: + Job.batch: 5 + Pod: 5 + Workload.kueue.x-k8s.io: 5 + LocalQueue.kueue.x-k8s.io: 1 + Cohort.kueue.x-k8s.io: 1 + ClusterQueue.kueue.x-k8s.io: 1 + ResourceFlavor.kueue.x-k8s.io: 1 + clientConnection: + qps: 50 + burst: 100 + #pprofBindAddress: :8083 + #waitForPodsReady: + # enable: false + # timeout: 5m + # blockAdmission: false + # requeuingStrategy: + # timestamp: Eviction + # backoffLimitCount: null # null indicates infinite requeuing + # backoffBaseSeconds: 60 + # backoffMaxSeconds: 3600 + #manageJobsWithoutQueueName: true + #internalCertManagement: + # enable: false + # webhookServiceName: "" + # webhookSecretName: "" + integrations: + frameworks: + - "batch/job" + - "kubeflow.org/mpijob" + - "ray.io/rayjob" + - "ray.io/raycluster" + - "jobset.x-k8s.io/jobset" + - "kubeflow.org/mxjob" + - "kubeflow.org/paddlejob" + - "kubeflow.org/pytorchjob" + - "kubeflow.org/tfjob" + - "kubeflow.org/xgboostjob" + # - "pod" + # - "deployment" # requires enabling pod integration + # - "statefulset" # requires enabling pod integration + # externalFrameworks: + # - "Foo.v1.example.com" + # podOptions: + # namespaceSelector: + # 
matchExpressions: + # - key: kubernetes.io/metadata.name + # operator: NotIn + # values: [ kube-system, kueue-system ] + #fairSharing: + # enable: true + # preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare] + #resources: + # excludeResourcePrefixes: [] + # transformations: + # - input: nvidia.com/mig-4g.5gb + # strategy: Replace | Retain + # outputs: + # example.com/accelerator-memory: 5Gi + # example.com/accelerator-gpc: 4 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-manager-config + namespace: kueue-system +--- +apiVersion: v1 +kind: Secret +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-webhook-server-cert + namespace: kueue-system +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-controller-manager-metrics-service + namespace: kueue-system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: https + selector: + control-plane: controller-manager +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-visibility-server + namespace: kueue-system +spec: + ports: + - name: https + port: 443 + protocol: TCP + targetPort: 8082 + selector: + control-plane: controller-manager +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-webhook-service + namespace: kueue-system +spec: + ports: + - port: 443 + protocol: TCP + targetPort: 9443 + selector: + control-plane: controller-manager +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + 
labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-controller-manager + namespace: kueue-system +spec: + replicas: 1 + selector: + matchLabels: + control-plane: controller-manager + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + spec: + containers: + - args: + - --config=/controller_manager_config.yaml + - --zap-log-level=5 + - --feature-gates=TopologyAwareScheduling=true + command: + - /manager + image: registry.k8s.io/kueue/kueue:v0.9.0 + imagePullPolicy: Always + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + name: manager + ports: + - containerPort: 8082 + name: visibility + protocol: TCP + - containerPort: 9443 + name: webhook-server + protocol: TCP + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + securityContext: + allowPrivilegeEscalation: false + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + - mountPath: /controller_manager_config.yaml + name: manager-config + subPath: controller_manager_config.yaml + - args: + - --secure-listen-address=0.0.0.0:8443 + - --upstream=http://127.0.0.1:8080/ + - --logtostderr=true + - --v=10 + image: gcr.io/kubebuilder/kube-rbac-proxy:v0.8.0 + name: kube-rbac-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + securityContext: + runAsNonRoot: true + serviceAccountName: kueue-controller-manager + terminationGracePeriodSeconds: 10 + volumes: + - name: cert + secret: + defaultMode: 420 + secretName: kueue-webhook-server-cert + - configMap: + name: kueue-manager-config + name: manager-config + tolerations: + - effect: 
NoSchedule + key: components.gke.io/gke-managed-components + operator: Equal + value: "true" +--- +apiVersion: apiregistration.k8s.io/v1 +kind: APIService +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: v1alpha1.visibility.kueue.x-k8s.io +spec: + group: visibility.kueue.x-k8s.io + groupPriorityMinimum: 100 + insecureSkipTLSVerify: true + service: + name: kueue-visibility-server + namespace: kueue-system + version: v1alpha1 + versionPriority: 100 +--- +apiVersion: apiregistration.k8s.io/v1 +kind: APIService +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: v1beta1.visibility.kueue.x-k8s.io +spec: + group: visibility.kueue.x-k8s.io + groupPriorityMinimum: 100 + insecureSkipTLSVerify: true + service: + name: kueue-visibility-server + namespace: kueue-system + version: v1beta1 + versionPriority: 100 +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-mutating-webhook-configuration +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate--v1-pod + failurePolicy: Fail + name: mpod.kb.io + namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: NotIn + values: + - kube-system + - kueue-system + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + resources: + - pods + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-apps-v1-deployment + failurePolicy: Fail + name: mdeployment.kb.io + namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: NotIn 
+ values: + - kube-system + - kueue-system + rules: + - apiGroups: + - apps + apiVersions: + - v1 + operations: + - CREATE + resources: + - deployments + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-batch-v1-job + failurePolicy: Fail + name: mjob.kb.io + rules: + - apiGroups: + - batch + apiVersions: + - v1 + operations: + - CREATE + resources: + - jobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-jobset-x-k8s-io-v1alpha2-jobset + failurePolicy: Fail + name: mjobset.kb.io + rules: + - apiGroups: + - jobset.x-k8s.io + apiVersions: + - v1alpha2 + operations: + - CREATE + resources: + - jobsets + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kubeflow-org-v1-mxjob + failurePolicy: Fail + name: mmxjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + resources: + - mxjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kubeflow-org-v1-paddlejob + failurePolicy: Fail + name: mpaddlejob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + resources: + - paddlejobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kubeflow-org-v1-pytorchjob + failurePolicy: Fail + name: mpytorchjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + resources: + - pytorchjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: 
/mutate-kubeflow-org-v1-tfjob + failurePolicy: Fail + name: mtfjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + resources: + - tfjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kubeflow-org-v1-xgboostjob + failurePolicy: Fail + name: mxgboostjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + resources: + - xgboostjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kubeflow-org-v2beta1-mpijob + failurePolicy: Fail + name: mmpijob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v2beta1 + operations: + - CREATE + resources: + - mpijobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-ray-io-v1-raycluster + failurePolicy: Fail + name: mraycluster.kb.io + rules: + - apiGroups: + - ray.io + apiVersions: + - v1 + operations: + - CREATE + resources: + - rayclusters + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-ray-io-v1-rayjob + failurePolicy: Fail + name: mrayjob.kb.io + rules: + - apiGroups: + - ray.io + apiVersions: + - v1 + operations: + - CREATE + resources: + - rayjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-apps-v1-statefulset + failurePolicy: Fail + name: mstatefulset.kb.io + rules: + - apiGroups: + - apps + apiVersions: + - v1 + operations: + - CREATE + resources: + - statefulsets + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: 
kueue-system + path: /mutate-kueue-x-k8s-io-v1beta1-clusterqueue + failurePolicy: Fail + name: mclusterqueue.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1beta1 + operations: + - CREATE + resources: + - clusterqueues + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kueue-x-k8s-io-v1beta1-resourceflavor + failurePolicy: Fail + name: mresourceflavor.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1beta1 + operations: + - CREATE + resources: + - resourceflavors + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kueue-x-k8s-io-v1beta1-workload + failurePolicy: Fail + name: mworkload.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1beta1 + operations: + - CREATE + resources: + - workloads + sideEffects: None +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-validating-webhook-configuration +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate--v1-pod + failurePolicy: Fail + name: vpod.kb.io + namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: NotIn + values: + - kube-system + - kueue-system + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - pods + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-apps-v1-deployment + failurePolicy: Fail + name: vdeployment.kb.io + namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name 
+ operator: NotIn + values: + - kube-system + - kueue-system + rules: + - apiGroups: + - apps + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - deployments + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-batch-v1-job + failurePolicy: Fail + name: vjob.kb.io + rules: + - apiGroups: + - batch + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - jobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-jobset-x-k8s-io-v1alpha2-jobset + failurePolicy: Fail + name: vjobset.kb.io + rules: + - apiGroups: + - jobset.x-k8s.io + apiVersions: + - v1alpha2 + operations: + - CREATE + - UPDATE + resources: + - jobsets + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kubeflow-org-v1-mxjob + failurePolicy: Fail + name: vmxjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - mxjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kubeflow-org-v1-paddlejob + failurePolicy: Fail + name: vpaddlejob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - paddlejobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kubeflow-org-v1-pytorchjob + failurePolicy: Fail + name: vpytorchjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - pytorchjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: 
+ service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kubeflow-org-v1-tfjob + failurePolicy: Fail + name: vtfjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - tfjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kubeflow-org-v1-xgboostjob + failurePolicy: Fail + name: vxgboostjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - xgboostjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kubeflow-org-v2beta1-mpijob + failurePolicy: Fail + name: vmpijob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v2beta1 + operations: + - CREATE + - UPDATE + resources: + - mpijobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-ray-io-v1-raycluster + failurePolicy: Fail + name: vraycluster.kb.io + rules: + - apiGroups: + - ray.io + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - rayclusters + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-ray-io-v1-rayjob + failurePolicy: Fail + name: vrayjob.kb.io + rules: + - apiGroups: + - ray.io + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - rayjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-apps-v1-statefulset + failurePolicy: Fail + name: vstatefulset.kb.io + rules: + - apiGroups: + - apps + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: 
+ - statefulsets + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kueue-x-k8s-io-v1beta1-clusterqueue + failurePolicy: Fail + name: vclusterqueue.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - clusterqueues + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kueue-x-k8s-io-v1alpha1-cohort + failurePolicy: Fail + name: vcohort.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - cohorts + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kueue-x-k8s-io-v1beta1-resourceflavor + failurePolicy: Fail + name: vresourceflavor.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - resourceflavors + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kueue-x-k8s-io-v1beta1-workload + failurePolicy: Fail + name: vworkload.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - workloads + - workloads/status + sideEffects: None diff --git a/modules/management/kubectl-apply/variables.tf b/modules/management/kubectl-apply/variables.tf index 2e0a36603d..943ecf3ec7 100644 --- a/modules/management/kubectl-apply/variables.tf +++ b/modules/management/kubectl-apply/variables.tf @@ -41,15 +41,15 @@ variable "kueue" { description = "Install and configure [Kueue](https://kueue.sigs.k8s.io/docs/overview/) workload scheduler. 
A configuration yaml/template file can be provided with config_path to be applied right after kueue installation. If a template file provided, its variables can be set to config_template_vars." type = object({ install = optional(bool, false) - version = optional(string, "v0.8.1") + version = optional(string, "v0.9.0") config_path = optional(string, null) config_template_vars = optional(map(any), null) }) default = {} validation { - condition = !var.kueue.install || contains(["v0.8.1"], var.kueue.version) - error_message = "Supported version of Kueue is v0.8.1" + condition = !var.kueue.install || contains(["v0.9.0", "v0.8.1"], var.kueue.version) + error_message = "Supported version of Kueue is v0.9.0" } } From 7385b1a0fa82e7269c567850477513713808cd03 Mon Sep 17 00:00:00 2001 From: ighosh98 Date: Thu, 14 Nov 2024 18:57:54 +0000 Subject: [PATCH 21/26] Update kueue error message --- modules/management/kubectl-apply/variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/management/kubectl-apply/variables.tf b/modules/management/kubectl-apply/variables.tf index 943ecf3ec7..c9491e855e 100644 --- a/modules/management/kubectl-apply/variables.tf +++ b/modules/management/kubectl-apply/variables.tf @@ -49,7 +49,7 @@ variable "kueue" { validation { condition = !var.kueue.install || contains(["v0.9.0", "v0.8.1"], var.kueue.version) - error_message = "Supported version of Kueue is v0.9.0" + error_message = "Default Supported version of Kueue is v0.9.0. 
Cluster toolkit also supports v0.8.1" } } From d4e2b2046c5a01a2ea4f9ff3bd4e127b7218d806 Mon Sep 17 00:00:00 2001 From: ighosh98 Date: Fri, 15 Nov 2024 10:00:47 +0000 Subject: [PATCH 22/26] Make variables.tf more modular and set default kueue version to v0.8.1 --- modules/management/kubectl-apply/README.md | 4 +++- modules/management/kubectl-apply/variables.tf | 20 ++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/modules/management/kubectl-apply/README.md b/modules/management/kubectl-apply/README.md index 3b70cd9864..736ff7885f 100644 --- a/modules/management/kubectl-apply/README.md +++ b/modules/management/kubectl-apply/README.md @@ -110,6 +110,7 @@ limitations under the License. | Name | Version | |------|---------| | [google](#provider\_google) | > 5.0 | +| [terraform](#provider\_terraform) | n/a | ## Modules @@ -124,6 +125,7 @@ limitations under the License. | Name | Type | |------|------| +| [terraform_data.kueue_validations](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | | [google_client_config.default](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/client_config) | data source | | [google_container_cluster.gke_cluster](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/container_cluster) | data source | @@ -134,7 +136,7 @@ limitations under the License. | [apply\_manifests](#input\_apply\_manifests) | A list of manifests to apply to GKE cluster using kubectl. For more details see [kubectl module's inputs](kubectl/README.md). |
list(object({
content = optional(string, null)
source = optional(string, null)
template_vars = optional(map(any), null)
server_side_apply = optional(bool, false)
wait_for_rollout = optional(bool, true)
}))
| `[]` | no | | [cluster\_id](#input\_cluster\_id) | An identifier for the gke cluster resource with format projects//locations//clusters/. | `string` | n/a | yes | | [jobset](#input\_jobset) | Install [Jobset](https://github.com/kubernetes-sigs/jobset) which manages a group of K8s [jobs](https://kubernetes.io/docs/concepts/workloads/controllers/job/) as a unit. |
object({
install = optional(bool, false)
version = optional(string, "v0.5.2")
})
| `{}` | no | -| [kueue](#input\_kueue) | Install and configure [Kueue](https://kueue.sigs.k8s.io/docs/overview/) workload scheduler. A configuration yaml/template file can be provided with config\_path to be applied right after kueue installation. If a template file provided, its variables can be set to config\_template\_vars. |
object({
install = optional(bool, false)
version = optional(string, "v0.9.0")
config_path = optional(string, null)
config_template_vars = optional(map(any), null)
})
| `{}` | no | +| [kueue](#input\_kueue) | Install and configure [Kueue](https://kueue.sigs.k8s.io/docs/overview/) workload scheduler. A configuration yaml/template file can be provided with config\_path to be applied right after kueue installation. If a template file provided, its variables can be set to config\_template\_vars. |
object({
install = optional(bool, false)
version = optional(string, "v0.8.1")
config_path = optional(string, null)
config_template_vars = optional(map(any), null)
})
| `{}` | no | | [project\_id](#input\_project\_id) | The project ID that hosts the gke cluster. | `string` | n/a | yes | ## Outputs diff --git a/modules/management/kubectl-apply/variables.tf b/modules/management/kubectl-apply/variables.tf index c9491e855e..5f78c81927 100644 --- a/modules/management/kubectl-apply/variables.tf +++ b/modules/management/kubectl-apply/variables.tf @@ -14,6 +14,19 @@ * limitations under the License. */ +locals { + supported_versions = ["v0.9.0", "v0.8.1"] +} + +resource "terraform_data" "kueue_validations" { + lifecycle { + precondition { + condition = !var.kueue.install || contains(local.supported_versions, var.kueue.version) + error_message = "Supported version of Kueue are ${join(", ", local.supported_versions)}" + } + } +} + variable "project_id" { description = "The project ID that hosts the gke cluster." type = string @@ -37,20 +50,17 @@ variable "apply_manifests" { default = [] } + variable "kueue" { description = "Install and configure [Kueue](https://kueue.sigs.k8s.io/docs/overview/) workload scheduler. A configuration yaml/template file can be provided with config_path to be applied right after kueue installation. If a template file provided, its variables can be set to config_template_vars." type = object({ install = optional(bool, false) - version = optional(string, "v0.9.0") + version = optional(string, "v0.8.1") config_path = optional(string, null) config_template_vars = optional(map(any), null) }) default = {} - validation { - condition = !var.kueue.install || contains(["v0.9.0", "v0.8.1"], var.kueue.version) - error_message = "Default Supported version of Kueue is v0.9.0. 
Cluster toolkit also supports v0.8.1" - } } variable "jobset" { From f5e8171dacc23516d0600dd08ae0cdd42237eefd Mon Sep 17 00:00:00 2001 From: Carson Dunbar Date: Wed, 20 Nov 2024 14:26:59 +0000 Subject: [PATCH 23/26] Updating RDMA-VPC to use v9.3 CFT modules --- community/modules/network/rdma-vpc/main.tf | 3 +-- .../modules/network/rdma-vpc/vpc-submodule/README.md | 6 +++--- community/modules/network/rdma-vpc/vpc-submodule/main.tf | 9 ++++++--- .../modules/network/rdma-vpc/vpc-submodule/versions.tf | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/community/modules/network/rdma-vpc/main.tf b/community/modules/network/rdma-vpc/main.tf index cf4d76fb73..cd5dafa1fc 100644 --- a/community/modules/network/rdma-vpc/main.tf +++ b/community/modules/network/rdma-vpc/main.tf @@ -143,8 +143,7 @@ locals { } module "vpc" { - source = "./vpc-submodule" - + source = "./vpc-submodule" network_name = local.network_name project_id = var.project_id auto_create_subnetworks = false diff --git a/community/modules/network/rdma-vpc/vpc-submodule/README.md b/community/modules/network/rdma-vpc/vpc-submodule/README.md index 5dfd55b4bb..06912e7ae4 100644 --- a/community/modules/network/rdma-vpc/vpc-submodule/README.md +++ b/community/modules/network/rdma-vpc/vpc-submodule/README.md @@ -36,9 +36,9 @@ limitations under the License. 
| Name | Source | Version | |------|--------|---------| -| [firewall\_rules](#module\_firewall\_rules) | github.com/terraform-google-modules/terraform-google-network.git//modules/firewall-rules?depth=1&ref=v9.0.0 | n/a | -| [routes](#module\_routes) | github.com/terraform-google-modules/terraform-google-network.git//modules/routes?depth=1&ref=v9.0.0 | n/a | -| [subnets](#module\_subnets) | github.com/terraform-google-modules/terraform-google-network.git//modules/subnets?depth=1&ref=v9.0.0 | n/a | +| [firewall\_rules](#module\_firewall\_rules) | terraform-google-modules/network/google//modules/firewall-rules | ~> 9.3 | +| [routes](#module\_routes) | terraform-google-modules/network/google//modules/routes | ~> 9.3 | +| [subnets](#module\_subnets) | terraform-google-modules/network/google//modules/subnets | ~> 9.3 | ## Resources diff --git a/community/modules/network/rdma-vpc/vpc-submodule/main.tf b/community/modules/network/rdma-vpc/vpc-submodule/main.tf index e524a4cbff..f0b8488df5 100644 --- a/community/modules/network/rdma-vpc/vpc-submodule/main.tf +++ b/community/modules/network/rdma-vpc/vpc-submodule/main.tf @@ -46,7 +46,8 @@ resource "google_compute_shared_vpc_host_project" "shared_vpc_host" { Subnet configuration *****************************************/ module "subnets" { - source = "github.com/terraform-google-modules/terraform-google-network.git//modules/subnets?depth=1&ref=v9.0.0" + source = "terraform-google-modules/network/google//modules/subnets" + version = "~> 9.3" project_id = var.project_id network_name = google_compute_network.network.name subnets = var.subnets @@ -57,7 +58,8 @@ module "subnets" { Routes *****************************************/ module "routes" { - source = "github.com/terraform-google-modules/terraform-google-network.git//modules/routes?depth=1&ref=v9.0.0" + source = "terraform-google-modules/network/google//modules/routes" + version = "~> 9.3" project_id = var.project_id network_name = google_compute_network.network.name routes 
= var.routes @@ -88,7 +90,8 @@ locals { } module "firewall_rules" { - source = "github.com/terraform-google-modules/terraform-google-network.git//modules/firewall-rules?depth=1&ref=v9.0.0" + source = "terraform-google-modules/network/google//modules/firewall-rules" + version = "~> 9.3" project_id = var.project_id network_name = google_compute_network.network.name rules = local.rules diff --git a/community/modules/network/rdma-vpc/vpc-submodule/versions.tf b/community/modules/network/rdma-vpc/vpc-submodule/versions.tf index a4425f0a46..e56d084da4 100644 --- a/community/modules/network/rdma-vpc/vpc-submodule/versions.tf +++ b/community/modules/network/rdma-vpc/vpc-submodule/versions.tf @@ -28,6 +28,6 @@ terraform { } provider_meta "google" { - module_name = "blueprints/terraform/terraform-google-network/v9.1.0" + module_name = "blueprints/terraform/terraform-google-network/v9.3.0" } } From 700db8b92201e8b3df1397b99e98b59c7b45615b Mon Sep 17 00:00:00 2001 From: cdunbar13 <139253655+cdunbar13@users.noreply.github.com> Date: Wed, 20 Nov 2024 10:33:26 -0500 Subject: [PATCH 24/26] Update community/modules/network/rdma-vpc/vpc-submodule/versions.tf Co-authored-by: Tom Downes --- community/modules/network/rdma-vpc/vpc-submodule/versions.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/community/modules/network/rdma-vpc/vpc-submodule/versions.tf b/community/modules/network/rdma-vpc/vpc-submodule/versions.tf index e56d084da4..d24019ea7f 100644 --- a/community/modules/network/rdma-vpc/vpc-submodule/versions.tf +++ b/community/modules/network/rdma-vpc/vpc-submodule/versions.tf @@ -28,6 +28,6 @@ terraform { } provider_meta "google" { - module_name = "blueprints/terraform/terraform-google-network/v9.3.0" + module_name = "blueprints/terraform/hpc-toolkit:rdma-vpc/experimental" } } From 467d1163d7cee4eb559b3870a31690e3dba209f4 Mon Sep 17 00:00:00 2001 From: Carson Dunbar Date: Tue, 26 Nov 2024 18:06:30 +0000 Subject: [PATCH 25/26] Moving from private provider to 
google-beta 6.13.0 --- community/modules/network/rdma-vpc/vpc-submodule/README.md | 6 +++--- community/modules/network/rdma-vpc/vpc-submodule/main.tf | 2 +- .../modules/network/rdma-vpc/vpc-submodule/versions.tf | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/community/modules/network/rdma-vpc/vpc-submodule/README.md b/community/modules/network/rdma-vpc/vpc-submodule/README.md index 06912e7ae4..253ebaf772 100644 --- a/community/modules/network/rdma-vpc/vpc-submodule/README.md +++ b/community/modules/network/rdma-vpc/vpc-submodule/README.md @@ -23,14 +23,14 @@ limitations under the License. |------|---------| | [terraform](#requirement\_terraform) | >= 1.3 | | [google](#requirement\_google) | >= 4.64 | -| [google-private](#requirement\_google-private) | >= 0.0.1962 | +| [google-beta](#requirement\_google-beta) | >= 6.13.0 | ## Providers | Name | Version | |------|---------| | [google](#provider\_google) | >= 4.64 | -| [google-private](#provider\_google-private) | >= 0.0.1962 | +| [google-beta](#provider\_google-beta) | >= 6.13.0 | ## Modules @@ -44,7 +44,7 @@ limitations under the License. 
| Name | Type | |------|------| -| [google-private_google_compute_network.network](https://registry.terraform.io/providers/hashicorp/google-private/latest/docs/resources/google_compute_network) | resource | +| [google-beta_google_compute_network.network](https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/google_compute_network) | resource | | [google_compute_shared_vpc_host_project.shared_vpc_host](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_shared_vpc_host_project) | resource | ## Inputs diff --git a/community/modules/network/rdma-vpc/vpc-submodule/main.tf b/community/modules/network/rdma-vpc/vpc-submodule/main.tf index f0b8488df5..48ae8e705d 100644 --- a/community/modules/network/rdma-vpc/vpc-submodule/main.tf +++ b/community/modules/network/rdma-vpc/vpc-submodule/main.tf @@ -18,7 +18,7 @@ VPC configuration *****************************************/ resource "google_compute_network" "network" { - provider = google-private + provider = google-beta name = var.network_name auto_create_subnetworks = var.auto_create_subnetworks routing_mode = var.routing_mode diff --git a/community/modules/network/rdma-vpc/vpc-submodule/versions.tf b/community/modules/network/rdma-vpc/vpc-submodule/versions.tf index d24019ea7f..b75f5c66ee 100644 --- a/community/modules/network/rdma-vpc/vpc-submodule/versions.tf +++ b/community/modules/network/rdma-vpc/vpc-submodule/versions.tf @@ -21,9 +21,9 @@ terraform { source = "hashicorp/google" version = ">= 4.64" } - google-private = { - source = "hashicorp/google-private" - version = ">= 0.0.1962" + google-beta = { + source = "hashicorp/google-beta" + version = ">= 6.13.0" } } From 49bd1c160e4030baaa25e494cd1172b461fd491f Mon Sep 17 00:00:00 2001 From: Carson Dunbar Date: Tue, 26 Nov 2024 18:43:40 +0000 Subject: [PATCH 26/26] Changing empty network_ip from empty string to null --- community/modules/network/rdma-vpc/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/community/modules/network/rdma-vpc/main.tf b/community/modules/network/rdma-vpc/main.tf index cd5dafa1fc..71e764a2b6 100644 --- a/community/modules/network/rdma-vpc/main.tf +++ b/community/modules/network/rdma-vpc/main.tf @@ -116,7 +116,7 @@ locals { network = null subnetwork = subnet.self_link subnetwork_project = null # will populate from subnetwork_self_link - network_ip = "" + network_ip = null nic_type = var.nic_type stack_type = null queue_count = null