From 32541c7e8c695b8f5552db285ef00d0404f96f0b Mon Sep 17 00:00:00 2001 From: dgouju Date: Tue, 3 Dec 2024 13:35:25 +0100 Subject: [PATCH 001/140] ParallelStore: Stripping configuration --- modules/file-system/parallelstore/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/file-system/parallelstore/README.md b/modules/file-system/parallelstore/README.md index 46f0969b93..d2a0f06b0f 100644 --- a/modules/file-system/parallelstore/README.md +++ b/modules/file-system/parallelstore/README.md @@ -154,6 +154,8 @@ No modules. | [project\_id](#input\_project\_id) | Project in which the HPC deployment will be created. | `string` | n/a | yes | | [size\_gb](#input\_size\_gb) | Storage size of the parallelstore instance in GB. | `number` | `12000` | no | | [zone](#input\_zone) | Location for parallelstore instance. | `string` | n/a | yes | +| [file\_stripe](#input\_file\_stripe) | File-level stripping setting, must be `"FILE_STRIPE_LEVEL_UNSPECIFIED"`, `"FILE_STRIPE_LEVEL_MIN"`, `"FILE_STRIPE_LEVEL_BALANCED"` or `"FILE_STRIPE_LEVEL_MAX"`. More details in the [documentation](https://cloud.google.com/parallelstore/docs/performance#file_striping_setting). | `string` | `"FILE_STRIPE_LEVEL_UNSPECIFIED"` | no | +| [directory\_stripe](#input\_directory\_stripe) | Directory-level stripping setting, must be `"DIRECTORY_STRIPE_LEVEL_UNSPECIFIED"`, `"DIRECTORY_STRIPE_LEVEL_MIN"`, `"DIRECTORY_STRIPE_LEVEL_BALANCED"` or `"DIRECTORY_STRIPE_LEVEL_MAX"`. More details in the [documentation](https://cloud.google.com/parallelstore/docs/performance#directory_striping_setting). | `string` | `"DIRECTORY_STRIPE_LEVEL_UNSPECIFIED"` | no | ## Outputs From 6a1e455089fe89db358106f40e3d09e7d1287e96 Mon Sep 17 00:00:00 2001 From: dgouju Date: Tue, 3 Dec 2024 13:36:01 +0100 Subject: [PATCH 002/140] ParallelStore: Stripping configuration --- modules/file-system/parallelstore/main.tf | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/modules/file-system/parallelstore/main.tf b/modules/file-system/parallelstore/main.tf index 3de3b94f3a..bb8eeb9606 100644 --- a/modules/file-system/parallelstore/main.tf +++ b/modules/file-system/parallelstore/main.tf @@ -46,11 +46,13 @@ resource "random_id" "resource_name_suffix" { } resource "google_parallelstore_instance" "instance" { - project = var.project_id - instance_id = local.id - location = var.zone - capacity_gib = var.size_gb - network = var.network_id + project = var.project_id + instance_id = local.id + location = var.zone + capacity_gib = var.size_gb + network = var.network_id + file_stripe_level = var.file_stripe + directory_stripe_level = var.directory_stripe labels = local.labels From 9a18ca9e33901c53a2eb07ae6d848c805f457287 Mon Sep 17 00:00:00 2001 From: dgouju Date: Tue, 3 Dec 2024 13:36:34 +0100 Subject: [PATCH 003/140] ParallelStore: Stripping configuration --- .../file-system/parallelstore/variables.tf | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/modules/file-system/parallelstore/variables.tf b/modules/file-system/parallelstore/variables.tf index 8dcac7c528..fc86008d9b 100644 --- a/modules/file-system/parallelstore/variables.tf +++ b/modules/file-system/parallelstore/variables.tf @@ -91,3 +91,33 @@ variable "import_destination_path" { type = string default = null } + +variable "file_stripe" { + description = "The parallelstore stripe level for files." 
+ type = string + default = "FILE_STRIPE_LEVEL_UNSPECIFIED" + validation { + condition = contains([ + "FILE_STRIPE_LEVEL_UNSPECIFIED", + "FILE_STRIPE_LEVEL_MIN", + "FILE_STRIPE_LEVEL_BALANCED", + "FILE_STRIPE_LEVEL_MAX", + ], var.file_stripe) + error_message = "var.file_stripe must be set to \"FILE_STRIPE_LEVEL_UNSPECIFIED\", \"FILE_STRIPE_LEVEL_MIN\", \"FILE_STRIPE_LEVEL_BALANCED\", or \"FILE_STRIPE_LEVEL_MAX\"" + } +} + +variable "directory_stripe" { + description = "The parallelstore stripe level for directories." + type = string + default = "DIRECTORY_STRIPE_LEVEL_UNSPECIFIED" + validation { + condition = contains([ + "DIRECTORY_STRIPE_LEVEL_UNSPECIFIED", + "DIRECTORY_STRIPE_LEVEL_MIN", + "DIRECTORY_STRIPE_LEVEL_BALANCED", + "DIRECTORY_STRIPE_LEVEL_MAX", + ], var.directory_stripe) + error_message = "var.directory_stripe must be set to \"DIRECTORY_STRIPE_LEVEL_UNSPECIFIED\", \"DIRECTORY_STRIPE_LEVEL_MIN\", \"DIRECTORY_STRIPE_LEVEL_BALANCED\", or \"DIRECTORY_STRIPE_LEVEL_MAX\"" + } +} From 7a639d2f013159634eeb105cc1e1fec4ba094b69 Mon Sep 17 00:00:00 2001 From: dgouju Date: Tue, 3 Dec 2024 13:44:16 +0100 Subject: [PATCH 004/140] Keep alphabetical order --- modules/file-system/parallelstore/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/file-system/parallelstore/README.md b/modules/file-system/parallelstore/README.md index d2a0f06b0f..a977a0e587 100644 --- a/modules/file-system/parallelstore/README.md +++ b/modules/file-system/parallelstore/README.md @@ -143,6 +143,8 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [deployment\_name](#input\_deployment\_name) | Name of the HPC deployment. | `string` | n/a | yes | +| [directory\_stripe](#input\_directory\_stripe) | Directory-level stripping setting, must be `"DIRECTORY_STRIPE_LEVEL_UNSPECIFIED"`, `"DIRECTORY_STRIPE_LEVEL_MIN"`, `"DIRECTORY_STRIPE_LEVEL_BALANCED"` or `"DIRECTORY_STRIPE_LEVEL_MAX"`. More details in the [documentation](https://cloud.google.com/parallelstore/docs/performance#directory_striping_setting). | `string` | `"DIRECTORY_STRIPE_LEVEL_UNSPECIFIED"` | no | +| [file\_stripe](#input\_file\_stripe) | File-level stripping setting, must be `"FILE_STRIPE_LEVEL_UNSPECIFIED"`, `"FILE_STRIPE_LEVEL_MIN"`, `"FILE_STRIPE_LEVEL_BALANCED"` or `"FILE_STRIPE_LEVEL_MAX"`. More details in the [documentation](https://cloud.google.com/parallelstore/docs/performance#file_striping_setting). | `string` | `"FILE_STRIPE_LEVEL_UNSPECIFIED"` | no | | [import\_destination\_path](#input\_import\_destination\_path) | The name of local path to import data on parallelstore instance from GCS bucket. | `string` | `null` | no | | [import\_gcs\_bucket\_uri](#input\_import\_gcs\_bucket\_uri) | The name of the GCS bucket to import data from to parallelstore. | `string` | `null` | no | | [labels](#input\_labels) | Labels to add to parallel store instance. | `map(string)` | `{}` | no | @@ -154,8 +156,6 @@ No modules. | [project\_id](#input\_project\_id) | Project in which the HPC deployment will be created. | `string` | n/a | yes | | [size\_gb](#input\_size\_gb) | Storage size of the parallelstore instance in GB. | `number` | `12000` | no | | [zone](#input\_zone) | Location for parallelstore instance. | `string` | n/a | yes | -| [file\_stripe](#input\_file\_stripe) | File-level stripping setting, must be `"FILE_STRIPE_LEVEL_UNSPECIFIED"`, `"FILE_STRIPE_LEVEL_MIN"`, `"FILE_STRIPE_LEVEL_BALANCED"` or `"FILE_STRIPE_LEVEL_MAX"`. 
More details in the [documentation](https://cloud.google.com/parallelstore/docs/performance#file_striping_setting). | `string` | `"FILE_STRIPE_LEVEL_UNSPECIFIED"` | no | -| [directory\_stripe](#input\_directory\_stripe) | Directory-level stripping setting, must be `"DIRECTORY_STRIPE_LEVEL_UNSPECIFIED"`, `"DIRECTORY_STRIPE_LEVEL_MIN"`, `"DIRECTORY_STRIPE_LEVEL_BALANCED"` or `"DIRECTORY_STRIPE_LEVEL_MAX"`. More details in the [documentation](https://cloud.google.com/parallelstore/docs/performance#directory_striping_setting). | `string` | `"DIRECTORY_STRIPE_LEVEL_UNSPECIFIED"` | no | ## Outputs From a66e570ea5b93beb817fdf5bfe974fd051e3c31c Mon Sep 17 00:00:00 2001 From: dgouju Date: Tue, 3 Dec 2024 13:47:26 +0100 Subject: [PATCH 005/140] google-beta 5.42.0 min required for stripping settings --- modules/file-system/parallelstore/versions.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/file-system/parallelstore/versions.tf b/modules/file-system/parallelstore/versions.tf index 24069a479c..55662d1526 100644 --- a/modules/file-system/parallelstore/versions.tf +++ b/modules/file-system/parallelstore/versions.tf @@ -20,7 +20,7 @@ terraform { required_providers { google-beta = { source = "hashicorp/google-beta" - version = ">= 5.25.0" + version = ">= 5.42.0" } random = { From 042203cbdfbccbbba20fb20bc8cd38763cf3088e Mon Sep 17 00:00:00 2001 From: dgouju Date: Tue, 3 Dec 2024 13:52:03 +0100 Subject: [PATCH 006/140] tflint --- modules/file-system/parallelstore/main.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/file-system/parallelstore/main.tf b/modules/file-system/parallelstore/main.tf index bb8eeb9606..a0d6c6a60b 100644 --- a/modules/file-system/parallelstore/main.tf +++ b/modules/file-system/parallelstore/main.tf @@ -54,10 +54,10 @@ resource "google_parallelstore_instance" "instance" { file_stripe_level = var.file_stripe directory_stripe_level = var.directory_stripe - labels = local.labels + labels = local.labels - provider = google-beta - depends_on = [var.private_vpc_connection_peering] + provider = google-beta + depends_on = [var.private_vpc_connection_peering] } resource "null_resource" "hydration" { From 8c1eab2b29c5055650aaa353fe3684d023508fc0 Mon Sep 17 00:00:00 2001 From: dgouju Date: Fri, 6 Dec 2024 21:15:52 +0100 Subject: [PATCH 007/140] Fixing README.md changes --- modules/file-system/parallelstore/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/file-system/parallelstore/README.md b/modules/file-system/parallelstore/README.md index a977a0e587..46073d2643 100644 --- a/modules/file-system/parallelstore/README.md +++ b/modules/file-system/parallelstore/README.md @@ -114,7 +114,7 @@ limitations under the License. | Name | Version | |------|---------| | [terraform](#requirement\_terraform) | >= 0.13 | -| [google-beta](#requirement\_google-beta) | >= 5.25.0 | +| [google-beta](#requirement\_google-beta) | >= 5.42.0 | | [null](#requirement\_null) | ~> 3.0 | | [random](#requirement\_random) | ~> 3.0 | @@ -122,7 +122,7 @@ limitations under the License. | Name | Version | |------|---------| -| [google-beta](#provider\_google-beta) | >= 5.25.0 | +| [google-beta](#provider\_google-beta) | >= 5.42.0 | | [null](#provider\_null) | ~> 3.0 | | [random](#provider\_random) | ~> 3.0 | @@ -143,8 +143,8 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [deployment\_name](#input\_deployment\_name) | Name of the HPC deployment. 
| `string` | n/a | yes | -| [directory\_stripe](#input\_directory\_stripe) | Directory-level stripping setting, must be `"DIRECTORY_STRIPE_LEVEL_UNSPECIFIED"`, `"DIRECTORY_STRIPE_LEVEL_MIN"`, `"DIRECTORY_STRIPE_LEVEL_BALANCED"` or `"DIRECTORY_STRIPE_LEVEL_MAX"`. More details in the [documentation](https://cloud.google.com/parallelstore/docs/performance#directory_striping_setting). | `string` | `"DIRECTORY_STRIPE_LEVEL_UNSPECIFIED"` | no | -| [file\_stripe](#input\_file\_stripe) | File-level stripping setting, must be `"FILE_STRIPE_LEVEL_UNSPECIFIED"`, `"FILE_STRIPE_LEVEL_MIN"`, `"FILE_STRIPE_LEVEL_BALANCED"` or `"FILE_STRIPE_LEVEL_MAX"`. More details in the [documentation](https://cloud.google.com/parallelstore/docs/performance#file_striping_setting). | `string` | `"FILE_STRIPE_LEVEL_UNSPECIFIED"` | no | +| [directory\_stripe](#input\_directory\_stripe) | The parallelstore stripe level for directories. | `string` | `"DIRECTORY_STRIPE_LEVEL_UNSPECIFIED"` | no | +| [file\_stripe](#input\_file\_stripe) | The parallelstore stripe level for files. | `string` | `"FILE_STRIPE_LEVEL_UNSPECIFIED"` | no | | [import\_destination\_path](#input\_import\_destination\_path) | The name of local path to import data on parallelstore instance from GCS bucket. | `string` | `null` | no | | [import\_gcs\_bucket\_uri](#input\_import\_gcs\_bucket\_uri) | The name of the GCS bucket to import data from to parallelstore. | `string` | `null` | no | | [labels](#input\_labels) | Labels to add to parallel store instance. | `map(string)` | `{}` | no | From 49fc50840cdc1641d754db45977bf3580dbcd2f1 Mon Sep 17 00:00:00 2001 From: dgouju Date: Fri, 6 Dec 2024 21:16:36 +0100 Subject: [PATCH 008/140] Fixing indent --- modules/file-system/parallelstore/main.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/file-system/parallelstore/main.tf b/modules/file-system/parallelstore/main.tf index a0d6c6a60b..bb8eeb9606 100644 --- a/modules/file-system/parallelstore/main.tf +++ b/modules/file-system/parallelstore/main.tf @@ -54,10 +54,10 @@ resource "google_parallelstore_instance" "instance" { file_stripe_level = var.file_stripe directory_stripe_level = var.directory_stripe - labels = local.labels + labels = local.labels - provider = google-beta - depends_on = [var.private_vpc_connection_peering] + provider = google-beta + depends_on = [var.private_vpc_connection_peering] } resource "null_resource" "hydration" { From 0ea8282b577af5a9c8e7f7ae0583a925f603d604 Mon Sep 17 00:00:00 2001 From: dgouju Date: Fri, 6 Dec 2024 21:17:10 +0100 Subject: [PATCH 009/140] Removing space --- modules/file-system/parallelstore/variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/file-system/parallelstore/variables.tf b/modules/file-system/parallelstore/variables.tf index fc86008d9b..d2a61d6392 100644 --- a/modules/file-system/parallelstore/variables.tf +++ b/modules/file-system/parallelstore/variables.tf @@ -91,7 +91,7 @@ variable "import_destination_path" { type = string default = null } - + variable "file_stripe" { description = "The parallelstore stripe level for files." 
type = string From cef26f3a17385454958dbd2ecb0b80f4303b8692 Mon Sep 17 00:00:00 2001 From: Swarna Bharathi Mantena Date: Tue, 10 Dec 2024 12:04:26 +0000 Subject: [PATCH 010/140] a test PR --- modules/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/README.md b/modules/README.md index c5f1df282a..93f9dc73b3 100644 --- a/modules/README.md +++ b/modules/README.md @@ -586,5 +586,5 @@ than having to set it manually. ## Writing Custom Cluster Toolkit Modules -Modules are flexible by design, however we do define some [best practices](../docs/module-guidelines.md) when +Modules are flexible by design, however we define some [best practices](../docs/module-guidelines.md) when creating a new module meant to be used with the Cluster Toolkit. From c60838ca206f52608efee61a93aaefff9f4c8aea Mon Sep 17 00:00:00 2001 From: annuay Date: Wed, 11 Dec 2024 20:45:53 +0000 Subject: [PATCH 011/140] expose deletion protection --- modules/scheduler/gke-cluster/README.md | 1 + modules/scheduler/gke-cluster/main.tf | 3 +-- modules/scheduler/gke-cluster/variables.tf | 10 ++++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/modules/scheduler/gke-cluster/README.md b/modules/scheduler/gke-cluster/README.md index 74f1ac0ba3..675039add6 100644 --- a/modules/scheduler/gke-cluster/README.md +++ b/modules/scheduler/gke-cluster/README.md @@ -145,6 +145,7 @@ limitations under the License. | [cluster\_reference\_type](#input\_cluster\_reference\_type) | How the google\_container\_node\_pool.system\_node\_pools refers to the cluster. Possible values are: {SELF\_LINK, NAME} | `string` | `"SELF_LINK"` | no | | [configure\_workload\_identity\_sa](#input\_configure\_workload\_identity\_sa) | When true, a kubernetes service account will be created and bound using workload identity to the service account used to create the cluster. | `bool` | `false` | no | | [default\_max\_pods\_per\_node](#input\_default\_max\_pods\_per\_node) | The default maximum number of pods per node in this cluster. | `number` | `null` | no | +| [deletion\_protection](#input\_deletion\_protection) | "Determines if the cluster can be deleted by gcluster commands or not".
To delete a cluster provisioned with deletion\_protection set to true, you must first set it to false and apply the changes.
Then proceed with deletion as usual. | `bool` | `false` | no | | [deployment\_name](#input\_deployment\_name) | Name of the HPC deployment. Used in the GKE cluster name by default and can be configured with `prefix_with_deployment_name`. | `string` | n/a | yes | | [enable\_dataplane\_v2](#input\_enable\_dataplane\_v2) | Enables [Dataplane v2](https://cloud.google.com/kubernetes-engine/docs/concepts/dataplane-v2). This setting is immutable on clusters. If null, will default to false unless using multi-networking, in which case it will default to true | `bool` | `null` | no | | [enable\_dcgm\_monitoring](#input\_enable\_dcgm\_monitoring) | Enable GKE to collect DCGM metrics | `bool` | `false` | no | diff --git a/modules/scheduler/gke-cluster/main.tf b/modules/scheduler/gke-cluster/main.tf index 48a225d5e8..5b416a85bb 100644 --- a/modules/scheduler/gke-cluster/main.tf +++ b/modules/scheduler/gke-cluster/main.tf @@ -72,8 +72,7 @@ resource "google_container_cluster" "gke_cluster" { remove_default_node_pool = true initial_node_count = 1 # must be set when remove_default_node_pool is set - # Sets default to false so terraform deletion is not prevented - deletion_protection = false + deletion_protection = var.deletion_protection network = var.network_id subnetwork = var.subnetwork_self_link diff --git a/modules/scheduler/gke-cluster/variables.tf b/modules/scheduler/gke-cluster/variables.tf index 9231b2e193..58bf197763 100644 --- a/modules/scheduler/gke-cluster/variables.tf +++ b/modules/scheduler/gke-cluster/variables.tf @@ -397,3 +397,13 @@ variable "networking_mode" { type = string default = "VPC_NATIVE" } + +variable "deletion_protection" { + description = <<-EOT + "Determines if the cluster can be deleted by gcluster commands or not". + To delete a cluster provisioned with deletion_protection set to true, you must first set it to false and apply the changes. + Then proceed with deletion as usual. + EOT + type = bool + default = false +} From ed820b840fa6979b0dd9c1ab20e8deb25715a068 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Thu, 12 Dec 2024 17:39:35 +0000 Subject: [PATCH 012/140] SlurmGCP. 
Replace `to_hostlist` with `to_hostlist_fast` --- .../modules/slurm_files/scripts/conf.py | 4 ++-- .../modules/slurm_files/scripts/resume.py | 22 +++++++++---------- .../modules/slurm_files/scripts/slurmsync.py | 18 +++++++-------- .../modules/slurm_files/scripts/suspend.py | 12 +++++----- .../slurm_files/scripts/tests/test_util.py | 4 ++-- .../modules/slurm_files/scripts/util.py | 18 +++------------ 6 files changed, 33 insertions(+), 45 deletions(-) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py index c4bb37c579..4af58a7831 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py @@ -410,9 +410,9 @@ def __init__( def conf_line(self) -> str: d = {"SwitchName": self.name} if self.nodes: - d["Nodes"] = util.to_hostlist_fast(self.nodes) + d["Nodes"] = util.to_hostlist(self.nodes) if self.switches: - d["Switches"] = util.to_hostlist_fast(self.switches.keys()) + d["Switches"] = util.to_hostlist(self.switches.keys()) return dict_to_conf(d) def render_conf_lines(self) -> Iterable[str]: diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py index 9a5e0b035b..669ccfc0a7 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py @@ -37,7 +37,7 @@ map_with_futures, run, separate, - to_hostlist_fast, + to_hostlist, trim_self_link, wait_for_operation, ) @@ -220,7 +220,7 @@ def create_instances_request(nodes: List[str], placement_group: Optional[str], e project=lookup().project, body=body.to_dict(), **method_args) - log.debug(f"new request: endpoint={req.methodId} nodes={to_hostlist_fast(nodes)}") + log.debug(f"new request: endpoint={req.methodId} nodes={to_hostlist(nodes)}") log_api_request(req) return req @@ -340,7 +340,7 @@ def resume_nodes(nodes: List[str], resume_data: Optional[ResumeData]): if log.isEnabledFor(logging.DEBUG): grouped_nodelists = { - group: to_hostlist_fast(chunk.nodes) for group, chunk in grouped_nodes.items() + group: to_hostlist(chunk.nodes) for group, chunk in grouped_nodes.items() } log.debug( "node bulk groups: \n{}".format(yaml.safe_dump(grouped_nodelists).rstrip()) @@ -401,7 +401,7 @@ def resume_nodes(nodes: List[str], resume_data: Optional[ResumeData]): bulk_op_name = bulk_op["name"] if "error" in bulk_op: error = bulk_op["error"]["errors"][0] - group_nodes = to_hostlist_fast(grouped_nodes[group].nodes) + group_nodes = to_hostlist(grouped_nodes[group].nodes) log.warning( f"bulkInsert operation errors: {error['code']} name={bulk_op_name} operationGroupId={group_id} nodes={group_nodes}" ) @@ -433,14 +433,14 @@ def resume_nodes(nodes: List[str], resume_data: Optional[ResumeData]): ready_nodes = {trim_self_link(op["targetLink"]) for op in successful_inserts} if len(ready_nodes) > 0: - ready_nodelist = to_hostlist_fast(ready_nodes) + ready_nodelist = to_hostlist(ready_nodes) log.info(f"created {len(ready_nodes)} instances: nodes={ready_nodelist}") all_successful_inserts.extend(successful_inserts) def down_nodes_notify_jobs(nodes: List[str], reason: str, 
resume_data: Optional[ResumeData]) -> None: """set nodes down with reason""" - nodelist = util.to_hostlist_fast(nodes) + nodelist = util.to_hostlist(nodes) reason_quoted = shlex.quote(reason) log.error(f"Marking nodes {nodelist} as DOWN, reason: {reason}") @@ -536,7 +536,7 @@ def _allocate_nodes_to_placements(nodes: List[str], excl_job_id:Optional[int], l if invalid: placements.append(PlacementAndNodes(placement=None, nodes=invalid)) - log.error(f"Could not find placement for nodes with unexpected names: {to_hostlist_fast(invalid)}") + log.error(f"Could not find placement for nodes with unexpected names: {to_hostlist(invalid)}") return placements @@ -545,7 +545,7 @@ def create_nodeset_placements(nodes: List[str], excl_job_id:Optional[int], lkp: region = lkp.node_region(nodes[0]) if log.isEnabledFor(logging.DEBUG): - debug_p = {p.placement: to_hostlist_fast(p.nodes) for p in placements} + debug_p = {p.placement: to_hostlist(p.nodes) for p in placements} log.debug( f"creating {len(placements)} placement groups: \n{yaml.safe_dump(debug_p).rstrip()}" ) @@ -591,7 +591,7 @@ def classify_result(item): ) log.info( - f"created {len(operations)} placement groups ({to_hostlist_fast(operations.keys())})" + f"created {len(operations)} placement groups ({to_hostlist(operations.keys())})" ) return placements @@ -617,7 +617,7 @@ def main(nodelist: str) -> None: ) if other_nodes: log.error( - f"Ignoring non-power-managed nodes '{to_hostlist_fast(other_nodes)}' from '{nodelist}'" + f"Ignoring non-power-managed nodes '{to_hostlist(other_nodes)}' from '{nodelist}'" ) if not nodes: @@ -625,7 +625,7 @@ def main(nodelist: str) -> None: return resume_data = get_resume_file_data() - log.info(f"resume {util.to_hostlist_fast(nodes)}") + log.info(f"resume {util.to_hostlist(nodes)}") resume_nodes(nodes, resume_data) if __name__ == "__main__": diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py index d21211e8e7..4cb8fea7a5 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py @@ -38,7 +38,7 @@ install_custom_scripts, run, separate, - to_hostlist_fast, + to_hostlist, NSDict, NodeState, TPU, @@ -65,28 +65,28 @@ def __hash__(self): @dataclass(frozen=True) class NodeActionPowerUp(): def apply(self, nodes:List[str]) -> None: - hostlist = util.to_hostlist_fast(nodes) + hostlist = util.to_hostlist(nodes) log.info(f"{len(nodes)} instances to resume ({hostlist})") run(f"{lookup().scontrol} update nodename={hostlist} state=power_up") @dataclass(frozen=True) class NodeActionIdle(): def apply(self, nodes:List[str]) -> None: - hostlist = util.to_hostlist_fast(nodes) + hostlist = util.to_hostlist(nodes) log.info(f"{len(nodes)} nodes to idle ({hostlist})") run(f"{lookup().scontrol} update nodename={hostlist} state=resume") @dataclass(frozen=True) class NodeActionPowerDown(): def apply(self, nodes:List[str]) -> None: - hostlist = util.to_hostlist_fast(nodes) + hostlist = util.to_hostlist(nodes) log.info(f"{len(nodes)} instances to power down ({hostlist})") run(f"{lookup().scontrol} update nodename={hostlist} state=power_down") @dataclass(frozen=True) class NodeActionDelete(): def apply(self, nodes:List[str]) -> None: - hostlist = util.to_hostlist_fast(nodes) + hostlist = util.to_hostlist(nodes) 
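+        # to_hostlist collapses node names into a Slurm hostlist expression,
+        # e.g. ["seas7-0", "seas7-1"] -> "seas7-[0-1]" (example taken from
+        # tests/test_util.py in this change).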
log.info(f"{len(nodes)} instances to delete ({hostlist})") delete_instances(nodes) @@ -94,7 +94,7 @@ def apply(self, nodes:List[str]) -> None: class NodeActionPrempt(): def apply(self, nodes:List[str]) -> None: NodeActionDown(reason="Preempted instance").apply(nodes) - hostlist = util.to_hostlist_fast(nodes) + hostlist = util.to_hostlist(nodes) log.info(f"{len(nodes)} instances restarted ({hostlist})") start_instances(nodes) @@ -108,7 +108,7 @@ class NodeActionDown(): reason: str def apply(self, nodes: List[str]) -> None: - hostlist = util.to_hostlist_fast(nodes) + hostlist = util.to_hostlist(nodes) log.info(f"{len(nodes)} nodes set down ({hostlist}) with reason={self.reason}") run(f"{lookup().scontrol} update nodename={hostlist} state=down reason={shlex.quote(self.reason)}") @@ -118,7 +118,7 @@ class NodeActionUnknown(): instance_state: Optional[str] def apply(self, nodes:List[str]) -> None: - hostlist = util.to_hostlist_fast(nodes) + hostlist = util.to_hostlist(nodes) log.error(f"{len(nodes)} nodes have unexpected {self.slurm_state} and instance state:{self.instance_state}, ({hostlist})") def start_instance_op(inst): @@ -327,7 +327,7 @@ def ignore_err(e) -> bool: if failures: log.error(f"some placement groups failed to delete: {failures}") log.info( - f"deleted {len(done)} of {len(placement_groups)} placement groups ({to_hostlist_fast(done.keys())})" + f"deleted {len(done)} of {len(placement_groups)} placement groups ({to_hostlist(done.keys())})" ) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py index 4866dffb1e..dc901b6aba 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py @@ -24,7 +24,7 @@ groupby_unsorted, log_api_request, batch_execute, - to_hostlist_fast, + to_hostlist, wait_for_operations, separate, execute_with_futures, @@ -96,14 +96,14 @@ def delete_instances(instances): requests = {inst: delete_instance_request(inst) for inst in valid} - log.info(f"delete {len(valid)} instances ({to_hostlist_fast(valid)})") + log.info(f"delete {len(valid)} instances ({to_hostlist(valid)})") done, failed = batch_execute(requests) if failed: for err, nodes in groupby_unsorted(lambda n: failed[n][1], failed.keys()): - log.error(f"instances failed to delete: {err} ({to_hostlist_fast(nodes)})") + log.error(f"instances failed to delete: {err} ({to_hostlist(nodes)})") wait_for_operations(done.values()) # TODO do we need to check each operation for success? 
That is a lot more API calls - log.info(f"deleted {len(done)} instances {to_hostlist_fast(done.keys())}") + log.info(f"deleted {len(done)} instances {to_hostlist(done.keys())}") def suspend_nodes(nodes: List[str]) -> None: @@ -128,10 +128,10 @@ def main(nodelist): ) if other_nodes: log.debug( - f"Ignoring non-power-managed nodes '{to_hostlist_fast(other_nodes)}' from '{nodelist}'" + f"Ignoring non-power-managed nodes '{to_hostlist(other_nodes)}' from '{nodelist}'" ) if pm_nodes: - log.debug(f"Suspending nodes '{to_hostlist_fast(pm_nodes)}' from '{nodelist}'") + log.debug(f"Suspending nodes '{to_hostlist(pm_nodes)}' from '{nodelist}'") else: log.debug("No cloud nodes to suspend") return diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py index 2807740464..9cf36b2776 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py @@ -108,8 +108,8 @@ def test_node_desc_fail(name): ("seas7-0,seas7-1", "seas7-[0-1]"), ], ) -def test_to_hostlist_fast(names, expected): - assert util.to_hostlist_fast(names.split(",")) == expected +def test_to_hostlist(names, expected): + assert util.to_hostlist(names.split(",")) == expected @pytest.mark.parametrize( diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py index 605283c5bb..096e6d974c 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py @@ -877,25 +877,13 @@ def atoi(text): return [atoi(w) for w in re.split(r"(\d+)", text)] -# TODO: replace with to_hostlist_fast -def to_hostlist(nodenames) -> str: - """make hostlist from list of node names""" - # use tmp file because list could be large - tmp_file = tempfile.NamedTemporaryFile(mode="w+t", delete=False) - tmp_file.writelines("\n".join(sorted(nodenames, key=natural_sort))) - tmp_file.close() - hostlist = run(f"{lookup().scontrol} show hostlist {tmp_file.name}").stdout.rstrip() - os.remove(tmp_file.name) - return hostlist - - -def to_hostlist_fast(names: Iterable[str]) -> str: +def to_hostlist(names: Iterable[str]) -> str: """ - Fast implementation of to_hostlist that doesn't invoke `scontrol` + Fast implementation of `hostlist` that doesn't invoke `scontrol` IMPORTANT: * Acts as `scontrol show hostlistsorted`, i.e. original order is not preserved - * Achieves worse compression than `to_hostlist` for some cases + * Achieves worse compression than `scontrol show hostlist` for some cases """ pref = defaultdict(list) tokenizer = re.compile(r"^(.*?)(\d*)$") From 97f449e52ec978d38a33ac65a2ffbcd94e14bcf8 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Wed, 11 Dec 2024 01:42:55 +0000 Subject: [PATCH 013/140] Slurmsync. Safeguard against nodes missing from Slurm state. 
--- .../modules/slurm_files/scripts/slurmsync.py | 23 +++-------- .../slurm_files/scripts/tests/common.py | 1 + .../slurm_files/scripts/tests/test_util.py | 40 +++++++++++++++++++ .../modules/slurm_files/scripts/util.py | 36 ++++++++++++++++- 4 files changed, 81 insertions(+), 19 deletions(-) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py index d21211e8e7..1e682cdeee 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py @@ -225,7 +225,7 @@ def _find_tpu_node_action(nodename, state) -> NodeAction: def get_node_action(nodename: str) -> NodeAction: """Determine node/instance status that requires action""" - state = lookup().slurm_node(nodename) + state = lookup().node_state(nodename) if lookup().node_is_fr(nodename): fr = lookup().future_reservation(lookup().node_nodeset(nodename)) @@ -381,24 +381,13 @@ def sync_placement_groups(): def sync_slurm(): - compute_instances = [ + compute_instances = { name for name, inst in lookup().instances().items() if inst.role == "compute" - ] - slurm_nodes = list(lookup().slurm_nodes().keys()) - - all_nodes = list( - set( - chain( - compute_instances, - slurm_nodes, - ) - ) - ) - log.debug( - f"reconciling {len(compute_instances)} ({len(all_nodes)-len(compute_instances)}) GCP instances and {len(slurm_nodes)} Slurm nodes ({len(all_nodes)-len(slurm_nodes)})." - ) + } + slurm_nodes = set(lookup().slurm_nodes().keys()) + log.debug(f"reconciling {len(compute_instances)} GCP instances and {len(slurm_nodes)} Slurm nodes.") - for action, nodes in util.groupby_unsorted(all_nodes, get_node_action): + for action, nodes in util.groupby_unsorted(compute_instances | slurm_nodes, get_node_action): action.apply(list(nodes)) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/common.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/common.py index a807c00f28..54d7f45d43 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/common.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/common.py @@ -53,6 +53,7 @@ class TstCfg: partitions: dict[str, Placeholder] = field(default_factory=dict) nodeset: dict[str, TstNodeset] = field(default_factory=dict) nodeset_tpu: dict[str, TstNodeset] = field(default_factory=dict) + nodeset_dyn: dict[str, TstNodeset] = field(default_factory=dict) install_dir: Optional[str] = None output_dir: Optional[str] = None diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py index 2807740464..23fa585f84 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py @@ -12,10 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Optional, Type + import pytest from mock import Mock from common import TstNodeset, TstCfg # needed to import util import util +from util import NodeState from datetime import timedelta from google.api_core.client_options import ClientOptions # noqa: E402 @@ -308,3 +311,40 @@ def test_nodeset_reservation_ok(nodeset, policies, expected): def test_parse_job_info(job_info, expected_job): lkp = util.Lookup(TstCfg()) assert lkp._parse_job_info(job_info) == expected_job + + + +@pytest.mark.parametrize( + "node,state,want", + [ + ("c-n-2", NodeState("DOWN", {}), NodeState("DOWN", {})), # happy scenario + ("c-d-vodoo", None, None), # dynamic nodeset + ("c-x-44", None, None), # unknown(removed) nodeset + ("c-n-7", None, None), # Out of bounds: c-n-[0-4] - downsized nodeset + ("c-t-7", None, None), # Out of bounds: c-t-[0-4] - downsized nodeset TPU + ("c-n-2", None, RuntimeError), # something is wrong + ("c-t-2", None, RuntimeError), # something is wrong, but TPU + + # Check boundaries match [0-5) + ("c-n-5", None, None), # out of boundaries + ("c-n-4", None, RuntimeError), # within boundaries + ]) +def test_node_state(node: str, state: Optional[NodeState], want: NodeState | None | Type[Exception]): + cfg = TstCfg( + slurm_cluster_name="c", + nodeset={ + "n": TstNodeset(node_count_static=2, node_count_dynamic_max=3)}, + nodeset_tpu={ + "t": TstNodeset(node_count_static=2, node_count_dynamic_max=3)}, + nodeset_dyn={ + "d": TstNodeset()}, + ) + lkp = util.Lookup(cfg) + lkp.slurm_nodes = lambda: {node: state} if state else {} + + if type(want) is type and issubclass(want, Exception): + with pytest.raises(want): + lkp.node_state(node) + else: + assert lkp.node_state(node) == want + diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py index 605283c5bb..761e506f60 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py @@ -1595,6 +1595,7 @@ def node_nodeset(self, node_name=None): nodeset_name = self.node_nodeset_name(node_name) if nodeset_name in self.cfg.nodeset_tpu: return self.cfg.nodeset_tpu[nodeset_name] + return self.cfg.nodeset[nodeset_name] def partition_is_tpu(self, part: str) -> bool: @@ -1700,8 +1701,39 @@ def make_node_tuple(node_line): } return nodes - def slurm_node(self, nodename): - return self.slurm_nodes().get(nodename) + def node_state(self, nodename: str) -> Optional[NodeState]: + state = self.slurm_nodes().get(nodename) + if state is not None: + return state + + # state is None => Slurm doesn't know this node, + # there are two reasons: + # * happy: + # * node belongs to removed nodeset + # * node belongs to downsized portion of nodeset + # * dynamic node that didn't register itself + # * unhappy: + # * there is a drift in Slurm and SlurmGCP configurations + # * `slurm_nodes` function failed to handle `scontrol show nodes`, + # TODO: make `slurm_nodes` robust by using `scontrol show nodes --json` + # In either of "unhappy" cases it's too dangerous to proceed - abort slurmsync. 
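+        # Illustration, mirroring the cases in tests/test_util.py from this
+        # change: for cluster "c" and nodeset "n" with node_count_static=2 and
+        # node_count_dynamic_max=3, valid indices are 0..4; a missing "c-n-7"
+        # is treated as a downsized nodeset (returns None), while a missing
+        # "c-n-4" is within bounds and raises RuntimeError (likely config drift).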
+ try: + ns = self.node_nodeset(nodename) + except: + log.info(f"Unknown node {nodename}, belongs to unknown nodeset") + return None # Can't find nodeset, may be belongs to removed nodeset + + if self.node_is_dyn(nodename): + log.info(f"Unknown node {nodename}, belongs to dynamic nodeset") + return None # we can't make any judjment for dynamic nodes + + cnt = sum(self.static_dynamic_sizes(ns)) + if self.node_index(nodename) >= cnt: + log.info(f"Unknown node {nodename}, out of nodeset size boundaries ({cnt})") + return None # node belongs to downsized nodeset + + raise RuntimeError(f"Slurm does not recognize node {nodename}, potential misconfiguration.") + @lru_cache(maxsize=1) def instances(self) -> Dict[str, object]: From ce5a41df14a8edfed6eab03763e4dbc7c4030410 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Thu, 12 Dec 2024 18:01:39 +0000 Subject: [PATCH 014/140] SlurmGCP. Fix warning around `file_cache` --- .../modules/slurm_files/scripts/util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py index 605283c5bb..2e1474c050 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py @@ -373,6 +373,7 @@ def build_request(http, *args, **kwargs): requestBuilder=build_request, credentials=credentials, discoveryServiceUrl=disc_url, + cache_discovery=False, # See https://github.com/googleapis/google-api-python-client/issues/299 ) def storage_client() -> storage.Client: From a93a9abc967a109a097a4d101ea0736640825b39 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Thu, 12 Dec 2024 18:44:21 +0000 Subject: [PATCH 015/140] SlurmGCP. Remove redundant terraform from newly migrated modules --- .../internal/slurm-gcp-v6/instance/README.md | 11 +---- .../internal/slurm-gcp-v6/instance/main.tf | 35 +--------------- .../slurm-gcp-v6/instance/variables.tf | 42 ------------------- .../slurm-gcp-v6/instance/versions.tf | 4 -- .../slurm-gcp-v6/instance_template/README.md | 3 +- .../slurm-gcp-v6/instance_template/main.tf | 18 +++----- .../instance_template/variables.tf | 21 +--------- .../internal_instance_template/README.md | 4 +- .../internal_instance_template/main.tf | 3 +- .../internal_instance_template/variables.tf | 21 ---------- .../schedmd-slurm-gcp-v6-controller/login.tf | 10 ++--- 11 files changed, 16 insertions(+), 156 deletions(-) diff --git a/community/modules/internal/slurm-gcp-v6/instance/README.md b/community/modules/internal/slurm-gcp-v6/instance/README.md index fadb65bac6..ae8462d763 100644 --- a/community/modules/internal/slurm-gcp-v6/instance/README.md +++ b/community/modules/internal/slurm-gcp-v6/instance/README.md @@ -48,7 +48,6 @@ limitations under the License. |------|---------| | [terraform](#requirement\_terraform) | ~> 1.0 | | [google](#requirement\_google) | >= 3.43 | -| [local](#requirement\_local) | ~> 2.0 | | [null](#requirement\_null) | ~> 3.0 | ## Providers @@ -56,7 +55,6 @@ limitations under the License. | Name | Version | |------|---------| | [google](#provider\_google) | >= 3.43 | -| [local](#provider\_local) | ~> 2.0 | | [null](#provider\_null) | ~> 3.0 | ## Modules @@ -71,27 +69,20 @@ No modules. 
| [null_resource.replace_trigger](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [google_compute_instance_template.base](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_instance_template) | data source | | [google_compute_zones.available](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_zones) | data source | -| [local_file.startup](https://registry.terraform.io/providers/hashicorp/local/latest/docs/data-sources/file) | data source | ## Inputs | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [access\_config](#input\_access\_config) | Access configurations, i.e. IPs via which the VM instance can be accessed via the Internet. |
list(object({
nat_ip = string
network_tier = string
}))
| `[]` | no | -| [add\_hostname\_suffix](#input\_add\_hostname\_suffix) | Adds a suffix to the hostname | `bool` | `true` | no | | [additional\_networks](#input\_additional\_networks) | Additional network interface details for GCE, if any. |
list(object({
access_config = optional(list(object({
nat_ip = string
network_tier = string
})), [])
alias_ip_range = optional(list(object({
ip_cidr_range = string
subnetwork_range_name = string
})), [])
ipv6_access_config = optional(list(object({
network_tier = string
})), [])
network = optional(string)
network_ip = optional(string, "")
nic_type = optional(string)
queue_count = optional(number)
stack_type = optional(string)
subnetwork = optional(string)
subnetwork_project = optional(string)
}))
| `[]` | no | -| [hostname](#input\_hostname) | Hostname of instances | `string` | `""` | no | -| [hostname\_suffix\_separator](#input\_hostname\_suffix\_separator) | Separator character to compose hostname when add\_hostname\_suffix is set to true. | `string` | `"-"` | no | +| [hostname](#input\_hostname) | Hostname of instances | `string` | n/a | yes | | [instance\_template](#input\_instance\_template) | Instance template self\_link used to create compute instances | `string` | n/a | yes | -| [labels](#input\_labels) | Labels, provided as a map. Merged and takes precedence over labels on instance template | `map(string)` | `{}` | no | -| [metadata](#input\_metadata) | Metadata, provided as a map | `map(string)` | `{}` | no | | [network](#input\_network) | Network to deploy to. Only one of network or subnetwork should be specified. | `string` | `""` | no | | [num\_instances](#input\_num\_instances) | Number of instances to create. This value is ignored if static\_ips is provided. | `number` | `1` | no | | [project\_id](#input\_project\_id) | The GCP project ID | `string` | `null` | no | | [region](#input\_region) | Region where the instances should be created. | `string` | `null` | no | | [replace\_trigger](#input\_replace\_trigger) | Trigger value to replace the instances. | `string` | `""` | no | -| [slurm\_cluster\_name](#input\_slurm\_cluster\_name) | Cluster name, used for resource naming. | `string` | n/a | yes | -| [slurm\_instance\_role](#input\_slurm\_instance\_role) | Slurm instance type. Must be one of: controller; login; compute. | `string` | `null` | no | | [static\_ips](#input\_static\_ips) | List of static IPs for VM instances | `list(string)` | `[]` | no | | [subnetwork](#input\_subnetwork) | Subnet to deploy to. Only one of network or subnetwork should be specified. | `string` | `""` | no | | [subnetwork\_project](#input\_subnetwork\_project) | The project that subnetwork belongs to | `string` | `null` | no | diff --git a/community/modules/internal/slurm-gcp-v6/instance/main.tf b/community/modules/internal/slurm-gcp-v6/instance/main.tf index 749ca9d3b3..5f3ce6e0df 100644 --- a/community/modules/internal/slurm-gcp-v6/instance/main.tf +++ b/community/modules/internal/slurm-gcp-v6/instance/main.tf @@ -20,20 +20,13 @@ ########## locals { - hostname = var.hostname == "" ? "default" : var.hostname num_instances = length(var.static_ips) == 0 ? var.num_instances : length(var.static_ips) # local.static_ips is the same as var.static_ips with a dummy element appended # at the end of the list to work around "list does not have any elements so cannot # determine type" error when var.static_ips is empty static_ips = concat(var.static_ips, ["NOT_AN_IP"]) -} - -################# -# LOCALS: SLURM # -################# -locals { network_interfaces = [for index in range(local.num_instances) : concat([ { @@ -52,9 +45,6 @@ locals { var.additional_networks ) ] - - slurm_instance_role = lower(var.slurm_instance_role) - } ################ @@ -71,10 +61,6 @@ data "google_compute_instance_template" "base" { name = var.instance_template } -data "local_file" "startup" { - filename = "${path.module}/../instance_template/files/startup_sh_unlinted" -} - ############# # INSTANCES # ############# @@ -86,7 +72,7 @@ resource "null_resource" "replace_trigger" { resource "google_compute_instance_from_template" "slurm_instance" { count = local.num_instances - name = var.add_hostname_suffix ? 
format("%s%s%s", local.hostname, var.hostname_suffix_separator, format("%03d", count.index + 1)) : local.hostname + name = format("%s-%s", var.hostname, format("%03d", count.index + 1)) project = var.project_id zone = var.zone == null ? data.google_compute_zones.available.names[count.index % length(data.google_compute_zones.available.names)] : var.zone @@ -128,25 +114,6 @@ resource "google_compute_instance_from_template" "slurm_instance" { source_instance_template = data.google_compute_instance_template.base.self_link - # Slurm - labels = merge( - data.google_compute_instance_template.base.labels, - var.labels, - { - slurm_cluster_name = var.slurm_cluster_name - slurm_instance_role = local.slurm_instance_role - }, - ) - metadata = merge( - data.google_compute_instance_template.base.metadata, - var.metadata, - { - slurm_cluster_name = var.slurm_cluster_name - slurm_instance_role = local.slurm_instance_role - startup-script = data.local_file.startup.content - }, - ) - lifecycle { replace_triggered_by = [null_resource.replace_trigger.id] } diff --git a/community/modules/internal/slurm-gcp-v6/instance/variables.tf b/community/modules/internal/slurm-gcp-v6/instance/variables.tf index 697d5c4b98..11111a2c05 100644 --- a/community/modules/internal/slurm-gcp-v6/instance/variables.tf +++ b/community/modules/internal/slurm-gcp-v6/instance/variables.tf @@ -42,13 +42,6 @@ variable "subnetwork_project" { variable "hostname" { description = "Hostname of instances" type = string - default = "" -} - -variable "add_hostname_suffix" { - description = "Adds a suffix to the hostname" - type = bool - default = true } variable "additional_networks" { @@ -115,45 +108,10 @@ variable "zone" { default = null } -variable "hostname_suffix_separator" { - description = "Separator character to compose hostname when add_hostname_suffix is set to true." - type = string - default = "-" -} - -variable "metadata" { - type = map(string) - description = "Metadata, provided as a map" - default = {} -} - -variable "labels" { - type = map(string) - description = "Labels, provided as a map. Merged and takes precedence over labels on instance template" - default = {} -} - ######### # SLURM # ######### -variable "slurm_instance_role" { - description = "Slurm instance type. Must be one of: controller; login; compute." - type = string - default = null - - validation { - condition = contains(["controller", "login", "compute"], lower(var.slurm_instance_role)) - error_message = "Must be one of: controller; login; compute." - } -} - -variable "slurm_cluster_name" { - description = "Cluster name, used for resource naming." - type = string -} - - variable "replace_trigger" { description = "Trigger value to replace the instances." 
type = string diff --git a/community/modules/internal/slurm-gcp-v6/instance/versions.tf b/community/modules/internal/slurm-gcp-v6/instance/versions.tf index 293a1ef8ca..a3e84c09bf 100644 --- a/community/modules/internal/slurm-gcp-v6/instance/versions.tf +++ b/community/modules/internal/slurm-gcp-v6/instance/versions.tf @@ -23,10 +23,6 @@ terraform { source = "hashicorp/google" version = ">= 3.43" } - local = { - source = "hashicorp/local" - version = "~> 2.0" - } null = { source = "hashicorp/null" version = "~> 3.0" diff --git a/community/modules/internal/slurm-gcp-v6/instance_template/README.md b/community/modules/internal/slurm-gcp-v6/instance_template/README.md index 8cef4311ca..0cd784b0c4 100644 --- a/community/modules/internal/slurm-gcp-v6/instance_template/README.md +++ b/community/modules/internal/slurm-gcp-v6/instance_template/README.md @@ -53,12 +53,11 @@ | [preemptible](#input\_preemptible) | Allow the instance to be preempted. | `bool` | `false` | no | | [project\_id](#input\_project\_id) | Project ID to create resources in. | `string` | n/a | yes | | [region](#input\_region) | Region where the instance template should be created. | `string` | `null` | no | -| [resource\_policies](#input\_resource\_policies) | A list of self\_links of resource policies to attach to the instance.
Currently a max of 1 resource policy is supported. | `list(string)` | `null` | no | | [service\_account](#input\_service\_account) | Service account to attach to the instances. See
'main.tf:local.service\_account' for the default. |
object({
email = string
scopes = set(string)
})
| `null` | no | | [shielded\_instance\_config](#input\_shielded\_instance\_config) | Shielded VM configuration for the instance. Note: not used unless
enable\_shielded\_vm is 'true'.
- enable\_integrity\_monitoring : Compare the most recent boot measurements to the
integrity policy baseline and return a pair of pass/fail results depending on
whether they match or not.
- enable\_secure\_boot : Verify the digital signature of all boot components, and
halt the boot process if signature verification fails.
- enable\_vtpm : Use a virtualized trusted platform module, which is a
specialized computer chip you can use to encrypt objects like keys and
certificates. |
object({
enable_integrity_monitoring = bool
enable_secure_boot = bool
enable_vtpm = bool
})
|
{
"enable_integrity_monitoring": true,
"enable_secure_boot": true,
"enable_vtpm": true
}
| no | | [slurm\_bucket\_path](#input\_slurm\_bucket\_path) | GCS Bucket URI of Slurm cluster file storage. | `string` | n/a | yes | | [slurm\_cluster\_name](#input\_slurm\_cluster\_name) | Cluster name, used for resource naming. | `string` | n/a | yes | -| [slurm\_instance\_role](#input\_slurm\_instance\_role) | Slurm instance type. Must be one of: controller; login; compute; or null. | `string` | `null` | no | +| [slurm\_instance\_role](#input\_slurm\_instance\_role) | Slurm instance type. Must be one of: controller; login; compute; or null. | `string` | n/a | yes | | [source\_image](#input\_source\_image) | Source disk image. | `string` | `""` | no | | [source\_image\_family](#input\_source\_image\_family) | Source image family. | `string` | `""` | no | | [source\_image\_project](#input\_source\_image\_project) | Project where the source image comes from. If it is not provided, the provider project is used. | `string` | `""` | no | diff --git a/community/modules/internal/slurm-gcp-v6/instance_template/main.tf b/community/modules/internal/slurm-gcp-v6/instance_template/main.tf index 64c4caa0a6..70846ed020 100644 --- a/community/modules/internal/slurm-gcp-v6/instance_template/main.tf +++ b/community/modules/internal/slurm-gcp-v6/instance_template/main.tf @@ -29,7 +29,7 @@ locals { disk.disk_labels, { slurm_cluster_name = var.slurm_cluster_name - slurm_instance_role = local.slurm_instance_role + slurm_instance_role = var.slurm_instance_role }, ) } @@ -57,13 +57,8 @@ locals { : "" ) - slurm_instance_role = var.slurm_instance_role != null ? lower(var.slurm_instance_role) : null - name_prefix = ( - local.slurm_instance_role != null - ? "${var.slurm_cluster_name}-${local.slurm_instance_role}-${var.name_prefix}" - : "${var.slurm_cluster_name}-${var.name_prefix}" - ) + name_prefix = "${var.slurm_cluster_name}-${var.slurm_instance_role}-${var.name_prefix}" total_egress_bandwidth_tier = var.bandwidth_tier == "tier_1_enabled" ? "TIER_1" : "DEFAULT" @@ -123,7 +118,7 @@ module "instance_template" { var.labels, { slurm_cluster_name = var.slurm_cluster_name - slurm_instance_role = local.slurm_instance_role + slurm_instance_role = var.slurm_instance_role }, ) instance_termination_action = var.termination_action @@ -136,7 +131,7 @@ module "instance_template" { enable-oslogin = upper(var.enable_oslogin) slurm_bucket_path = var.slurm_bucket_path slurm_cluster_name = var.slurm_cluster_name - slurm_instance_role = local.slurm_instance_role + slurm_instance_role = var.slurm_instance_role }, ) @@ -152,10 +147,9 @@ module "instance_template" { disk_labels = merge( { slurm_cluster_name = var.slurm_cluster_name - slurm_instance_role = local.slurm_instance_role + slurm_instance_role = var.slurm_instance_role }, var.disk_labels, ) - additional_disks = local.additional_disks - resource_policies = var.resource_policies + additional_disks = local.additional_disks } diff --git a/community/modules/internal/slurm-gcp-v6/instance_template/variables.tf b/community/modules/internal/slurm-gcp-v6/instance_template/variables.tf index e8393e9654..d9ff5591d4 100644 --- a/community/modules/internal/slurm-gcp-v6/instance_template/variables.tf +++ b/community/modules/internal/slurm-gcp-v6/instance_template/variables.tf @@ -340,14 +340,10 @@ variable "additional_disks" { variable "slurm_instance_role" { type = string description = "Slurm instance type. Must be one of: controller; login; compute; or null." - default = null validation { - condition = ( - var.slurm_instance_role == null - ? 
true - : contains(["controller", "login", "compute"], lower(var.slurm_instance_role))) - error_message = "Must be one of: controller; login; compute; or null." + condition = contains(["controller", "login", "compute"], var.slurm_instance_role) + error_message = "Must be one of: controller; login; compute." } } @@ -371,16 +367,3 @@ variable "slurm_bucket_path" { description = "GCS Bucket URI of Slurm cluster file storage." type = string } - -variable "resource_policies" { - description = <<-EOD - A list of self_links of resource policies to attach to the instance. - Currently a max of 1 resource policy is supported. - EOD - type = list(string) - default = null - validation { - condition = try(length(var.resource_policies) <= 1, true) - error_message = "Only one resource policy can be attached to the instance." - } -} diff --git a/community/modules/internal/slurm-gcp-v6/internal_instance_template/README.md b/community/modules/internal/slurm-gcp-v6/internal_instance_template/README.md index eff0d37a02..333886091b 100644 --- a/community/modules/internal/slurm-gcp-v6/internal_instance_template/README.md +++ b/community/modules/internal/slurm-gcp-v6/internal_instance_template/README.md @@ -41,7 +41,6 @@ No modules. | [disk\_size\_gb](#input\_disk\_size\_gb) | Boot disk size in GB | `string` | `"100"` | no | | [disk\_type](#input\_disk\_type) | Boot disk type, can be either pd-ssd, local-ssd, or pd-standard | `string` | `"pd-standard"` | no | | [enable\_confidential\_vm](#input\_enable\_confidential\_vm) | Whether to enable the Confidential VM configuration on the instance. Note that the instance image must support Confidential VMs. See https://cloud.google.com/compute/docs/images | `bool` | `false` | no | -| [enable\_nested\_virtualization](#input\_enable\_nested\_virtualization) | Defines whether the instance should have nested virtualization enabled. | `bool` | `false` | no | | [enable\_shielded\_vm](#input\_enable\_shielded\_vm) | Whether to enable the Shielded VM configuration on the instance. Note that the instance image must support Shielded VMs. See https://cloud.google.com/compute/docs/images | `bool` | `false` | no | | [gpu](#input\_gpu) | GPU information. Type and count of GPU to attach to the instance template. See https://cloud.google.com/compute/docs/gpus more details |
object({
type = string
count = number
})
| `null` | no | | [instance\_termination\_action](#input\_instance\_termination\_action) | Which action to take when Compute Engine preempts the VM. Value can be: 'STOP', 'DELETE'. The default value is 'STOP'.
See https://cloud.google.com/compute/docs/instances/spot for more details. | `string` | `"STOP"` | no | @@ -50,7 +49,7 @@ No modules. | [machine\_type](#input\_machine\_type) | Machine type to create, e.g. n1-standard-1 | `string` | `"n1-standard-1"` | no | | [metadata](#input\_metadata) | Metadata, provided as a map | `map(string)` | `{}` | no | | [min\_cpu\_platform](#input\_min\_cpu\_platform) | Specifies a minimum CPU platform. Applicable values are the friendly names of CPU platforms, such as Intel Haswell or Intel Skylake. See the complete list: https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform | `string` | `null` | no | -| [name\_prefix](#input\_name\_prefix) | Name prefix for the instance template | `string` | `"default-instance-template"` | no | +| [name\_prefix](#input\_name\_prefix) | Name prefix for the instance template | `string` | n/a | yes | | [network](#input\_network) | The name or self\_link of the network to attach this interface to. Use network attribute for Legacy or Auto subnetted networks and subnetwork for custom subnetted networks. | `string` | `""` | no | | [network\_ip](#input\_network\_ip) | Private IP address to assign to the instance if desired. | `string` | `""` | no | | [nic\_type](#input\_nic\_type) | The type of vNIC to be used on this interface. Possible values: GVNIC, VIRTIO\_NET. | `string` | `null` | no | @@ -58,7 +57,6 @@ No modules. | [preemptible](#input\_preemptible) | Allow the instance to be preempted | `bool` | `false` | no | | [project\_id](#input\_project\_id) | The GCP project ID | `string` | `null` | no | | [region](#input\_region) | Region where the instance template should be created. | `string` | `null` | no | -| [resource\_policies](#input\_resource\_policies) | A list of self\_links of resource policies to attach to the instance.
Currently a max of 1 resource policy is supported. | `list(string)` | `null` | no | | [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template#service_account. |
object({
email = optional(string)
scopes = set(string)
})
| n/a | yes | | [shielded\_instance\_config](#input\_shielded\_instance\_config) | Not used unless enable\_shielded\_vm is true. Shielded VM configuration for the instance. |
object({
enable_secure_boot = bool
enable_vtpm = bool
enable_integrity_monitoring = bool
})
|
{
"enable_integrity_monitoring": true,
"enable_secure_boot": true,
"enable_vtpm": true
}
| no | | [source\_image](#input\_source\_image) | Source disk image. If neither source\_image nor source\_image\_family is specified, defaults to the latest public CentOS image. | `string` | `""` | no | diff --git a/community/modules/internal/slurm-gcp-v6/internal_instance_template/main.tf b/community/modules/internal/slurm-gcp-v6/internal_instance_template/main.tf index be1fdd600e..eef402fafa 100644 --- a/community/modules/internal/slurm-gcp-v6/internal_instance_template/main.tf +++ b/community/modules/internal/slurm-gcp-v6/internal_instance_template/main.tf @@ -75,7 +75,6 @@ resource "google_compute_instance_template" "tpl" { metadata_startup_script = var.startup_script region = var.region min_cpu_platform = var.min_cpu_platform - resource_policies = var.resource_policies service_account { email = coalesce(var.service_account.email, "${data.google_project.this.number}-compute@developer.gserviceaccount.com") @@ -177,7 +176,7 @@ resource "google_compute_instance_template" "tpl" { } advanced_machine_features { - enable_nested_virtualization = var.enable_nested_virtualization + enable_nested_virtualization = false threads_per_core = var.threads_per_core } diff --git a/community/modules/internal/slurm-gcp-v6/internal_instance_template/variables.tf b/community/modules/internal/slurm-gcp-v6/internal_instance_template/variables.tf index 874fcf51bf..78a178038e 100644 --- a/community/modules/internal/slurm-gcp-v6/internal_instance_template/variables.tf +++ b/community/modules/internal/slurm-gcp-v6/internal_instance_template/variables.tf @@ -21,7 +21,6 @@ variable "project_id" { variable "name_prefix" { description = "Name prefix for the instance template" type = string - default = "default-instance-template" } variable "machine_type" { @@ -96,12 +95,6 @@ variable "region" { default = null } -variable "enable_nested_virtualization" { - type = bool - description = "Defines whether the instance should have nested virtualization enabled." - default = false -} - variable "threads_per_core" { description = "The number of threads per physical core. To disable simultaneous multithreading (SMT) set this to 1." type = number @@ -348,17 +341,3 @@ EOF }) default = null } - - -variable "resource_policies" { - description = <<-EOD - A list of self_links of resource policies to attach to the instance. - Currently a max of 1 resource policy is supported. - EOD - type = list(string) - default = null - validation { - condition = try(length(var.resource_policies) <= 1, true) - error_message = "Only one resource policy can be attached to the instance." 
- } -} diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf index 1f492a1402..cfb61787cb 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf @@ -59,16 +59,12 @@ module "slurm_login_instance" { source = "../../internal/slurm-gcp-v6/instance" for_each = { for x in var.login_nodes : x.name_prefix => x } - access_config = each.value.access_config - add_hostname_suffix = true - hostname = "${local.slurm_cluster_name}-${each.key}" - slurm_instance_role = "login" + access_config = each.value.access_config + hostname = "${local.slurm_cluster_name}-${each.key}" - project_id = var.project_id - slurm_cluster_name = local.slurm_cluster_name + project_id = var.project_id instance_template = module.slurm_login_template[each.key].self_link - labels = each.value.labels num_instances = each.value.num_instances additional_networks = each.value.additional_networks From 1b1615ebcadd012644b8d044f9cb486a6400e9a7 Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Fri, 13 Dec 2024 08:43:16 +0000 Subject: [PATCH 016/140] Initial changes for fixing gke module provider --- examples/storage-gke.yaml | 29 +++++++++++++++++++ .../gke-persistent-volume/README.md | 5 ---- .../file-system/gke-persistent-volume/main.tf | 17 ----------- .../gke-persistent-volume/variables.tf | 5 ---- .../gke-persistent-volume/versions.tf | 4 --- modules/management/kubectl-apply/README.md | 7 ----- modules/management/kubectl-apply/main.tf | 25 +++------------- modules/management/kubectl-apply/providers.tf | 8 ----- modules/management/kubectl-apply/variables.tf | 11 ------- modules/management/kubectl-apply/versions.tf | 8 ----- modules/scheduler/gke-cluster/README.md | 3 ++ modules/scheduler/gke-cluster/main.tf | 3 -- modules/scheduler/gke-cluster/outputs.tf | 15 ++++++++++ .../pre-existing-gke-cluster/README.md | 4 +++ .../pre-existing-gke-cluster/main.tf | 5 ++-- .../pre-existing-gke-cluster/outputs.tf | 15 ++++++++++ pkg/config/expand.go | 18 ++++++++++++ 17 files changed, 90 insertions(+), 92 deletions(-) diff --git a/examples/storage-gke.yaml b/examples/storage-gke.yaml index faa587b046..d810b2bad0 100644 --- a/examples/storage-gke.yaml +++ b/examples/storage-gke.yaml @@ -26,6 +26,35 @@ vars: deployment_groups: - group: primary + # terraform_providers: + # kubectl: + # source: "gavinbunney/kubectl" + # version: ">= 1.7.0" + # configuration: + # host: $(gke_cluster.gke_cluster_endpoint) + # cluster_ca_certificate: $(gke_cluster.cluster_ca_certificate) + # token: $(gke_cluster.access_token) + # load_config_file: false + # apply_retry_count: 15 + # # host: "module.gke_cluster.gke_cluster_endpoint" + # # cluster_ca_certificate: "module.gke_cluster.cluster_ca_certificate" + # # token: "module.gke_cluster.access_token" + # # load_config_file: false + # # apply_retry_count: 15 + # google: + # source: hashicorp/google + # version: 6.12.0 + # configuration: + # project: $(vars.project_id) + # region: $(vars.region) + # zone: $(vars.zone) + # google-beta: + # source: hashicorp/google-beta + # version: 6.13.0 + # configuration: + # project: $(vars.project_id) + # region: $(vars.region) + # zone: $(vars.zone) modules: - id: network1 source: modules/network/vpc diff --git a/modules/file-system/gke-persistent-volume/README.md b/modules/file-system/gke-persistent-volume/README.md index f4d94d8c3b..f53eb3067b 100644 --- 
a/modules/file-system/gke-persistent-volume/README.md +++ b/modules/file-system/gke-persistent-volume/README.md @@ -121,7 +121,6 @@ limitations under the License. | Name | Version | |------|---------| | [terraform](#requirement\_terraform) | >= 1.0 | -| [google](#requirement\_google) | >= 4.42 | | [kubectl](#requirement\_kubectl) | >= 1.7.0 | | [local](#requirement\_local) | >= 2.0.0 | @@ -129,7 +128,6 @@ limitations under the License. | Name | Version | |------|---------| -| [google](#provider\_google) | >= 4.42 | | [kubectl](#provider\_kubectl) | >= 1.7.0 | | [local](#provider\_local) | >= 2.0.0 | @@ -144,15 +142,12 @@ No modules. | [kubectl_manifest.pv](https://registry.terraform.io/providers/gavinbunney/kubectl/latest/docs/resources/manifest) | resource | | [kubectl_manifest.pvc](https://registry.terraform.io/providers/gavinbunney/kubectl/latest/docs/resources/manifest) | resource | | [local_file.debug_file](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource | -| [google_client_config.default](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/client_config) | data source | -| [google_container_cluster.gke_cluster](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/container_cluster) | data source | ## Inputs | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [capacity\_gb](#input\_capacity\_gb) | The storage capacity with which to create the persistent volume. | `number` | n/a | yes | -| [cluster\_id](#input\_cluster\_id) | An identifier for the GKE cluster in the format `projects/{{project}}/locations/{{location}}/clusters/{{cluster}}` | `string` | n/a | yes | | [filestore\_id](#input\_filestore\_id) | An identifier for a filestore with the format `projects/{{project}}/locations/{{location}}/instances/{{name}}`. | `string` | `null` | no | | [gcs\_bucket\_name](#input\_gcs\_bucket\_name) | The gcs bucket to be used with the persistent volume. | `string` | `null` | no | | [labels](#input\_labels) | GCE resource labels to be applied to resources. Key-value pairs. | `map(string)` | n/a | yes | diff --git a/modules/file-system/gke-persistent-volume/main.tf b/modules/file-system/gke-persistent-volume/main.tf index 5b52bcc950..4812a799e2 100644 --- a/modules/file-system/gke-persistent-volume/main.tf +++ b/modules/file-system/gke-persistent-volume/main.tf @@ -77,9 +77,6 @@ locals { capacity = "${var.capacity_gb}Gi" } ) - - cluster_name = split("/", var.cluster_id)[5] - cluster_location = split("/", var.cluster_id)[3] } resource "local_file" "debug_file" { @@ -90,20 +87,6 @@ resource "local_file" "debug_file" { filename = "${path.root}/pv-pvc-debug-file-${local.filestore_name}.yaml" } -data "google_container_cluster" "gke_cluster" { - name = local.cluster_name - location = local.cluster_location -} - -data "google_client_config" "default" {} - -provider "kubectl" { - host = "https://${data.google_container_cluster.gke_cluster.endpoint}" - cluster_ca_certificate = base64decode(data.google_container_cluster.gke_cluster.master_auth[0].cluster_ca_certificate) - token = data.google_client_config.default.access_token - load_config_file = false -} - resource "kubectl_manifest" "pv" { yaml_body = local.is_gcs ? 
local.gcs_pv_contents : local.filestore_pv_contents diff --git a/modules/file-system/gke-persistent-volume/variables.tf b/modules/file-system/gke-persistent-volume/variables.tf index a72fa3857f..ebf411d593 100644 --- a/modules/file-system/gke-persistent-volume/variables.tf +++ b/modules/file-system/gke-persistent-volume/variables.tf @@ -14,11 +14,6 @@ * limitations under the License. */ -variable "cluster_id" { - description = "An identifier for the GKE cluster in the format `projects/{{project}}/locations/{{location}}/clusters/{{cluster}}`" - type = string -} - variable "network_storage" { description = "Network attached storage mount to be configured." type = object({ diff --git a/modules/file-system/gke-persistent-volume/versions.tf b/modules/file-system/gke-persistent-volume/versions.tf index 9aa0deab4c..d0a426f723 100644 --- a/modules/file-system/gke-persistent-volume/versions.tf +++ b/modules/file-system/gke-persistent-volume/versions.tf @@ -15,10 +15,6 @@ terraform { required_version = ">= 1.0" required_providers { - google = { - source = "hashicorp/google" - version = ">= 4.42" - } kubectl = { source = "gavinbunney/kubectl" version = ">= 1.7.0" diff --git a/modules/management/kubectl-apply/README.md b/modules/management/kubectl-apply/README.md index 360929da22..02812a2d96 100644 --- a/modules/management/kubectl-apply/README.md +++ b/modules/management/kubectl-apply/README.md @@ -101,15 +101,12 @@ limitations under the License. | Name | Version | |------|---------| | [terraform](#requirement\_terraform) | >= 1.3 | -| [google](#requirement\_google) | > 5.0 | | [http](#requirement\_http) | ~> 3.0 | -| [kubectl](#requirement\_kubectl) | >= 1.7.0 | ## Providers | Name | Version | |------|---------| -| [google](#provider\_google) | > 5.0 | | [terraform](#provider\_terraform) | n/a | ## Modules @@ -127,18 +124,14 @@ limitations under the License. |------|------| | [terraform_data.jobset_validations](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | | [terraform_data.kueue_validations](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | -| [google_client_config.default](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/client_config) | data source | -| [google_container_cluster.gke_cluster](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/container_cluster) | data source | ## Inputs | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [apply\_manifests](#input\_apply\_manifests) | A list of manifests to apply to GKE cluster using kubectl. For more details see [kubectl module's inputs](kubectl/README.md). |
list(object({
content = optional(string, null)
source = optional(string, null)
template_vars = optional(map(any), null)
server_side_apply = optional(bool, false)
wait_for_rollout = optional(bool, true)
}))
| `[]` | no | -| [cluster\_id](#input\_cluster\_id) | An identifier for the gke cluster resource with format projects//locations//clusters/. | `string` | n/a | yes | | [jobset](#input\_jobset) | Install [Jobset](https://github.com/kubernetes-sigs/jobset) which manages a group of K8s [jobs](https://kubernetes.io/docs/concepts/workloads/controllers/job/) as a unit. |
object({
install = optional(bool, false)
version = optional(string, "v0.5.2")
})
| `{}` | no | | [kueue](#input\_kueue) | Install and configure [Kueue](https://kueue.sigs.k8s.io/docs/overview/) workload scheduler. A configuration yaml/template file can be provided with config\_path to be applied right after kueue installation. If a template file provided, its variables can be set to config\_template\_vars. |
object({
install = optional(bool, false)
version = optional(string, "v0.8.1")
config_path = optional(string, null)
config_template_vars = optional(map(any), null)
})
| `{}` | no | -| [project\_id](#input\_project\_id) | The project ID that hosts the gke cluster. | `string` | n/a | yes | ## Outputs diff --git a/modules/management/kubectl-apply/main.tf b/modules/management/kubectl-apply/main.tf index 5663e01580..1f02677ff0 100644 --- a/modules/management/kubectl-apply/main.tf +++ b/modules/management/kubectl-apply/main.tf @@ -15,11 +15,6 @@ */ locals { - cluster_id_parts = split("/", var.cluster_id) - cluster_name = local.cluster_id_parts[5] - cluster_location = local.cluster_id_parts[3] - project_id = var.project_id != null ? var.project_id : local.cluster_id_parts[1] - apply_manifests_map = tomap({ for index, manifest in var.apply_manifests : index => manifest }) @@ -30,14 +25,6 @@ locals { jobset_install_source = format("${path.module}/manifests/jobset-%s.yaml", try(var.jobset.version, "")) } -data "google_container_cluster" "gke_cluster" { - project = local.project_id - name = local.cluster_name - location = local.cluster_location -} - -data "google_client_config" "default" {} - module "kubectl_apply_manifests" { for_each = local.apply_manifests_map source = "./kubectl" @@ -49,8 +36,7 @@ module "kubectl_apply_manifests" { wait_for_rollout = each.value.wait_for_rollout providers = { - kubectl = kubectl - http = http.h + http = http.h } } @@ -60,8 +46,7 @@ module "install_kueue" { server_side_apply = true providers = { - kubectl = kubectl - http = http.h + http = http.h } } @@ -71,8 +56,7 @@ module "install_jobset" { server_side_apply = true providers = { - kubectl = kubectl - http = http.h + http = http.h } } @@ -86,7 +70,6 @@ module "configure_kueue" { wait_for_rollout = true providers = { - kubectl = kubectl - http = http.h + http = http.h } } diff --git a/modules/management/kubectl-apply/providers.tf b/modules/management/kubectl-apply/providers.tf index 74d157b93b..d5577975f3 100644 --- a/modules/management/kubectl-apply/providers.tf +++ b/modules/management/kubectl-apply/providers.tf @@ -14,14 +14,6 @@ * limitations under the License. */ -provider "kubectl" { - host = "https://${data.google_container_cluster.gke_cluster.endpoint}" - token = data.google_client_config.default.access_token - cluster_ca_certificate = base64decode(data.google_container_cluster.gke_cluster.master_auth[0].cluster_ca_certificate) - load_config_file = false - apply_retry_count = 15 # Terraform may apply resources in parallel, leading to potential dependency issues. This retry mechanism ensures that if a resource's dependencies aren't ready, Terraform will attempt to apply it again. -} - provider "http" { alias = "h" } diff --git a/modules/management/kubectl-apply/variables.tf b/modules/management/kubectl-apply/variables.tf index c493332e7c..7a4f54a0a9 100644 --- a/modules/management/kubectl-apply/variables.tf +++ b/modules/management/kubectl-apply/variables.tf @@ -37,17 +37,6 @@ resource "terraform_data" "jobset_validations" { } } -variable "project_id" { - description = "The project ID that hosts the gke cluster." - type = string -} - -variable "cluster_id" { - description = "An identifier for the gke cluster resource with format projects//locations//clusters/." - type = string - nullable = false -} - variable "apply_manifests" { description = "A list of manifests to apply to GKE cluster using kubectl. For more details see [kubectl module's inputs](kubectl/README.md)." 
type = list(object({ diff --git a/modules/management/kubectl-apply/versions.tf b/modules/management/kubectl-apply/versions.tf index 227838747c..774cddd7ff 100644 --- a/modules/management/kubectl-apply/versions.tf +++ b/modules/management/kubectl-apply/versions.tf @@ -16,14 +16,6 @@ terraform { required_providers { - google = { - source = "hashicorp/google" - version = "> 5.0" - } - kubectl = { - source = "gavinbunney/kubectl" - version = ">= 1.7.0" - } http = { source = "hashicorp/http" version = "~> 3.0" diff --git a/modules/scheduler/gke-cluster/README.md b/modules/scheduler/gke-cluster/README.md index 675039add6..1ac653f698 100644 --- a/modules/scheduler/gke-cluster/README.md +++ b/modules/scheduler/gke-cluster/README.md @@ -197,7 +197,10 @@ limitations under the License. | Name | Description | |------|-------------| +| [access\_token](#output\_access\_token) | Google client config access token. | +| [cluster\_ca\_certificate](#output\_cluster\_ca\_certificate) | GKE cluster CA certificate. | | [cluster\_id](#output\_cluster\_id) | An identifier for the resource with format projects/{{project\_id}}/locations/{{region}}/clusters/{{name}}. | +| [gke\_cluster\_endpoint](#output\_gke\_cluster\_endpoint) | GKE cluster endpoint. | | [gke\_cluster\_exists](#output\_gke\_cluster\_exists) | A static flag that signals to downstream modules that a cluster has been created. Needed by community/modules/scripts/kubernetes-operations. | | [gke\_version](#output\_gke\_version) | GKE cluster's version. | | [instructions](#output\_instructions) | Instructions on how to connect to the created cluster. | diff --git a/modules/scheduler/gke-cluster/main.tf b/modules/scheduler/gke-cluster/main.tf index 5b416a85bb..55188acb6b 100644 --- a/modules/scheduler/gke-cluster/main.tf +++ b/modules/scheduler/gke-cluster/main.tf @@ -336,9 +336,6 @@ module "workload_identity" { module "kubectl_apply" { source = "../../management/kubectl-apply" - cluster_id = google_container_cluster.gke_cluster.id - project_id = var.project_id - apply_manifests = flatten([ for idx, network_info in var.additional_networks : [ { diff --git a/modules/scheduler/gke-cluster/outputs.tf b/modules/scheduler/gke-cluster/outputs.tf index 28e00171ff..9ffd370664 100644 --- a/modules/scheduler/gke-cluster/outputs.tf +++ b/modules/scheduler/gke-cluster/outputs.tf @@ -79,3 +79,18 @@ output "gke_version" { description = "GKE cluster's version." value = google_container_cluster.gke_cluster.master_version } + +output "gke_cluster_endpoint" { + description = "GKE cluster endpoint." + value = "https://${google_container_cluster.gke_cluster.endpoint}" +} + +output "cluster_ca_certificate" { + description = "GKE cluster CA certificate." + value = base64decode(google_container_cluster.gke_cluster.master_auth[0].cluster_ca_certificate) +} + +output "access_token" { + description = "Google client config access token." + value = data.google_client_config.default.access_token +} diff --git a/modules/scheduler/pre-existing-gke-cluster/README.md b/modules/scheduler/pre-existing-gke-cluster/README.md index aaac5a547f..4faed93294 100644 --- a/modules/scheduler/pre-existing-gke-cluster/README.md +++ b/modules/scheduler/pre-existing-gke-cluster/README.md @@ -94,6 +94,7 @@ limitations under the License. 
| Name | Type | |------|------| +| [google_client_config.default](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/client_config) | data source | | [google_container_cluster.existing_gke_cluster](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/container_cluster) | data source | ## Inputs @@ -110,7 +111,10 @@ limitations under the License. | Name | Description | |------|-------------| +| [access\_token](#output\_access\_token) | Google client config access token. | +| [cluster\_ca\_certificate](#output\_cluster\_ca\_certificate) | GKE cluster CA certificate. | | [cluster\_id](#output\_cluster\_id) | An identifier for the gke cluster with format projects/{{project\_id}}/locations/{{region}}/clusters/{{name}}. | +| [gke\_cluster\_endpoint](#output\_gke\_cluster\_endpoint) | GKE cluster endpoint. | | [gke\_cluster\_exists](#output\_gke\_cluster\_exists) | A static flag that signals to downstream modules that a cluster exists. | | [gke\_version](#output\_gke\_version) | GKE cluster's version. | diff --git a/modules/scheduler/pre-existing-gke-cluster/main.tf b/modules/scheduler/pre-existing-gke-cluster/main.tf index 926d2be100..e90c8877ed 100644 --- a/modules/scheduler/pre-existing-gke-cluster/main.tf +++ b/modules/scheduler/pre-existing-gke-cluster/main.tf @@ -60,11 +60,10 @@ locals { ]) } +data "google_client_config" "default" {} + module "kubectl_apply" { source = "../../management/kubectl-apply" - cluster_id = data.google_container_cluster.existing_gke_cluster.id - project_id = var.project_id - apply_manifests = concat(local.apply_manifests_non_rdma_networks, local.apply_manifests_rdma_networks) } diff --git a/modules/scheduler/pre-existing-gke-cluster/outputs.tf b/modules/scheduler/pre-existing-gke-cluster/outputs.tf index 8884ee30b0..cab4bf0b22 100644 --- a/modules/scheduler/pre-existing-gke-cluster/outputs.tf +++ b/modules/scheduler/pre-existing-gke-cluster/outputs.tf @@ -31,3 +31,18 @@ output "gke_version" { description = "GKE cluster's version." value = data.google_container_cluster.existing_gke_cluster.master_version } + +output "gke_cluster_endpoint" { + description = "GKE cluster endpoint." + value = "https://${data.google_container_cluster.existing_gke_cluster.endpoint}" +} + +output "cluster_ca_certificate" { + description = "GKE cluster CA certificate." + value = base64decode(data.google_container_cluster.existing_gke_cluster.master_auth[0].cluster_ca_certificate) +} + +output "access_token" { + description = "Google client config access token." 
+ value = data.google_client_config.default.access_token +} diff --git a/pkg/config/expand.go b/pkg/config/expand.go index ae5c30a328..ffabf3b6ce 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -207,6 +207,23 @@ func getDefaultGoogleProviders(bp Blueprint) map[string]TerraformProvider { Configuration: gglConf}} } +// func getDefaultKubectlProviders() map[string]TerraformProvider { +// kubectlConf := Dict{} +// for s, v := range map[string]string{ +// "cluster_ca_certificate": "cluster_ca_certificate", +// "host": "gke_cluster_endpoint", +// "token": "access_token"} { +// kubectlConf = kubectlConf.With(s, ModuleRef("gke_cluster", v).AsValue()) +// } +// kubectlConf = kubectlConf.With("apply_retry_count", cty.NumberIntVal(15)) +// kubectlConf = kubectlConf.With("load_config_file", cty.BoolVal(false)) +// return map[string]TerraformProvider{ +// "kubectl": { +// Source: "gavinbunney/kubectl", +// Version: ">= 1.7.0", +// Configuration: kubectlConf}} +// } + func (bp Blueprint) expandProviders(grp *Group) { // 1. DEFAULT: use TerraformProviders provider dictionary (if supplied) // 2. If top-level TerraformProviders is defined, insert that @@ -216,6 +233,7 @@ func (bp Blueprint) expandProviders(grp *Group) { pv := &grp.TerraformProviders if defaults == nil { defaults = getDefaultGoogleProviders(bp) + // maps.Copy(defaults, getDefaultKubectlProviders()) } if (*pv) == nil { (*pv) = maps.Clone(defaults) From ef89293bc4431289e6fd584952d4e524374270d4 Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Fri, 13 Dec 2024 11:30:05 +0000 Subject: [PATCH 017/140] Further refined changes for gke root module --- examples/storage-gke.yaml | 53 ++++++++++++--------------- modules/compute/gke-node-pool/main.tf | 3 -- pkg/config/config.go | 9 +++-- pkg/config/expand.go | 18 --------- pkg/config/path.go | 22 +++++------ 5 files changed, 40 insertions(+), 65 deletions(-) diff --git a/examples/storage-gke.yaml b/examples/storage-gke.yaml index d810b2bad0..e2452e83aa 100644 --- a/examples/storage-gke.yaml +++ b/examples/storage-gke.yaml @@ -26,35 +26,30 @@ vars: deployment_groups: - group: primary - # terraform_providers: - # kubectl: - # source: "gavinbunney/kubectl" - # version: ">= 1.7.0" - # configuration: - # host: $(gke_cluster.gke_cluster_endpoint) - # cluster_ca_certificate: $(gke_cluster.cluster_ca_certificate) - # token: $(gke_cluster.access_token) - # load_config_file: false - # apply_retry_count: 15 - # # host: "module.gke_cluster.gke_cluster_endpoint" - # # cluster_ca_certificate: "module.gke_cluster.cluster_ca_certificate" - # # token: "module.gke_cluster.access_token" - # # load_config_file: false - # # apply_retry_count: 15 - # google: - # source: hashicorp/google - # version: 6.12.0 - # configuration: - # project: $(vars.project_id) - # region: $(vars.region) - # zone: $(vars.zone) - # google-beta: - # source: hashicorp/google-beta - # version: 6.13.0 - # configuration: - # project: $(vars.project_id) - # region: $(vars.region) - # zone: $(vars.zone) + terraform_providers: + kubectl: + source: "gavinbunney/kubectl" + version: ">= 1.7.0" + configuration: + host: $(gke_cluster.gke_cluster_endpoint) + cluster_ca_certificate: $(gke_cluster.cluster_ca_certificate) + token: $(gke_cluster.access_token) + load_config_file: false + apply_retry_count: 15 + google: + source: hashicorp/google + version: 6.12.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + google-beta: + source: hashicorp/google-beta + version: 6.13.0 + configuration: + project: 
$(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) modules: - id: network1 source: modules/network/vpc diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index f1999cbd0b..5d4bf02fb2 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -355,9 +355,6 @@ resource "null_resource" "enable_tcpxo_in_workload" { module "kubectl_apply" { source = "../../management/kubectl-apply" - cluster_id = var.cluster_id - project_id = var.project_id - apply_manifests = flatten([ for manifest in local.gpu_direct_setting.gpu_direct_manifests : [ { diff --git a/pkg/config/config.go b/pkg/config/config.go index df2192291f..099973f416 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -698,14 +698,15 @@ func (bp *Blueprint) checkToolkitModulesUrlAndVersion() error { func (bp *Blueprint) checkReferences() error { errs := Errors{} bp.visitDicts(func(dp dictPath, d *Dict) { - isModSettings := IsModuleSettingsPath(dp) + // isModSettings := IsModuleSettingsPath(dp) for k, v := range d.Items() { for ref, rp := range valueReferences(v) { path := dp.Dot(k).Cty(rp) + // fmt.Println("GlobalVar", ref.GlobalVar, "Name", ref.Name, "Module", ref.Module, "rp", rp, "path", path) if !ref.GlobalVar { - if !isModSettings { - errs.At(path, fmt.Errorf("module output %q can only be referenced in other module settings", ref)) - } + // if !isModSettings { + // errs.At(path, fmt.Errorf("module output %q can only be referenced in other module settings", ref)) + // } // module to module references are checked by validateModuleSettingReferences later return } diff --git a/pkg/config/expand.go b/pkg/config/expand.go index ffabf3b6ce..ae5c30a328 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -207,23 +207,6 @@ func getDefaultGoogleProviders(bp Blueprint) map[string]TerraformProvider { Configuration: gglConf}} } -// func getDefaultKubectlProviders() map[string]TerraformProvider { -// kubectlConf := Dict{} -// for s, v := range map[string]string{ -// "cluster_ca_certificate": "cluster_ca_certificate", -// "host": "gke_cluster_endpoint", -// "token": "access_token"} { -// kubectlConf = kubectlConf.With(s, ModuleRef("gke_cluster", v).AsValue()) -// } -// kubectlConf = kubectlConf.With("apply_retry_count", cty.NumberIntVal(15)) -// kubectlConf = kubectlConf.With("load_config_file", cty.BoolVal(false)) -// return map[string]TerraformProvider{ -// "kubectl": { -// Source: "gavinbunney/kubectl", -// Version: ">= 1.7.0", -// Configuration: kubectlConf}} -// } - func (bp Blueprint) expandProviders(grp *Group) { // 1. DEFAULT: use TerraformProviders provider dictionary (if supplied) // 2. 
If top-level TerraformProviders is defined, insert that @@ -233,7 +216,6 @@ func (bp Blueprint) expandProviders(grp *Group) { pv := &grp.TerraformProviders if defaults == nil { defaults = getDefaultGoogleProviders(bp) - // maps.Copy(defaults, getDefaultKubectlProviders()) } if (*pv) == nil { (*pv) = maps.Clone(defaults) diff --git a/pkg/config/path.go b/pkg/config/path.go index 7d84f449d4..d0869d4bd2 100644 --- a/pkg/config/path.go +++ b/pkg/config/path.go @@ -194,14 +194,14 @@ func init() { initPath(&Root, nil, "") } -func IsModuleSettingsPath(p Path) bool { - parent := p.Parent() - if parent == nil { - return false - } - mp, ok := parent.(*ModulePath) - if !ok { - return false - } - return p == mp.Settings -} +// func IsModuleSettingsPath(p Path) bool { +// parent := p.Parent() +// if parent == nil { +// return false +// } +// mp, ok := parent.(*ModulePath) +// if !ok { +// return false +// } +// return p == mp.Settings +// } From 049376da2e85553306ae76806f0b6d0263bfc69e Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Fri, 13 Dec 2024 19:04:20 +0000 Subject: [PATCH 018/140] Fix misusage of `groupby_unsorted` --- .../modules/slurm_files/scripts/slurmsync.py | 2 +- .../modules/slurm_files/scripts/suspend.py | 5 ++--- .../modules/slurm_files/scripts/util.py | 8 +++----- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py index b26cdfcd5a..8b67365e68 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py @@ -387,7 +387,7 @@ def sync_slurm(): slurm_nodes = set(lookup().slurm_nodes().keys()) log.debug(f"reconciling {len(compute_instances)} GCP instances and {len(slurm_nodes)} Slurm nodes.") - for action, nodes in util.groupby_unsorted(compute_instances | slurm_nodes, get_node_action): + for action, nodes in util.groupby_unsorted(list(compute_instances | slurm_nodes), get_node_action): action.apply(list(nodes)) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py index dc901b6aba..f01013e1a2 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py @@ -98,9 +98,8 @@ def delete_instances(instances): log.info(f"delete {len(valid)} instances ({to_hostlist(valid)})") done, failed = batch_execute(requests) - if failed: - for err, nodes in groupby_unsorted(lambda n: failed[n][1], failed.keys()): - log.error(f"instances failed to delete: {err} ({to_hostlist(nodes)})") + for node, (_, err) in failed.items(): + log.error(f"instance {node} failed to delete: {err}") wait_for_operations(done.values()) # TODO do we need to check each operation for success? 
That is a lot more API calls log.info(f"deleted {len(done)} instances {to_hostlist(done.keys())}") diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py index 47d8b6c771..017443002f 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py @@ -14,10 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Iterable, List, Tuple, Optional, Any, Dict +from typing import Iterable, List, Tuple, Optional, Any, Dict, Sequence import argparse import base64 -import collections from dataclasses import dataclass from datetime import timedelta, datetime import hashlib @@ -36,7 +35,7 @@ import sys import tempfile from enum import Enum -from collections import defaultdict, namedtuple +from collections import defaultdict from concurrent.futures import ThreadPoolExecutor, as_completed from contextlib import contextmanager from functools import lru_cache, reduce, wraps @@ -746,8 +745,7 @@ def chunked(iterable, n=API_REQ_LIMIT): return yield chunk - -def groupby_unsorted(seq, key): +def groupby_unsorted(seq: Sequence[Any], key): indices = defaultdict(list) for i, el in enumerate(seq): indices[key(el)].append(i) From 03413f8e82368501885f116f36a484fe4f91d569 Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Sat, 14 Dec 2024 08:41:09 +0000 Subject: [PATCH 019/140] Append kubectl provider in root module for blueprint with gke cluster module --- examples/storage-gke.yaml | 24 -------------- .../gke-persistent-volume/README.md | 1 + .../file-system/gke-persistent-volume/main.tf | 2 +- .../gke-persistent-volume/variables.tf | 5 +++ modules/scheduler/gke-cluster/README.md | 2 +- modules/scheduler/gke-cluster/outputs.tf | 2 +- .../pre-existing-gke-cluster/README.md | 2 +- .../pre-existing-gke-cluster/outputs.tf | 2 +- pkg/config/config.go | 9 +++-- pkg/config/expand.go | 33 +++++++++++++++++++ pkg/config/path.go | 22 ++++++------- 11 files changed, 59 insertions(+), 45 deletions(-) diff --git a/examples/storage-gke.yaml b/examples/storage-gke.yaml index e2452e83aa..faa587b046 100644 --- a/examples/storage-gke.yaml +++ b/examples/storage-gke.yaml @@ -26,30 +26,6 @@ vars: deployment_groups: - group: primary - terraform_providers: - kubectl: - source: "gavinbunney/kubectl" - version: ">= 1.7.0" - configuration: - host: $(gke_cluster.gke_cluster_endpoint) - cluster_ca_certificate: $(gke_cluster.cluster_ca_certificate) - token: $(gke_cluster.access_token) - load_config_file: false - apply_retry_count: 15 - google: - source: hashicorp/google - version: 6.12.0 - configuration: - project: $(vars.project_id) - region: $(vars.region) - zone: $(vars.zone) - google-beta: - source: hashicorp/google-beta - version: 6.13.0 - configuration: - project: $(vars.project_id) - region: $(vars.region) - zone: $(vars.zone) modules: - id: network1 source: modules/network/vpc diff --git a/modules/file-system/gke-persistent-volume/README.md b/modules/file-system/gke-persistent-volume/README.md index f53eb3067b..23bce2de8d 100644 --- a/modules/file-system/gke-persistent-volume/README.md +++ b/modules/file-system/gke-persistent-volume/README.md @@ -150,6 +150,7 @@ No modules. 
| [capacity\_gb](#input\_capacity\_gb) | The storage capacity with which to create the persistent volume. | `number` | n/a | yes | | [filestore\_id](#input\_filestore\_id) | An identifier for a filestore with the format `projects/{{project}}/locations/{{location}}/instances/{{name}}`. | `string` | `null` | no | | [gcs\_bucket\_name](#input\_gcs\_bucket\_name) | The gcs bucket to be used with the persistent volume. | `string` | `null` | no | +| [gke\_cluster\_exists](#input\_gke\_cluster\_exists) | A static flag that signals to modules that a cluster has been created. | `bool` | n/a | yes | | [labels](#input\_labels) | GCE resource labels to be applied to resources. Key-value pairs. | `map(string)` | n/a | yes | | [network\_storage](#input\_network\_storage) | Network attached storage mount to be configured. |
object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
})
| n/a | yes | diff --git a/modules/file-system/gke-persistent-volume/main.tf b/modules/file-system/gke-persistent-volume/main.tf index 4812a799e2..c9ebaa6010 100644 --- a/modules/file-system/gke-persistent-volume/main.tf +++ b/modules/file-system/gke-persistent-volume/main.tf @@ -92,7 +92,7 @@ resource "kubectl_manifest" "pv" { lifecycle { precondition { - condition = (var.gcs_bucket_name != null) != (var.filestore_id != null) + condition = var.gke_cluster_exists && (var.gcs_bucket_name != null) != (var.filestore_id != null) error_message = "Either gcs_bucket_name or filestore_id must be set." } } diff --git a/modules/file-system/gke-persistent-volume/variables.tf b/modules/file-system/gke-persistent-volume/variables.tf index ebf411d593..88ff9c36b8 100644 --- a/modules/file-system/gke-persistent-volume/variables.tf +++ b/modules/file-system/gke-persistent-volume/variables.tf @@ -14,6 +14,11 @@ * limitations under the License. */ +variable "gke_cluster_exists" { + description = "A static flag that signals to modules that a cluster has been created." + type = bool +} + variable "network_storage" { description = "Network attached storage mount to be configured." type = object({ diff --git a/modules/scheduler/gke-cluster/README.md b/modules/scheduler/gke-cluster/README.md index 1ac653f698..e5a869156d 100644 --- a/modules/scheduler/gke-cluster/README.md +++ b/modules/scheduler/gke-cluster/README.md @@ -200,9 +200,9 @@ limitations under the License. | [access\_token](#output\_access\_token) | Google client config access token. | | [cluster\_ca\_certificate](#output\_cluster\_ca\_certificate) | GKE cluster CA certificate. | | [cluster\_id](#output\_cluster\_id) | An identifier for the resource with format projects/{{project\_id}}/locations/{{region}}/clusters/{{name}}. | -| [gke\_cluster\_endpoint](#output\_gke\_cluster\_endpoint) | GKE cluster endpoint. | | [gke\_cluster\_exists](#output\_gke\_cluster\_exists) | A static flag that signals to downstream modules that a cluster has been created. Needed by community/modules/scripts/kubernetes-operations. | | [gke\_version](#output\_gke\_version) | GKE cluster's version. | +| [host\_endpoint](#output\_host\_endpoint) | GKE cluster endpoint. | | [instructions](#output\_instructions) | Instructions on how to connect to the created cluster. | | [k8s\_service\_account\_name](#output\_k8s\_service\_account\_name) | Name of k8s service account. | diff --git a/modules/scheduler/gke-cluster/outputs.tf b/modules/scheduler/gke-cluster/outputs.tf index 9ffd370664..087462049a 100644 --- a/modules/scheduler/gke-cluster/outputs.tf +++ b/modules/scheduler/gke-cluster/outputs.tf @@ -80,7 +80,7 @@ output "gke_version" { value = google_container_cluster.gke_cluster.master_version } -output "gke_cluster_endpoint" { +output "host_endpoint" { description = "GKE cluster endpoint." value = "https://${google_container_cluster.gke_cluster.endpoint}" } diff --git a/modules/scheduler/pre-existing-gke-cluster/README.md b/modules/scheduler/pre-existing-gke-cluster/README.md index 4faed93294..c0715c472d 100644 --- a/modules/scheduler/pre-existing-gke-cluster/README.md +++ b/modules/scheduler/pre-existing-gke-cluster/README.md @@ -114,7 +114,7 @@ limitations under the License. | [access\_token](#output\_access\_token) | Google client config access token. | | [cluster\_ca\_certificate](#output\_cluster\_ca\_certificate) | GKE cluster CA certificate. 
| | [cluster\_id](#output\_cluster\_id) | An identifier for the gke cluster with format projects/{{project\_id}}/locations/{{region}}/clusters/{{name}}. | -| [gke\_cluster\_endpoint](#output\_gke\_cluster\_endpoint) | GKE cluster endpoint. | | [gke\_cluster\_exists](#output\_gke\_cluster\_exists) | A static flag that signals to downstream modules that a cluster exists. | | [gke\_version](#output\_gke\_version) | GKE cluster's version. | +| [host\_endpoint](#output\_host\_endpoint) | GKE cluster endpoint. | diff --git a/modules/scheduler/pre-existing-gke-cluster/outputs.tf b/modules/scheduler/pre-existing-gke-cluster/outputs.tf index cab4bf0b22..880928d21b 100644 --- a/modules/scheduler/pre-existing-gke-cluster/outputs.tf +++ b/modules/scheduler/pre-existing-gke-cluster/outputs.tf @@ -32,7 +32,7 @@ output "gke_version" { value = data.google_container_cluster.existing_gke_cluster.master_version } -output "gke_cluster_endpoint" { +output "host_endpoint" { description = "GKE cluster endpoint." value = "https://${data.google_container_cluster.existing_gke_cluster.endpoint}" } diff --git a/pkg/config/config.go b/pkg/config/config.go index 099973f416..df2192291f 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -698,15 +698,14 @@ func (bp *Blueprint) checkToolkitModulesUrlAndVersion() error { func (bp *Blueprint) checkReferences() error { errs := Errors{} bp.visitDicts(func(dp dictPath, d *Dict) { - // isModSettings := IsModuleSettingsPath(dp) + isModSettings := IsModuleSettingsPath(dp) for k, v := range d.Items() { for ref, rp := range valueReferences(v) { path := dp.Dot(k).Cty(rp) - // fmt.Println("GlobalVar", ref.GlobalVar, "Name", ref.Name, "Module", ref.Module, "rp", rp, "path", path) if !ref.GlobalVar { - // if !isModSettings { - // errs.At(path, fmt.Errorf("module output %q can only be referenced in other module settings", ref)) - // } + if !isModSettings { + errs.At(path, fmt.Errorf("module output %q can only be referenced in other module settings", ref)) + } // module to module references are checked by validateModuleSettingReferences later return } diff --git a/pkg/config/expand.go b/pkg/config/expand.go index ae5c30a328..6e6d722c51 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -17,6 +17,7 @@ package config import ( "errors" "fmt" + "strings" "hpc-toolkit/pkg/modulereader" "hpc-toolkit/pkg/sourcereader" @@ -186,6 +187,34 @@ func (bp Blueprint) expandBackend(grp *Group) { } } +func kubectlProviderRequiredModules(grp *Group) []Module { + mods := []Module{} + for _, mod := range grp.Modules { + if strings.Contains(mod.Source, "gke-cluster") || strings.Contains(mod.Source, "pre-existing-gke-cluster") { + mods = append(mods, mod) + } + } + return mods +} + +func getModuleKubectlProviders(mod Module) map[string]TerraformProvider { + kubectlConf := Dict{} + for s, v := range map[string]string{ + "cluster_ca_certificate": "cluster_ca_certificate", + "host": "host_endpoint", + "token": "access_token"} { + kubectlConf = kubectlConf.With(s, ModuleRef(mod.ID, v).AsValue()) + } + // kubectlConf = kubectlConf.With("alias", cty.StringVal(string(mod.ID))) + kubectlConf = kubectlConf.With("apply_retry_count", cty.NumberIntVal(15)) + kubectlConf = kubectlConf.With("load_config_file", cty.BoolVal(false)) + return map[string]TerraformProvider{ + "kubectl": { + Source: "gavinbunney/kubectl", + Version: ">= 1.7.0", + Configuration: kubectlConf}} +} + func getDefaultGoogleProviders(bp Blueprint) map[string]TerraformProvider { gglConf := Dict{} for s, v := range 
map[string]string{ @@ -220,6 +249,10 @@ func (bp Blueprint) expandProviders(grp *Group) { if (*pv) == nil { (*pv) = maps.Clone(defaults) } + mods := kubectlProviderRequiredModules(grp) + for _, mod := range mods { + maps.Copy((*pv), getModuleKubectlProviders(mod)) + } } func getModuleInputMap(inputs []modulereader.VarInfo) map[string]cty.Type { diff --git a/pkg/config/path.go b/pkg/config/path.go index d0869d4bd2..7d84f449d4 100644 --- a/pkg/config/path.go +++ b/pkg/config/path.go @@ -194,14 +194,14 @@ func init() { initPath(&Root, nil, "") } -// func IsModuleSettingsPath(p Path) bool { -// parent := p.Parent() -// if parent == nil { -// return false -// } -// mp, ok := parent.(*ModulePath) -// if !ok { -// return false -// } -// return p == mp.Settings -// } +func IsModuleSettingsPath(p Path) bool { + parent := p.Parent() + if parent == nil { + return false + } + mp, ok := parent.(*ModulePath) + if !ok { + return false + } + return p == mp.Settings +} From da63370ae6d5b1dc80fbe4e552c24d5463c48fd4 Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Sat, 14 Dec 2024 08:53:42 +0000 Subject: [PATCH 020/140] Append kubectl provider in root module for blueprint with gke cluster module --- modules/file-system/gke-persistent-volume/main.tf | 2 +- modules/management/kubectl-apply/README.md | 1 + modules/management/kubectl-apply/main.tf | 3 +++ modules/management/kubectl-apply/variables.tf | 5 +++++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/modules/file-system/gke-persistent-volume/main.tf b/modules/file-system/gke-persistent-volume/main.tf index c9ebaa6010..df8b4733d8 100644 --- a/modules/file-system/gke-persistent-volume/main.tf +++ b/modules/file-system/gke-persistent-volume/main.tf @@ -93,7 +93,7 @@ resource "kubectl_manifest" "pv" { lifecycle { precondition { condition = var.gke_cluster_exists && (var.gcs_bucket_name != null) != (var.filestore_id != null) - error_message = "Either gcs_bucket_name or filestore_id must be set." + error_message = "GKE cluster should exists and either gcs_bucket_name or filestore_id must be set." } } } diff --git a/modules/management/kubectl-apply/README.md b/modules/management/kubectl-apply/README.md index 02812a2d96..64f254d11b 100644 --- a/modules/management/kubectl-apply/README.md +++ b/modules/management/kubectl-apply/README.md @@ -130,6 +130,7 @@ limitations under the License. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [apply\_manifests](#input\_apply\_manifests) | A list of manifests to apply to GKE cluster using kubectl. For more details see [kubectl module's inputs](kubectl/README.md). |
list(object({
content = optional(string, null)
source = optional(string, null)
template_vars = optional(map(any), null)
server_side_apply = optional(bool, false)
wait_for_rollout = optional(bool, true)
}))
| `[]` | no | +| [gke\_cluster\_exists](#input\_gke\_cluster\_exists) | A static flag that signals to modules that a cluster has been created. | `bool` | n/a | yes | | [jobset](#input\_jobset) | Install [Jobset](https://github.com/kubernetes-sigs/jobset) which manages a group of K8s [jobs](https://kubernetes.io/docs/concepts/workloads/controllers/job/) as a unit. |
object({
install = optional(bool, false)
version = optional(string, "v0.5.2")
})
| `{}` | no | | [kueue](#input\_kueue) | Install and configure [Kueue](https://kueue.sigs.k8s.io/docs/overview/) workload scheduler. A configuration yaml/template file can be provided with config\_path to be applied right after kueue installation. If a template file provided, its variables can be set to config\_template\_vars. |
object({
install = optional(bool, false)
version = optional(string, "v0.8.1")
config_path = optional(string, null)
config_template_vars = optional(map(any), null)
})
| `{}` | no | diff --git a/modules/management/kubectl-apply/main.tf b/modules/management/kubectl-apply/main.tf index 1f02677ff0..85f5f4ba03 100644 --- a/modules/management/kubectl-apply/main.tf +++ b/modules/management/kubectl-apply/main.tf @@ -34,6 +34,7 @@ module "kubectl_apply_manifests" { template_vars = each.value.template_vars server_side_apply = each.value.server_side_apply wait_for_rollout = each.value.wait_for_rollout + depends_on = [var.gke_cluster_exists] providers = { http = http.h @@ -44,6 +45,7 @@ module "install_kueue" { source = "./kubectl" source_path = local.install_kueue ? local.kueue_install_source : null server_side_apply = true + depends_on = [var.gke_cluster_exists] providers = { http = http.h @@ -54,6 +56,7 @@ module "install_jobset" { source = "./kubectl" source_path = local.install_jobset ? local.jobset_install_source : null server_side_apply = true + depends_on = [var.gke_cluster_exists] providers = { http = http.h diff --git a/modules/management/kubectl-apply/variables.tf b/modules/management/kubectl-apply/variables.tf index 7a4f54a0a9..cb1cc6b690 100644 --- a/modules/management/kubectl-apply/variables.tf +++ b/modules/management/kubectl-apply/variables.tf @@ -37,6 +37,11 @@ resource "terraform_data" "jobset_validations" { } } +variable "gke_cluster_exists" { + description = "A static flag that signals to modules that a cluster has been created." + type = bool +} + variable "apply_manifests" { description = "A list of manifests to apply to GKE cluster using kubectl. For more details see [kubectl module's inputs](kubectl/README.md)." type = list(object({ From 0e1d96e2987041017d60c0c895813cce9773c58b Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Sat, 14 Dec 2024 12:58:44 +0000 Subject: [PATCH 021/140] Further refined changes --- .../modules/compute/gke-topology-scheduler/README.md | 3 +-- .../modules/compute/gke-topology-scheduler/main.tf | 4 +--- .../compute/gke-topology-scheduler/variables.tf | 12 ++++-------- modules/file-system/gke-storage/README.md | 3 +-- modules/file-system/gke-storage/main.tf | 4 +--- modules/file-system/gke-storage/variables.tf | 12 ++++-------- modules/management/kubectl-apply/README.md | 2 +- modules/management/kubectl-apply/main.tf | 8 ++++---- modules/management/kubectl-apply/variables.tf | 1 + modules/scheduler/gke-cluster/main.tf | 2 ++ modules/scheduler/pre-existing-gke-cluster/main.tf | 2 ++ pkg/config/expand.go | 10 ++++------ 12 files changed, 26 insertions(+), 37 deletions(-) diff --git a/community/modules/compute/gke-topology-scheduler/README.md b/community/modules/compute/gke-topology-scheduler/README.md index 5aaa4fca98..8d5b42913e 100644 --- a/community/modules/compute/gke-topology-scheduler/README.md +++ b/community/modules/compute/gke-topology-scheduler/README.md @@ -45,8 +45,7 @@ No resources. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [cluster\_id](#input\_cluster\_id) | projects/{{project}}/locations/{{location}}/clusters/{{cluster}} | `string` | n/a | yes | -| [project\_id](#input\_project\_id) | The project ID to host the cluster in. | `string` | n/a | yes | +| [gke\_cluster\_exists](#input\_gke\_cluster\_exists) | A static flag that signals to modules that a cluster has been created. 
| `bool` | `false` | no | ## Outputs diff --git a/community/modules/compute/gke-topology-scheduler/main.tf b/community/modules/compute/gke-topology-scheduler/main.tf index 677595632b..1c2b658668 100644 --- a/community/modules/compute/gke-topology-scheduler/main.tf +++ b/community/modules/compute/gke-topology-scheduler/main.tf @@ -13,11 +13,9 @@ # limitations under the License. module "kubectl_apply" { + count = var.gke_cluster_exists ? 1 : 0 source = "../../../../modules/management/kubectl-apply" - cluster_id = var.cluster_id - project_id = var.project_id - apply_manifests = [ { source = "${path.module}/manifests/topology-scheduler-scripts.yaml" }, { source = "${path.module}/manifests/service-account.yaml" }, diff --git a/community/modules/compute/gke-topology-scheduler/variables.tf b/community/modules/compute/gke-topology-scheduler/variables.tf index 0766091223..2fcbb93d58 100644 --- a/community/modules/compute/gke-topology-scheduler/variables.tf +++ b/community/modules/compute/gke-topology-scheduler/variables.tf @@ -12,12 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -variable "project_id" { - description = "The project ID to host the cluster in." - type = string -} - -variable "cluster_id" { - description = "projects/{{project}}/locations/{{location}}/clusters/{{cluster}}" - type = string +variable "gke_cluster_exists" { + description = "A static flag that signals to modules that a cluster has been created." + type = bool + default = false } diff --git a/modules/file-system/gke-storage/README.md b/modules/file-system/gke-storage/README.md index 17c718aa37..f1b3c2884e 100644 --- a/modules/file-system/gke-storage/README.md +++ b/modules/file-system/gke-storage/README.md @@ -109,11 +109,10 @@ No resources. |------|-------------|------|---------|:--------:| | [access\_mode](#input\_access\_mode) | The access mode that the volume can be mounted to the host/pod. More details in [Access Modes](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes)
Valid access modes:
- ReadWriteOnce
- ReadOnlyMany
- ReadWriteMany
- ReadWriteOncePod | `string` | n/a | yes | | [capacity\_gb](#input\_capacity\_gb) | The storage capacity with which to create the persistent volume. | `number` | n/a | yes | -| [cluster\_id](#input\_cluster\_id) | An identifier for the GKE cluster in the format `projects/{{project}}/locations/{{location}}/clusters/{{cluster}}` | `string` | n/a | yes | +| [gke\_cluster\_exists](#input\_gke\_cluster\_exists) | A static flag that signals to modules that a cluster has been created. | `bool` | `false` | no | | [labels](#input\_labels) | GCE resource labels to be applied to resources. Key-value pairs. | `map(string)` | n/a | yes | | [mount\_options](#input\_mount\_options) | Controls the mountOptions for dynamically provisioned PersistentVolumes of this storage class. | `string` | `null` | no | | [private\_vpc\_connection\_peering](#input\_private\_vpc\_connection\_peering) | The name of the VPC Network peering connection.
If using a new VPC, please use community/modules/network/private-service-access to create private-service-access, and
If using an existing VPC with private-service-access enabled, set this manually following the [user guide](https://cloud.google.com/parallelstore/docs/vpc). | `string` | `null` | no | -| [project\_id](#input\_project\_id) | The project ID to host the cluster in. | `string` | n/a | yes | | [pv\_mount\_path](#input\_pv\_mount\_path) | Path within the container at which the volume should be mounted. Must not contain ':'. | `string` | `"/data"` | no | | [pvc\_count](#input\_pvc\_count) | How many PersistentVolumeClaims will be created | `number` | `1` | no | | [sc\_reclaim\_policy](#input\_sc\_reclaim\_policy) | Indicate whether to keep the dynamically provisioned PersistentVolumes of this storage class after the bound PersistentVolumeClaim is deleted.
[More details about reclaiming](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#reclaiming)
Supported values:
- Retain
- Delete | `string` | n/a | yes | diff --git a/modules/file-system/gke-storage/main.tf b/modules/file-system/gke-storage/main.tf index 18f85fa779..bb738162e5 100644 --- a/modules/file-system/gke-storage/main.tf +++ b/modules/file-system/gke-storage/main.tf @@ -37,11 +37,9 @@ check "private_vpc_connection_peering" { } module "kubectl_apply" { + count = var.gke_cluster_exists ? 1 : 0 source = "../../management/kubectl-apply" - cluster_id = var.cluster_id - project_id = var.project_id - # count = var.pvc_count apply_manifests = flatten( [ diff --git a/modules/file-system/gke-storage/variables.tf b/modules/file-system/gke-storage/variables.tf index 9ad3b839d8..9efbe6082c 100644 --- a/modules/file-system/gke-storage/variables.tf +++ b/modules/file-system/gke-storage/variables.tf @@ -14,14 +14,10 @@ * limitations under the License. */ -variable "project_id" { - description = "The project ID to host the cluster in." - type = string -} - -variable "cluster_id" { - description = "An identifier for the GKE cluster in the format `projects/{{project}}/locations/{{location}}/clusters/{{cluster}}`" - type = string +variable "gke_cluster_exists" { + description = "A static flag that signals to modules that a cluster has been created." + type = bool + default = false } variable "labels" { diff --git a/modules/management/kubectl-apply/README.md b/modules/management/kubectl-apply/README.md index 64f254d11b..47f0076618 100644 --- a/modules/management/kubectl-apply/README.md +++ b/modules/management/kubectl-apply/README.md @@ -130,7 +130,7 @@ limitations under the License. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [apply\_manifests](#input\_apply\_manifests) | A list of manifests to apply to GKE cluster using kubectl. For more details see [kubectl module's inputs](kubectl/README.md). |
list(object({
content = optional(string, null)
source = optional(string, null)
template_vars = optional(map(any), null)
server_side_apply = optional(bool, false)
wait_for_rollout = optional(bool, true)
}))
| `[]` | no | -| [gke\_cluster\_exists](#input\_gke\_cluster\_exists) | A static flag that signals to modules that a cluster has been created. | `bool` | n/a | yes | +| [gke\_cluster\_exists](#input\_gke\_cluster\_exists) | A static flag that signals to modules that a cluster has been created. | `bool` | `false` | no | | [jobset](#input\_jobset) | Install [Jobset](https://github.com/kubernetes-sigs/jobset) which manages a group of K8s [jobs](https://kubernetes.io/docs/concepts/workloads/controllers/job/) as a unit. |
object({
install = optional(bool, false)
version = optional(string, "v0.5.2")
})
| `{}` | no | | [kueue](#input\_kueue) | Install and configure [Kueue](https://kueue.sigs.k8s.io/docs/overview/) workload scheduler. A configuration yaml/template file can be provided with config\_path to be applied right after kueue installation. If a template file provided, its variables can be set to config\_template\_vars. |
object({
install = optional(bool, false)
version = optional(string, "v0.8.1")
config_path = optional(string, null)
config_template_vars = optional(map(any), null)
})
| `{}` | no | diff --git a/modules/management/kubectl-apply/main.tf b/modules/management/kubectl-apply/main.tf index 85f5f4ba03..cc1abd05f0 100644 --- a/modules/management/kubectl-apply/main.tf +++ b/modules/management/kubectl-apply/main.tf @@ -26,7 +26,7 @@ locals { } module "kubectl_apply_manifests" { - for_each = local.apply_manifests_map + for_each = var.gke_cluster_exists ? local.apply_manifests_map : {} source = "./kubectl" content = each.value.content @@ -34,7 +34,6 @@ module "kubectl_apply_manifests" { template_vars = each.value.template_vars server_side_apply = each.value.server_side_apply wait_for_rollout = each.value.wait_for_rollout - depends_on = [var.gke_cluster_exists] providers = { http = http.h @@ -42,10 +41,10 @@ module "kubectl_apply_manifests" { } module "install_kueue" { + count = var.gke_cluster_exists ? 1 : 0 source = "./kubectl" source_path = local.install_kueue ? local.kueue_install_source : null server_side_apply = true - depends_on = [var.gke_cluster_exists] providers = { http = http.h @@ -53,10 +52,10 @@ module "install_kueue" { } module "install_jobset" { + count = var.gke_cluster_exists ? 1 : 0 source = "./kubectl" source_path = local.install_jobset ? local.jobset_install_source : null server_side_apply = true - depends_on = [var.gke_cluster_exists] providers = { http = http.h @@ -64,6 +63,7 @@ module "install_jobset" { } module "configure_kueue" { + count = var.gke_cluster_exists ? 1 : 0 source = "./kubectl" source_path = local.install_kueue ? try(var.kueue.config_path, "") : null template_vars = local.install_kueue ? try(var.kueue.config_template_vars, null) : null diff --git a/modules/management/kubectl-apply/variables.tf b/modules/management/kubectl-apply/variables.tf index cb1cc6b690..356d268d9f 100644 --- a/modules/management/kubectl-apply/variables.tf +++ b/modules/management/kubectl-apply/variables.tf @@ -40,6 +40,7 @@ resource "terraform_data" "jobset_validations" { variable "gke_cluster_exists" { description = "A static flag that signals to modules that a cluster has been created." 
type = bool + default = false } variable "apply_manifests" { diff --git a/modules/scheduler/gke-cluster/main.tf b/modules/scheduler/gke-cluster/main.tf index 55188acb6b..2d1dca3267 100644 --- a/modules/scheduler/gke-cluster/main.tf +++ b/modules/scheduler/gke-cluster/main.tf @@ -353,4 +353,6 @@ module "kubectl_apply" { } ] ]) + + depends_on = [google_container_cluster.gke_cluster] } diff --git a/modules/scheduler/pre-existing-gke-cluster/main.tf b/modules/scheduler/pre-existing-gke-cluster/main.tf index e90c8877ed..95761ef10d 100644 --- a/modules/scheduler/pre-existing-gke-cluster/main.tf +++ b/modules/scheduler/pre-existing-gke-cluster/main.tf @@ -66,4 +66,6 @@ module "kubectl_apply" { source = "../../management/kubectl-apply" apply_manifests = concat(local.apply_manifests_non_rdma_networks, local.apply_manifests_rdma_networks) + + depends_on = [data.google_container_cluster.existing_gke_cluster] } diff --git a/pkg/config/expand.go b/pkg/config/expand.go index 6e6d722c51..a2f67b2b5a 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -187,14 +187,13 @@ func (bp Blueprint) expandBackend(grp *Group) { } } -func kubectlProviderRequiredModules(grp *Group) []Module { - mods := []Module{} +func kubectlProviderRequiredModule(grp *Group) (bool, Module) { for _, mod := range grp.Modules { if strings.Contains(mod.Source, "gke-cluster") || strings.Contains(mod.Source, "pre-existing-gke-cluster") { - mods = append(mods, mod) + return true, mod } } - return mods + return false, Module{} } func getModuleKubectlProviders(mod Module) map[string]TerraformProvider { @@ -249,8 +248,7 @@ func (bp Blueprint) expandProviders(grp *Group) { if (*pv) == nil { (*pv) = maps.Clone(defaults) } - mods := kubectlProviderRequiredModules(grp) - for _, mod := range mods { + if ok, mod := kubectlProviderRequiredModule(grp); ok { maps.Copy((*pv), getModuleKubectlProviders(mod)) } } From d07250519d55af03efc2575053408cf50d8497eb Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Sat, 14 Dec 2024 13:06:20 +0000 Subject: [PATCH 022/140] Added changes related to kubectl-apply module --- modules/compute/gke-node-pool/README.md | 1 + modules/compute/gke-node-pool/main.tf | 1 + modules/compute/gke-node-pool/variables.tf | 6 ++++++ 3 files changed, 8 insertions(+) diff --git a/modules/compute/gke-node-pool/README.md b/modules/compute/gke-node-pool/README.md index d2715ff652..17d5a95fee 100644 --- a/modules/compute/gke-node-pool/README.md +++ b/modules/compute/gke-node-pool/README.md @@ -322,6 +322,7 @@ limitations under the License. | [disk\_type](#input\_disk\_type) | Disk type for each node. | `string` | `null` | no | | [enable\_gcfs](#input\_enable\_gcfs) | Enable the Google Container Filesystem (GCFS). See [restrictions](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/container_cluster#gcfs_config). | `bool` | `false` | no | | [enable\_secure\_boot](#input\_enable\_secure\_boot) | Enable secure boot for the nodes. Keep enabled unless custom kernel modules need to be loaded. See [here](https://cloud.google.com/compute/shielded-vm/docs/shielded-vm#secure-boot) for more info. | `bool` | `true` | no | +| [gke\_cluster\_exists](#input\_gke\_cluster\_exists) | A static flag that signals to modules that a cluster has been created. | `bool` | `false` | no | | [gke\_version](#input\_gke\_version) | GKE version | `string` | n/a | yes | | [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = optional(string)
count = optional(number, 0)
gpu_driver_installation_config = optional(object({
gpu_driver_version = string
}), { gpu_driver_version = "DEFAULT" })
gpu_partition_size = optional(string)
gpu_sharing_config = optional(object({
gpu_sharing_strategy = string
max_shared_clients_per_gpu = number
}))
}))
| `[]` | no | | [host\_maintenance\_interval](#input\_host\_maintenance\_interval) | Specifies the frequency of planned maintenance events. | `string` | `""` | no | diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index 5d4bf02fb2..a0a334356e 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -353,6 +353,7 @@ resource "null_resource" "enable_tcpxo_in_workload" { # apply manifest to enable tcpx module "kubectl_apply" { + count = var.gke_cluster_exists ? 1 : 0 source = "../../management/kubectl-apply" apply_manifests = flatten([ diff --git a/modules/compute/gke-node-pool/variables.tf b/modules/compute/gke-node-pool/variables.tf index d3b403b564..b15fc3f3ef 100644 --- a/modules/compute/gke-node-pool/variables.tf +++ b/modules/compute/gke-node-pool/variables.tf @@ -24,6 +24,12 @@ variable "cluster_id" { type = string } +variable "gke_cluster_exists" { + description = "A static flag that signals to modules that a cluster has been created." + type = bool + default = false +} + variable "zones" { description = "A list of zones to be used. Zones must be in region of cluster. If null, cluster zones will be inherited. Note `zones` not `zone`; does not work with `zone` deployment variable." type = list(string) From f45e151fb96573765c804d2c6fafdabae0120996 Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Sat, 14 Dec 2024 13:32:58 +0000 Subject: [PATCH 023/140] Added changes related to kubectl-apply module --- community/modules/compute/gke-topology-scheduler/main.tf | 3 ++- modules/compute/gke-node-pool/main.tf | 3 ++- modules/file-system/gke-persistent-volume/README.md | 2 +- modules/file-system/gke-persistent-volume/main.tf | 5 +++-- modules/file-system/gke-persistent-volume/variables.tf | 1 + modules/file-system/gke-storage/main.tf | 3 ++- modules/scheduler/gke-cluster/main.tf | 2 ++ modules/scheduler/pre-existing-gke-cluster/main.tf | 2 ++ 8 files changed, 15 insertions(+), 6 deletions(-) diff --git a/community/modules/compute/gke-topology-scheduler/main.tf b/community/modules/compute/gke-topology-scheduler/main.tf index 1c2b658668..3a79befcf3 100644 --- a/community/modules/compute/gke-topology-scheduler/main.tf +++ b/community/modules/compute/gke-topology-scheduler/main.tf @@ -13,9 +13,10 @@ # limitations under the License. module "kubectl_apply" { - count = var.gke_cluster_exists ? 1 : 0 source = "../../../../modules/management/kubectl-apply" + gke_cluster_exists = var.gke_cluster_exists + apply_manifests = [ { source = "${path.module}/manifests/topology-scheduler-scripts.yaml" }, { source = "${path.module}/manifests/service-account.yaml" }, diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index a0a334356e..64013374f1 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -353,9 +353,10 @@ resource "null_resource" "enable_tcpxo_in_workload" { # apply manifest to enable tcpx module "kubectl_apply" { - count = var.gke_cluster_exists ? 1 : 0 source = "../../management/kubectl-apply" + gke_cluster_exists = var.gke_cluster_exists + apply_manifests = flatten([ for manifest in local.gpu_direct_setting.gpu_direct_manifests : [ { diff --git a/modules/file-system/gke-persistent-volume/README.md b/modules/file-system/gke-persistent-volume/README.md index 23bce2de8d..b5967763c9 100644 --- a/modules/file-system/gke-persistent-volume/README.md +++ b/modules/file-system/gke-persistent-volume/README.md @@ -150,7 +150,7 @@ No modules. 
| [capacity\_gb](#input\_capacity\_gb) | The storage capacity with which to create the persistent volume. | `number` | n/a | yes | | [filestore\_id](#input\_filestore\_id) | An identifier for a filestore with the format `projects/{{project}}/locations/{{location}}/instances/{{name}}`. | `string` | `null` | no | | [gcs\_bucket\_name](#input\_gcs\_bucket\_name) | The gcs bucket to be used with the persistent volume. | `string` | `null` | no | -| [gke\_cluster\_exists](#input\_gke\_cluster\_exists) | A static flag that signals to modules that a cluster has been created. | `bool` | n/a | yes | +| [gke\_cluster\_exists](#input\_gke\_cluster\_exists) | A static flag that signals to modules that a cluster has been created. | `bool` | `false` | no | | [labels](#input\_labels) | GCE resource labels to be applied to resources. Key-value pairs. | `map(string)` | n/a | yes | | [network\_storage](#input\_network\_storage) | Network attached storage mount to be configured. |
object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
})
| n/a | yes | diff --git a/modules/file-system/gke-persistent-volume/main.tf b/modules/file-system/gke-persistent-volume/main.tf index df8b4733d8..d12c5d6d39 100644 --- a/modules/file-system/gke-persistent-volume/main.tf +++ b/modules/file-system/gke-persistent-volume/main.tf @@ -88,12 +88,13 @@ resource "local_file" "debug_file" { } resource "kubectl_manifest" "pv" { + count = var.gke_cluster_exists ? 1 : 0 yaml_body = local.is_gcs ? local.gcs_pv_contents : local.filestore_pv_contents lifecycle { precondition { - condition = var.gke_cluster_exists && (var.gcs_bucket_name != null) != (var.filestore_id != null) - error_message = "GKE cluster should exists and either gcs_bucket_name or filestore_id must be set." + condition = (var.gcs_bucket_name != null) != (var.filestore_id != null) + error_message = "Either gcs_bucket_name or filestore_id must be set." } } } diff --git a/modules/file-system/gke-persistent-volume/variables.tf b/modules/file-system/gke-persistent-volume/variables.tf index 88ff9c36b8..96e3f31949 100644 --- a/modules/file-system/gke-persistent-volume/variables.tf +++ b/modules/file-system/gke-persistent-volume/variables.tf @@ -17,6 +17,7 @@ variable "gke_cluster_exists" { description = "A static flag that signals to modules that a cluster has been created." type = bool + default = false } variable "network_storage" { diff --git a/modules/file-system/gke-storage/main.tf b/modules/file-system/gke-storage/main.tf index bb738162e5..f26936de0c 100644 --- a/modules/file-system/gke-storage/main.tf +++ b/modules/file-system/gke-storage/main.tf @@ -37,9 +37,10 @@ check "private_vpc_connection_peering" { } module "kubectl_apply" { - count = var.gke_cluster_exists ? 1 : 0 source = "../../management/kubectl-apply" + gke_cluster_exists = var.gke_cluster_exists + # count = var.pvc_count apply_manifests = flatten( [ diff --git a/modules/scheduler/gke-cluster/main.tf b/modules/scheduler/gke-cluster/main.tf index 2d1dca3267..823bfb5cbe 100644 --- a/modules/scheduler/gke-cluster/main.tf +++ b/modules/scheduler/gke-cluster/main.tf @@ -336,6 +336,8 @@ module "workload_identity" { module "kubectl_apply" { source = "../../management/kubectl-apply" + gke_cluster_exists = true + apply_manifests = flatten([ for idx, network_info in var.additional_networks : [ { diff --git a/modules/scheduler/pre-existing-gke-cluster/main.tf b/modules/scheduler/pre-existing-gke-cluster/main.tf index 95761ef10d..800ed87a51 100644 --- a/modules/scheduler/pre-existing-gke-cluster/main.tf +++ b/modules/scheduler/pre-existing-gke-cluster/main.tf @@ -65,6 +65,8 @@ data "google_client_config" "default" {} module "kubectl_apply" { source = "../../management/kubectl-apply" + gke_cluster_exists = true + apply_manifests = concat(local.apply_manifests_non_rdma_networks, local.apply_manifests_rdma_networks) depends_on = [data.google_container_cluster.existing_gke_cluster] From 3b2ab776a08e2055310f8ec27e89bfed955cd264 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Mon, 16 Dec 2024 07:37:46 +0000 Subject: [PATCH 024/140] SlurmGCP. 
Reduce usage of NSDict --- .../modules/slurm_files/scripts/resume.py | 47 ++++++++----------- .../modules/slurm_files/scripts/slurmsync.py | 3 +- 2 files changed, 21 insertions(+), 29 deletions(-) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py index 669ccfc0a7..5d88751a41 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py @@ -155,11 +155,6 @@ def dws_flex_duration(dws_flex:object, job_id: Optional[int]) -> int: log.info("Job TimeLimit cannot be less than 30 seconds or exceed 2 weeks") return max_duration -def per_instance_properties(node): - props = NSDict() - # No properties beyond name are supported yet. - - return props def create_instances_request(nodes: List[str], placement_group: Optional[str], excl_job_id: Optional[int]): """Call regionInstances.bulkInsert to create instances""" @@ -167,31 +162,27 @@ def create_instances_request(nodes: List[str], placement_group: Optional[str], e # model here indicates any node that can be used to describe the rest model = next(iter(nodes)) - nodeset = lookup().node_nodeset(model) - template = lookup().node_template(model) log.debug(f"create_instances_request: {model} placement: {placement_group}") - body = NSDict() + nodeset = lookup().node_nodeset(model) + template = lookup().node_template(model) + labels = {"slurm_job_id": excl_job_id} if excl_job_id else None - body.count = len(nodes) + body = dict( + count = len(nodes), + sourceInstanceTemplate = template, + # key is instance name, value overwrites properties (no overwrites) + perInstanceProperties = {k: {} for k in nodes}, + instanceProperties = instance_properties( + nodeset, model, placement_group, labels, excl_job_id + ), + ) if placement_group: assert len(nodes) <= PLACEMENT_MAX_CNT pass # do not set minCount to force "all or nothing" behavior else: - body.minCount = 1 - - # source of instance properties - body.sourceInstanceTemplate = template - - labels = {"slurm_job_id": excl_job_id} if excl_job_id else None - # overwrites properties across all instances - body.instanceProperties = instance_properties( - nodeset, model, placement_group, labels, excl_job_id - ) - - # key is instance name, value overwrites properties - body.perInstanceProperties = {k: per_instance_properties(k) for k in nodes} + body["minCount"] = 1 zone_allow = nodeset.zone_policy_allow or [] zone_deny = nodeset.zone_policy_deny or [] @@ -203,10 +194,12 @@ def create_instances_request(nodes: List[str], placement_group: Optional[str], e api_method = lookup().compute.regionInstances().bulkInsert method_args = {"region": lookup().node_region(model)} - body.locationPolicy.locations = { - **{ f"zones/{z}": {"preference": "ALLOW"} for z in zone_allow }, - **{ f"zones/{z}": {"preference": "DENY"} for z in zone_deny }} - body.locationPolicy.targetShape = nodeset.zone_target_shape + body["locationPolicy"] = dict( + locations = { + **{ f"zones/{z}": {"preference": "ALLOW"} for z in zone_allow }, + **{ f"zones/{z}": {"preference": "DENY"} for z in zone_deny }}, + targetShape = nodeset.zone_target_shape, + ) if lookup().cfg.enable_slurm_gcp_plugins: slurm_gcp_plugins.pre_instance_bulk_insert( @@ -218,7 +211,7 @@ def create_instances_request(nodes: List[str], placement_group: Optional[str], e req = api_method( 
project=lookup().project, - body=body.to_dict(), + body=body, **method_args) log.debug(f"new request: endpoint={req.methodId} nodes={to_hostlist(nodes)}") log_api_request(req) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py index 8b67365e68..21d9324e79 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py @@ -39,7 +39,6 @@ run, separate, to_hostlist, - NSDict, NodeState, TPU, chunked, @@ -363,7 +362,7 @@ def sync_placement_groups(): result = ensure_execute(op) # merge placement group info from API and job_id,partition,index parsed from the name pgs = ( - NSDict({**pg, **pg_regex.match(pg["name"]).groupdict()}) + {**pg, **pg_regex.match(pg["name"]).groupdict()} for pg in chain.from_iterable( item["resourcePolicies"] for item in result.get("items", {}).values() From 926f5ed34f045bbe08f3392242553e2628f8b5f5 Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Mon, 16 Dec 2024 10:15:41 +0000 Subject: [PATCH 025/140] Update README for parallelstore related example blueprint --- examples/README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/examples/README.md b/examples/README.md index 30883ce0f9..4abeb289f9 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1518,6 +1518,30 @@ cleaned up when the job is deleted. [storage-gke.yaml]: ../examples/storage-gke.yaml +### [gke-storage-parallelstore.yaml] ![core-badge] ![experimental-badge] + +This blueprint shows how to use parallelstore storage options with GKE in the toolkit. + +The blueprint contains the following: + +* A K8s Job that uses a parallelstore storage volume option. +* A K8s Job that demonstrates ML training workload with parallelstore storage disk ops. + +> **Warning**: In this example, when storage type `Parallelstore` is specified in `gke-storage` module. +> The lifecycle of the parallelstore is not managed by the blueprint. +> On glcuster destroy ops, the Parallelstore created will also be destroyed. +> +> [!Note] +> The Kubernetes API server will only allow requests from authorized networks. +> The `gke-cluster` module needs access to the Kubernetes API server +> to create a Persistent Volume and a Persistent Volume Claim. **You must use +> the `authorized_cidr` variable to supply an authorized network which contains +> the IP address of the machine deploying the blueprint, for example +> `--vars authorized_cidr=/32`.** You can use a service like +> [whatismyip.com](https://whatismyip.com) to determine your IP address. + +[gke-storage-parallelstore.yaml]: ../examples/gke-storage-parallelstore.yaml + ### [gke-a3-megagpu.yaml] ![core-badge] ![experimental-badge] This blueprint shows how to provision a GKE cluster with A3 Mega machines in the toolkit. 
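As a companion to the gke-storage-parallelstore section added above, here is a minimal sketch of how a blueprint might wire the `gke-storage` module for Parallelstore. It is an illustrative sketch only, not the shipped `gke-storage-parallelstore.yaml`: the `storage_type` setting name and the `gke_cluster` module ID referenced in `use:` are assumptions, while the remaining settings correspond to inputs documented in the module's README.

```yaml
  # Illustrative sketch: the storage_type name and the gke_cluster ID are assumptions.
  - id: parallelstore-storage
    source: modules/file-system/gke-storage
    use: [gke_cluster]              # assumed ID of the blueprint's gke-cluster module
    settings:
      storage_type: Parallelstore   # assumed input name for selecting the backend
      access_mode: ReadWriteMany    # one of the documented access modes
      sc_reclaim_policy: Delete     # documented values are Retain and Delete
      capacity_gb: 12000
      pvc_count: 2
      pv_mount_path: /data
      gke_cluster_exists: true      # normally propagated from the cluster module; shown explicitly here
```

Because `gke_cluster_exists` defaults to `false`, the module's kubectl-backed resources are only created once the flag is set (or wired) to `true`, which is the gating behavior introduced by this patch series.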
From 67be248c364f6078f5031a8688725fe740cc9a28 Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Mon, 16 Dec 2024 11:11:11 +0000 Subject: [PATCH 026/140] Update README with GKE parallelstore related example blueprint details --- examples/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/README.md b/examples/README.md index 4abeb289f9..95cea23dae 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1528,8 +1528,8 @@ The blueprint contains the following: * A K8s Job that demonstrates ML training workload with parallelstore storage disk ops. > **Warning**: In this example, when storage type `Parallelstore` is specified in `gke-storage` module. -> The lifecycle of the parallelstore is not managed by the blueprint. -> On glcuster destroy ops, the Parallelstore created will also be destroyed. +> The lifecycle of the parallelstore is managed by the blueprint. +> On glcuster destroy ops, the Parallelstore storage created will also be destroyed. > > [!Note] > The Kubernetes API server will only allow requests from authorized networks. From c59164fe99839a1549a864a99e4f1cc1de32cfc4 Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Mon, 16 Dec 2024 12:31:40 +0000 Subject: [PATCH 027/140] Added unit test cases --- pkg/config/expand.go | 15 ++++++--- pkg/config/expand_test.go | 70 +++++++++++++++++++++++++++++++++++---- 2 files changed, 74 insertions(+), 11 deletions(-) diff --git a/pkg/config/expand.go b/pkg/config/expand.go index a2f67b2b5a..5ba931bcbb 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -197,16 +197,23 @@ func kubectlProviderRequiredModule(grp *Group) (bool, Module) { } func getModuleKubectlProviders(mod Module) map[string]TerraformProvider { + modOutputs := []string{} + for idx := range mod.Outputs { + modOutputs = append(modOutputs, mod.Outputs[idx].Name) + } + kubectlConf := Dict{} for s, v := range map[string]string{ "cluster_ca_certificate": "cluster_ca_certificate", "host": "host_endpoint", "token": "access_token"} { - kubectlConf = kubectlConf.With(s, ModuleRef(mod.ID, v).AsValue()) + if slices.Contains(modOutputs, v) { + kubectlConf = kubectlConf.With(s, ModuleRef(mod.ID, v).AsValue()) + } } - // kubectlConf = kubectlConf.With("alias", cty.StringVal(string(mod.ID))) - kubectlConf = kubectlConf.With("apply_retry_count", cty.NumberIntVal(15)) - kubectlConf = kubectlConf.With("load_config_file", cty.BoolVal(false)) + kubectlConf = kubectlConf. + With("apply_retry_count", cty.NumberIntVal(15)). + With("load_config_file", cty.BoolVal(false)) return map[string]TerraformProvider{ "kubectl": { Source: "gavinbunney/kubectl", diff --git a/pkg/config/expand_test.go b/pkg/config/expand_test.go index e1ad008407..7774dbc90e 100644 --- a/pkg/config/expand_test.go +++ b/pkg/config/expand_test.go @@ -87,16 +87,72 @@ func (s *zeroSuite) TestExpandProviders(c *C) { With("zone", cty.StringVal("zone1")). With("universe_domain", cty.StringVal("test-universe.com"))}} + defaultProvider := map[string]PR{ + "google": TerraformProvider{ + Source: "hashicorp/google", + Version: "~> 6.13.0"}, + "google-beta": TerraformProvider{ + Source: "hashicorp/google-beta", + Version: "~> 6.13.0"}} + + testGKEClusterModuleID := ModuleID("dummy_cluster") + testGKEClusterModuleOutputName := "host_endpoint" + + kubectlProvider := PR{ + Source: "gavinbunney/kubectl", + Version: ">= 1.7.0", + Configuration: Dict{}. + With("host", ModuleRef(testGKEClusterModuleID, testGKEClusterModuleOutputName).AsValue()). + With("apply_retry_count", cty.NumberIntVal(15)). 
+ With("load_config_file", cty.BoolVal(false))} + + testModuleOutputs := []modulereader.OutputInfo{ + {Name: testGKEClusterModuleOutputName}} + + testGKEClusterModule := Module{ + Source: "module/test/gke-cluster", + ID: testGKEClusterModuleID, + Outputs: testModuleOutputs} + + testPreExistingGKEClusterModule := Module{ + Source: "module/test/pre-existing-gke-cluster", + ID: testGKEClusterModuleID, + Outputs: testModuleOutputs} + { // no def PR, no group PR - match default values g := Group{Name: "clown"} noDefPr.expandProviders(&g) - c.Check(g.TerraformProviders, DeepEquals, map[string]PR{ - "google": TerraformProvider{ - Source: "hashicorp/google", - Version: "~> 6.13.0"}, - "google-beta": TerraformProvider{ - Source: "hashicorp/google-beta", - Version: "~> 6.13.0"}}) + c.Check(g.TerraformProviders, DeepEquals, defaultProvider) + } + + { // no def PR, no group PR, group only have gke cluster module + g := Group{ + Name: "clown", + Modules: []Module{testGKEClusterModule}} + defaultProvider["kubectl"] = kubectlProvider + noDefPr.expandProviders(&g) + c.Check(g.TerraformProviders, DeepEquals, defaultProvider) + delete(defaultProvider, "kubectl") + } + + { // no def PR, no group PR, group only have pre existing gke cluster module + g := Group{ + Name: "clown", + Modules: []Module{testPreExistingGKEClusterModule}} + defaultProvider["kubectl"] = kubectlProvider + noDefPr.expandProviders(&g) + c.Check(g.TerraformProviders, DeepEquals, defaultProvider) + delete(defaultProvider, "kubectl") + } + + { // no def PR, no group PR, group have both gke cluster and pre existing gke cluster module + g := Group{ + Name: "clown", + Modules: []Module{testGKEClusterModule, testPreExistingGKEClusterModule}} + defaultProvider["kubectl"] = kubectlProvider + noDefPr.expandProviders(&g) + c.Check(g.TerraformProviders, DeepEquals, defaultProvider) + delete(defaultProvider, "kubectl") } { // no def PR, group PR From 93fa0f4a18b7a5935352b7b6c809fa7c8bf26d35 Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Mon, 16 Dec 2024 12:37:00 +0000 Subject: [PATCH 028/140] Updated unit test case --- pkg/config/expand_test.go | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/pkg/config/expand_test.go b/pkg/config/expand_test.go index 7774dbc90e..625b8c2c15 100644 --- a/pkg/config/expand_test.go +++ b/pkg/config/expand_test.go @@ -106,18 +106,18 @@ func (s *zeroSuite) TestExpandProviders(c *C) { With("apply_retry_count", cty.NumberIntVal(15)). 
With("load_config_file", cty.BoolVal(false))} - testModuleOutputs := []modulereader.OutputInfo{ + testGKEClusterModuleOutputs := []modulereader.OutputInfo{ {Name: testGKEClusterModuleOutputName}} testGKEClusterModule := Module{ - Source: "module/test/gke-cluster", + Source: "module/test/gke-cluster/dummy", ID: testGKEClusterModuleID, - Outputs: testModuleOutputs} + Outputs: testGKEClusterModuleOutputs} testPreExistingGKEClusterModule := Module{ - Source: "module/test/pre-existing-gke-cluster", + Source: "module/test/pre-existing-gke-cluster/dummy", ID: testGKEClusterModuleID, - Outputs: testModuleOutputs} + Outputs: testGKEClusterModuleOutputs} { // no def PR, no group PR - match default values g := Group{Name: "clown"} @@ -145,16 +145,6 @@ func (s *zeroSuite) TestExpandProviders(c *C) { delete(defaultProvider, "kubectl") } - { // no def PR, no group PR, group have both gke cluster and pre existing gke cluster module - g := Group{ - Name: "clown", - Modules: []Module{testGKEClusterModule, testPreExistingGKEClusterModule}} - defaultProvider["kubectl"] = kubectlProvider - noDefPr.expandProviders(&g) - c.Check(g.TerraformProviders, DeepEquals, defaultProvider) - delete(defaultProvider, "kubectl") - } - { // no def PR, group PR g := Group{ Name: "clown", From 90aa74bb9470f7c74f6dd614437383730e3c8e31 Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Mon, 16 Dec 2024 13:21:32 +0000 Subject: [PATCH 029/140] Updated unit test cases --- pkg/config/expand.go | 9 +------ pkg/config/expand_test.go | 54 ++++++++++++++++++++------------------- 2 files changed, 29 insertions(+), 34 deletions(-) diff --git a/pkg/config/expand.go b/pkg/config/expand.go index 5ba931bcbb..1005eb780f 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -197,19 +197,12 @@ func kubectlProviderRequiredModule(grp *Group) (bool, Module) { } func getModuleKubectlProviders(mod Module) map[string]TerraformProvider { - modOutputs := []string{} - for idx := range mod.Outputs { - modOutputs = append(modOutputs, mod.Outputs[idx].Name) - } - kubectlConf := Dict{} for s, v := range map[string]string{ "cluster_ca_certificate": "cluster_ca_certificate", "host": "host_endpoint", "token": "access_token"} { - if slices.Contains(modOutputs, v) { - kubectlConf = kubectlConf.With(s, ModuleRef(mod.ID, v).AsValue()) - } + kubectlConf = kubectlConf.With(s, ModuleRef(mod.ID, v).AsValue()) } kubectlConf = kubectlConf. With("apply_retry_count", cty.NumberIntVal(15)). diff --git a/pkg/config/expand_test.go b/pkg/config/expand_test.go index 625b8c2c15..f9f273efd8 100644 --- a/pkg/config/expand_test.go +++ b/pkg/config/expand_test.go @@ -87,37 +87,39 @@ func (s *zeroSuite) TestExpandProviders(c *C) { With("zone", cty.StringVal("zone1")). With("universe_domain", cty.StringVal("test-universe.com"))}} - defaultProvider := map[string]PR{ - "google": TerraformProvider{ - Source: "hashicorp/google", - Version: "~> 6.13.0"}, - "google-beta": TerraformProvider{ - Source: "hashicorp/google-beta", - Version: "~> 6.13.0"}} - testGKEClusterModuleID := ModuleID("dummy_cluster") - testGKEClusterModuleOutputName := "host_endpoint" - kubectlProvider := PR{ - Source: "gavinbunney/kubectl", - Version: ">= 1.7.0", - Configuration: Dict{}. - With("host", ModuleRef(testGKEClusterModuleID, testGKEClusterModuleOutputName).AsValue()). - With("apply_retry_count", cty.NumberIntVal(15)). 
- With("load_config_file", cty.BoolVal(false))} + testKubectlConf := Dict{} + for s, v := range map[string]string{ + "cluster_ca_certificate": "cluster_ca_certificate", + "host": "host_endpoint", + "token": "access_token"} { + testKubectlConf = testKubectlConf.With(s, ModuleRef(testGKEClusterModuleID, v).AsValue()) + } + testKubectlConf = testKubectlConf. + With("apply_retry_count", cty.NumberIntVal(15)). + With("load_config_file", cty.BoolVal(false)) - testGKEClusterModuleOutputs := []modulereader.OutputInfo{ - {Name: testGKEClusterModuleOutputName}} + testKubectlProvider := PR{ + Source: "gavinbunney/kubectl", + Version: ">= 1.7.0", + Configuration: testKubectlConf} testGKEClusterModule := Module{ - Source: "module/test/gke-cluster/dummy", - ID: testGKEClusterModuleID, - Outputs: testGKEClusterModuleOutputs} + Source: "module/test/gke-cluster/dummy", + ID: testGKEClusterModuleID} testPreExistingGKEClusterModule := Module{ - Source: "module/test/pre-existing-gke-cluster/dummy", - ID: testGKEClusterModuleID, - Outputs: testGKEClusterModuleOutputs} + Source: "module/test/pre-existing-gke-cluster/dummy", + ID: testGKEClusterModuleID} + + defaultProvider := map[string]PR{ + "google": TerraformProvider{ + Source: "hashicorp/google", + Version: "~> 6.13.0"}, + "google-beta": TerraformProvider{ + Source: "hashicorp/google-beta", + Version: "~> 6.13.0"}} { // no def PR, no group PR - match default values g := Group{Name: "clown"} @@ -129,7 +131,7 @@ func (s *zeroSuite) TestExpandProviders(c *C) { g := Group{ Name: "clown", Modules: []Module{testGKEClusterModule}} - defaultProvider["kubectl"] = kubectlProvider + defaultProvider["kubectl"] = testKubectlProvider noDefPr.expandProviders(&g) c.Check(g.TerraformProviders, DeepEquals, defaultProvider) delete(defaultProvider, "kubectl") @@ -139,7 +141,7 @@ func (s *zeroSuite) TestExpandProviders(c *C) { g := Group{ Name: "clown", Modules: []Module{testPreExistingGKEClusterModule}} - defaultProvider["kubectl"] = kubectlProvider + defaultProvider["kubectl"] = testKubectlProvider noDefPr.expandProviders(&g) c.Check(g.TerraformProviders, DeepEquals, defaultProvider) delete(defaultProvider, "kubectl") From 3361ed5f682eb2001fe3ef9bc59497b42bfd2544 Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Mon, 16 Dec 2024 14:10:50 +0000 Subject: [PATCH 030/140] Update README with GKE parallelstore related example blueprint details --- examples/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/README.md b/examples/README.md index 95cea23dae..b2a7bd7b3c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1527,7 +1527,7 @@ The blueprint contains the following: * A K8s Job that uses a parallelstore storage volume option. * A K8s Job that demonstrates ML training workload with parallelstore storage disk ops. -> **Warning**: In this example, when storage type `Parallelstore` is specified in `gke-storage` module. +> **Warning**: In this example blueprint, when storage type `Parallelstore` is specified in `gke-storage` module. > The lifecycle of the parallelstore is managed by the blueprint. > On glcuster destroy ops, the Parallelstore storage created will also be destroyed. 
> From 1f1f978f68308be5b838c3c440b99df7bcdd17b8 Mon Sep 17 00:00:00 2001 From: Ankit Kinra <1037624+ankitkinra@users.noreply.github.com> Date: Mon, 16 Dec 2024 22:10:41 +0000 Subject: [PATCH 031/140] Move gke a3u blueprints to develop to enable integration testing --- examples/gke-a3-ultragpu/README.md | 1 + .../gke-a3-ultragpu-deployment.yaml | 16 ++ examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml | 212 ++++++++++++++++++ examples/gke-a3-ultragpu/mglru-disable.yaml | 59 +++++ examples/gke-a3-ultragpu/nccl-installer.yaml | 80 +++++++ .../gke-a3-ultragpu/nccl-jobset-example.yaml | 208 +++++++++++++++++ .../gke-a3-ultragpu/nccl-test-32-node.yaml | 208 +++++++++++++++++ examples/gke-a3-ultragpu/nccl-test.yaml | 149 ++++++++++++ 8 files changed, 933 insertions(+) create mode 100644 examples/gke-a3-ultragpu/README.md create mode 100644 examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml create mode 100644 examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml create mode 100644 examples/gke-a3-ultragpu/mglru-disable.yaml create mode 100644 examples/gke-a3-ultragpu/nccl-installer.yaml create mode 100644 examples/gke-a3-ultragpu/nccl-jobset-example.yaml create mode 100644 examples/gke-a3-ultragpu/nccl-test-32-node.yaml create mode 100644 examples/gke-a3-ultragpu/nccl-test.yaml diff --git a/examples/gke-a3-ultragpu/README.md b/examples/gke-a3-ultragpu/README.md new file mode 100644 index 0000000000..73b37bbfcb --- /dev/null +++ b/examples/gke-a3-ultragpu/README.md @@ -0,0 +1 @@ +Refer to [AI Hypercomputer Documentation](https://cloud.google.com/ai-hypercomputer/docs/create/gke-ai-hypercompute#create-cluster) for instructions. \ No newline at end of file diff --git a/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml b/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml new file mode 100644 index 0000000000..b7a8d24071 --- /dev/null +++ b/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml @@ -0,0 +1,16 @@ +--- + terraform_backend_defaults: + type: gcs + configuration: + bucket: BUCKET_NAME + + vars: + deployment_name: gke-a3-ultra + project_id: PROJECT_ID + region: COMPUTE_REGION + zone: COMPUTE_ZONE + authorized_cidr: / + # In order to not target a BLOCK_NAME, extended_reservation can be inputed as + # extended_reservation: RESERVATION_NAME + extended_reservation: RESERVATION_NAME/reservationBlocks/BLOCK_NAME + static_node_count: NODE_COUNT diff --git a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml new file mode 100644 index 0000000000..7069b90797 --- /dev/null +++ b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml @@ -0,0 +1,212 @@ +# Copyright 2024 "Google LLC" +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +blueprint_name: gke-a3-ultra + +vars: + project_id: # add this + deployment_name: # add this + region: # add this + zone: # add this + # Cidr block containing the IP of the machine calling terraform. + # The following line must be updated for this example to work. 
+ authorized_cidr: # add this + extended_reservation: # add this + # Installs NCCL library and Google NCCL plugin + # Runs an init container on all H200 GPU nodes with the NCCL plugin image + nccl_installer_path: $(ghpc_stage("./nccl-installer.yaml")) + # Temporary fix for COS issue, will be fixed in next release + mglru_disable_path: $(ghpc_stage("./mglru-disable.yaml")) + mtu_size: 8896 + static_node_count: # add this + system_node_pool_disk_size_gb: 200 + a3ultra_node_pool_disk_size_gb: 100 + +deployment_groups: +- group: primary + modules: + - id: gke-a3-ultra-net-0 + source: github.com/GoogleCloudPlatform/cluster-toolkit.git//modules/network/vpc?ref=e0c690b + settings: + network_name: gke-a3-ultra-net-0 + subnetworks: + - subnet_name: gke-a3-ultra-sub-0 + subnet_region: $(vars.region) + subnet_ip: 192.168.0.0/18 + secondary_ranges: + gke-a3-ultra-sub-0: + - range_name: pods + ip_cidr_range: 10.4.0.0/14 + - range_name: services + ip_cidr_range: 10.0.32.0/20 + firewall_rules: + - name: gke-a3-ultra-internal-0 + ranges: [192.168.0.0/16] + allow: + - protocol: tcp + ports: ["0-65535"] + - protocol: udp + ports: ["0-65535"] + - protocol: icmp + + - id: gke-a3-ultra-net-1 + source: github.com/GoogleCloudPlatform/cluster-toolkit.git//modules/network/vpc?ref=e0c690b + settings: + network_name: gke-a3-ultra-net-1 + mtu: $(vars.mtu_size) + subnetworks: + - subnet_name: gke-a3-ultra-sub-1 + subnet_region: $(vars.region) + subnet_ip: 192.168.64.0/18 + firewall_rules: + - name: gke-a3-ultra-internal-1 + ranges: [192.168.0.0/16] + allow: + - protocol: tcp + ports: ["0-65535"] + - protocol: udp + ports: ["0-65535"] + - protocol: icmp + + - id: gke-a3-ultra-rdma-net + source: github.com/GoogleCloudPlatform/cluster-toolkit.git//community/modules/network/rdma-vpc?ref=98c49fe + settings: + network_name: gke-a3-ultra-rdma-net + mtu: $(vars.mtu_size) + network_profile: https://www.googleapis.com/compute/beta/projects/$(vars.project_id)/global/networkProfiles/$(vars.zone)-vpc-roce + network_routing_mode: REGIONAL + subnetworks_template: + name_prefix: gke-a3-ultra-rdma-sub + count: 8 + ip_range: 192.168.128.0/18 + region: $(vars.region) + + - id: a3-ultragpu-cluster + source: github.com/GoogleCloudPlatform/cluster-toolkit.git//modules/scheduler/gke-cluster?ref=e0c690b + use: [gke-a3-ultra-net-0] + settings: + release_channel: RAPID + system_node_pool_machine_type: "e2-standard-16" + system_node_pool_disk_size_gb: $(vars.system_node_pool_disk_size_gb) + system_node_pool_taints: [] + enable_dcgm_monitoring: true + enable_gcsfuse_csi: true + enable_private_endpoint: false # Allows access from authorized public IPs + master_authorized_networks: + - cidr_block: $(vars.authorized_cidr) # Allows your machine to run the kubectl command. Required for multi network setup. 
+ display_name: "kubectl-access-network" + maintenance_exclusions: + - name: no-minor-or-node-upgrades-indefinite + start_time: "2024-12-01T00:00:00Z" + end_time: "2025-12-22T00:00:00Z" + exclusion_scope: NO_MINOR_OR_NODE_UPGRADES + additional_networks: + $(concat( + [{ + network=gke-a3-ultra-net-1.network_name, + subnetwork=gke-a3-ultra-net-1.subnetwork_name, + subnetwork_project=vars.project_id, + nic_type="GVNIC", + queue_count=null, + network_ip=null, + stack_type=null, + access_config=[{nat_ip=null, public_ptr_domain_name=null, network_tier=null}], + ipv6_access_config=[], + alias_ip_range=[] + }], + gke-a3-ultra-rdma-net.subnetwork_interfaces_gke + )) + outputs: [instructions] + + - id: a3-ultragpu-pool + source: github.com/GoogleCloudPlatform/cluster-toolkit.git//modules/compute/gke-node-pool?ref=e0c690b + use: [a3-ultragpu-cluster] + settings: + machine_type: a3-ultragpu-8g + auto_upgrade: true + zones: [$(vars.zone)] + disk_type: hyperdisk-balanced + disk_size_gb: $(vars.a3ultra_node_pool_disk_size_gb) + static_node_count: $(vars.static_node_count) + guest_accelerator: + - type: nvidia-h200-141gb + count: 8 + gpu_driver_installation_config: + gpu_driver_version: "LATEST" + reservation_affinity: + consume_reservation_type: SPECIFIC_RESERVATION + specific_reservations: + - name: $(vars.extended_reservation) + additional_networks: + $(concat( + [{ + network=gke-a3-ultra-net-1.network_name, + subnetwork=gke-a3-ultra-net-1.subnetwork_name, + subnetwork_project=vars.project_id, + nic_type="GVNIC", + queue_count=null, + network_ip=null, + stack_type=null, + access_config=[{nat_ip=null, public_ptr_domain_name=null, network_tier=null}], + ipv6_access_config=[], + alias_ip_range=[] + }], + gke-a3-ultra-rdma-net.subnetwork_interfaces_gke + )) + outputs: [instructions] + + - id: topology-aware-scheduler-install + source: github.com/GoogleCloudPlatform/cluster-toolkit.git//community/modules/compute/gke-topology-scheduler?ref=e0c690b + use: [a3-ultragpu-cluster] + + - id: workload-manager-install + source: github.com/GoogleCloudPlatform/cluster-toolkit.git//modules/management/kubectl-apply?ref=e0c690b + use: [a3-ultragpu-cluster] + settings: + kueue: + install: true + version: v0.9.1 + jobset: + install: true + version: v0.7.1 + apply_manifests: + - source: $(vars.nccl_installer_path) + - source: $(vars.mglru_disable_path) + + - id: job-template + source: modules/compute/gke-job-template + use: [a3-ultragpu-pool] + settings: + image: nvidia/cuda:11.0.3-runtime-ubuntu20.04 + command: + - nvidia-smi + node_count: 2 + name: run-nvidia-smi + outputs: [instructions] + +terraform_providers: + google: + source: hashicorp/google + version: 6.13.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + google-beta: + source: hashicorp/google-beta + version: 6.13.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) diff --git a/examples/gke-a3-ultragpu/mglru-disable.yaml b/examples/gke-a3-ultragpu/mglru-disable.yaml new file mode 100644 index 0000000000..f0bc1c8caf --- /dev/null +++ b/examples/gke-a3-ultragpu/mglru-disable.yaml @@ -0,0 +1,59 @@ +# Copyright 2024 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: disable-mglru + namespace: kube-system +spec: + selector: + matchLabels: + app: disable-mglru + template: + metadata: + labels: + app: disable-mglru + spec: + hostNetwork: true + tolerations: + - operator: "Exists" + key: nvidia.com/gpu + containers: + - name: disable-mglru + image: alpine:latest + command: ["/bin/sh"] + securityContext: + privileged: true + args: + - -c + - | + echo n | tee /sys/kernel/mm/lru_gen/enabled + sysctl -w net.ipv4.conf.eth2.log_martians=0 + sysctl -w net.ipv4.conf.eth3.log_martians=0 + sysctl -w net.ipv4.conf.eth4.log_martians=0 + sysctl -w net.ipv4.conf.eth5.log_martians=0 + sysctl -w net.ipv4.conf.eth6.log_martians=0 + sysctl -w net.ipv4.conf.eth7.log_martians=0 + sysctl -w net.ipv4.conf.eth8.log_martians=0 + sysctl -w net.ipv4.conf.eth9.log_martians=0 + sleep infinity + volumeMounts: + - name: sys-kernel-mm-lru-gen + mountPath: /sys/kernel/mm/lru_gen + # Remount sysfs so that it will be writable. + volumes: + - name: sys-kernel-mm-lru-gen + hostPath: + path: /sys/kernel/mm/lru_gen diff --git a/examples/gke-a3-ultragpu/nccl-installer.yaml b/examples/gke-a3-ultragpu/nccl-installer.yaml new file mode 100644 index 0000000000..f2239b2584 --- /dev/null +++ b/examples/gke-a3-ultragpu/nccl-installer.yaml @@ -0,0 +1,80 @@ +# Copyright 2024 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
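# Summary of the DaemonSet below: it schedules onto GKE nodes whose
# cloud.google.com/gke-accelerator label is nvidia-h200-141gb, runs a privileged
# init container from the nccl-plugin-gib image that installs NCCL and the gIB
# NCCL plugin, copies the resulting libraries onto the host under
# /home/kubernetes/bin/nvidia/lib64 and /home/kubernetes/bin/gib, verifies RDMA
# device visibility with ibv_devinfo, and then parks on a pause container.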
+ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: nccl-rdma-installer + namespace: kube-system + labels: + k8s-app: nccl-rdma-installer +spec: + selector: + matchLabels: + k8s-app: nccl-rdma-installer + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + name: nccl-rdma-installer + k8s-app: nccl-rdma-installer + spec: + priorityClassName: system-node-critical + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: cloud.google.com/gke-accelerator + operator: In + values: + - nvidia-h200-141gb + tolerations: + - operator: "Exists" + hostNetwork: true + hostPID: true + volumes: + - name: library-dir-host + hostPath: + path: /home/kubernetes/bin/nvidia/lib64 + type: DirectoryOrCreate + - name: gib + hostPath: + path: /home/kubernetes/bin/gib + initContainers: + - image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 + name: nccl-rdma-installer + resources: + requests: + cpu: 150m + securityContext: + privileged: true + volumeMounts: + - name: library-dir-host + mountPath: /usr/local/home/kubernetes/bin/nvidia/lib64 + - name: gib + mountPath: /usr/local/home/kubernetes/bin/gib + command: ["/bin/sh", "-c"] + args: + - | + set -ex + /scripts/container_entry.sh install --install-nccl + cp -r /var/lib/gib/lib64/. /usr/local/home/kubernetes/bin/nvidia/lib64 + cp -r /var/lib/gib/. /usr/local/home/kubernetes/bin/gib + ibv_devinfo || exit 1 + echo "installation finishes" + containers: + - image: "gke.gcr.io/pause:3.8@sha256:880e63f94b145e46f1b1082bb71b85e21f16b99b180b9996407d61240ceb9830" + name: pause diff --git a/examples/gke-a3-ultragpu/nccl-jobset-example.yaml b/examples/gke-a3-ultragpu/nccl-jobset-example.yaml new file mode 100644 index 0000000000..da49668d0a --- /dev/null +++ b/examples/gke-a3-ultragpu/nccl-jobset-example.yaml @@ -0,0 +1,208 @@ +apiVersion: jobset.x-k8s.io/v1alpha2 +kind: JobSet +metadata: + generateName: ag-4- + namespace: default +spec: + ttlSecondsAfterFinished: 1200 + suspend: False + network: + enableDNSHostnames: true + replicatedJobs: + - name: w + template: + spec: + parallelism: 4 + completions: 4 + + template: + metadata: + annotations: + networking.gke.io/default-interface: 'eth0' + networking.gke.io/interfaces: | + [ + {"interfaceName":"eth0","network":"default"}, + {"interfaceName":"eth1","network":"gke-a3-ultra-sub-1"}, + {"interfaceName":"eth2","network":"gke-a3-ultra-rdma-sub-0"}, + {"interfaceName":"eth3","network":"gke-a3-ultra-rdma-sub-1"}, + {"interfaceName":"eth4","network":"gke-a3-ultra-rdma-sub-2"}, + {"interfaceName":"eth5","network":"gke-a3-ultra-rdma-sub-3"}, + {"interfaceName":"eth6","network":"gke-a3-ultra-rdma-sub-4"}, + {"interfaceName":"eth7","network":"gke-a3-ultra-rdma-sub-5"}, + {"interfaceName":"eth8","network":"gke-a3-ultra-rdma-sub-6"}, + {"interfaceName":"eth9","network":"gke-a3-ultra-rdma-sub-7"} + ] + spec: + # Limit benchmark run duration + activeDeadlineSeconds: 3600 + restartPolicy: Never + nodeSelector: + cloud.google.com/gke-nodepool: a3-ultragpu-8g-a3-ultragpu-pool + tolerations: + - key: cloud.google.com/gke-queued + effect: NoSchedule + value: "true" + + - key: "nvidia.com/gpu" + operator: "Exists" + effect: "NoSchedule" + + setHostnameAsFQDN: true + volumes: + - name: gib + hostPath: + path: /home/kubernetes/bin/gib + - name: nvidia + hostPath: + path: /home/kubernetes/bin/nvidia + - name: lib64 + hostPath: + path: /lib64 + - name: shared-memory + emptyDir: + medium: "Memory" + sizeLimit: 250Gi + - name: 
sys + hostPath: + path: /sys + - name: proc-sys + hostPath: + path: /proc/sys + schedulingGates: + # Set this to a unique name per job. + - name: "gke.io/topology-aware-auto-ag-4" + + initContainers: + - name: gpu-healthcheck + image: alpine:latest + command: ["/bin/sh", "-c"] + args: + - | + apk add --no-cache bash # Install bash + /bin/bash -c "set -ex + NUM_GPUS=$(/usr/local/nvidia/bin/nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits | wc -l) + if [ \${NUM_GPUS} -lt 8 ]; then + echo \"Error: Only \${NUM_GPUS} GPUs and expected 8\" + exit 1 + fi + gpu_errors=(\$(/usr/local/nvidia/bin/nvidia-smi --query-gpu=ecc.errors.uncorrected.volatile.total --format=csv,noheader,nounits)) + for gpu_index in \${!gpu_errors[@]}; do + if [ \${gpu_errors[\$gpu_index]} == '[N/A]' ]; then + echo 'Error: ERR detected in GPU index '\$gpu_index + exit 1 + elif [ \${gpu_errors[\$gpu_index]} -gt 0 ]; then + echo 'Error: Unrecoverable ECC errors detected in GPU index '\$gpu_index + exit 1 + fi + done + echo \${NUM_GPUS} GPUs found with no ERR or Unrecoverable ECC errors" + + volumeMounts: + - name: nvidia + mountPath: /usr/local/nvidia + - name: lib64 + mountPath: /lib64 + securityContext: + privileged: true + env: + - name: LD_LIBRARY_PATH + value: /usr/local/nvidia/lib64 + + containers: + - name: nccl + stdin: true + tty: true + image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 + securityContext: + privileged: true + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OMPI_ALLOW_RUN_AS_ROOT + value: "1" + - name: OMPI_ALLOW_RUN_AS_ROOT_CONFIRM + value: "1" + command: + - bash + - -c + - | + set -x + export N_NODES=4 + echo "Starting workload container on ${MY_NODE_NAME} for $N_NODES benchmark" + + # Load all the cuda libs + /sbin/ldconfig + + # Install ping + apt update -y + apt install -y iputils-ping + + # Start sshd + /scripts/container_entry.sh daemon & + + # Get helper variables to form all hostnames + export POSTFIX=$(hostname | cut -d . -f 2-) + export WORKERS_BASENAME=$(hostname | cut -d . -f 1 | rev | cut -d - -f 2- | rev ) + export NODE_RANK=$JOB_COMPLETION_INDEX + + + # For every worker, wait till online and add to hostfile + for i in `seq 0 $(($N_NODES-1))`; do + OTHER=${WORKERS_BASENAME}-${i}.${POSTFIX} + until ssh -p 222 -o StrictHostKeyChecking=no $OTHER hostname; do + echo Waiting for ${OTHER}... 
+ sleep 10 + done + echo ${OTHER} port=222 slots=8 | tee -a /tmp/hostfile; + done + + cat /tmp/hostfile + + # Launch from head node + if [[ "${NODE_RANK}" -eq "0" ]]; then + + # World Level = 0x0, Rail Aligned = 0x7 + export NCCL_TESTS_SPLIT_MASK="0x0"; + + # Force use of libnccl-gib + export NCCL_NET=gIB + + # Set all the correct libnccl-gib environment variables + source /usr/local/gib/scripts/set_nccl_env.sh + + # Get all relevant NCCL / env vars to pass to all workers + ENV_VARS=$(echo ${!NCCL*} ${!OMPI*} LD_LIBRARY_PATH PATH | sed 's/ / -x /g') + + mpirun --hostfile /tmp/hostfile \ + -x $ENV_VARS \ + -mca plm_rsh_no_tree_spawn 1 \ + --mca orte_keep_fqdn_hostnames 1 \ + --mca btl self,tcp \ + --mca btl_tcp_if_include eth0 \ + --bind-to none \ + --mca plm_rsh_agent "ssh -q -o LogLevel=ERROR -o StrictHostKeyChecking=no -p 222" \ + /third_party/nccl-tests/build/all_gather_perf -b 1K -e 8G -f 2 -g 1 -w 5 --iters 100 -c 1 + + else + while ping -c 1 ${WORKERS_BASENAME}-0.${POSTFIX}; do + sleep 5 + done + fi + + exit 0 + + volumeMounts: + - name: nvidia + mountPath: /usr/local/nvidia + - name: gib + mountPath: /usr/local/gib + - name: shared-memory + mountPath: /dev/shm + resources: + limits: + nvidia.com/gpu: 8 + requests: + nvidia.com/gpu: 8 + restartPolicy: Never diff --git a/examples/gke-a3-ultragpu/nccl-test-32-node.yaml b/examples/gke-a3-ultragpu/nccl-test-32-node.yaml new file mode 100644 index 0000000000..3ce2b490d6 --- /dev/null +++ b/examples/gke-a3-ultragpu/nccl-test-32-node.yaml @@ -0,0 +1,208 @@ +apiVersion: jobset.x-k8s.io/v1alpha2 +kind: JobSet +metadata: + generateName: ag-32- + namespace: default +spec: + ttlSecondsAfterFinished: 1200 + suspend: False + network: + enableDNSHostnames: true + replicatedJobs: + - name: w + template: + spec: + parallelism: 32 + completions: 32 + + template: + metadata: + annotations: + networking.gke.io/default-interface: 'eth0' + networking.gke.io/interfaces: | + [ + {"interfaceName":"eth0","network":"default"}, + {"interfaceName":"eth1","network":"gke-a3-ultra-sub-1"}, + {"interfaceName":"eth2","network":"gke-a3-ultra-rdma-sub-0"}, + {"interfaceName":"eth3","network":"gke-a3-ultra-rdma-sub-1"}, + {"interfaceName":"eth4","network":"gke-a3-ultra-rdma-sub-2"}, + {"interfaceName":"eth5","network":"gke-a3-ultra-rdma-sub-3"}, + {"interfaceName":"eth6","network":"gke-a3-ultra-rdma-sub-4"}, + {"interfaceName":"eth7","network":"gke-a3-ultra-rdma-sub-5"}, + {"interfaceName":"eth8","network":"gke-a3-ultra-rdma-sub-6"}, + {"interfaceName":"eth9","network":"gke-a3-ultra-rdma-sub-7"} + ] + spec: + # Limit benchmark run duration + activeDeadlineSeconds: 3600 + restartPolicy: Never + nodeSelector: + cloud.google.com/gke-nodepool: a3-ultragpu-8g-a3-ultragpu-pool + tolerations: + - key: cloud.google.com/gke-queued + effect: NoSchedule + value: "true" + + - key: "nvidia.com/gpu" + operator: "Exists" + effect: "NoSchedule" + + setHostnameAsFQDN: true + volumes: + - name: gib + hostPath: + path: /home/kubernetes/bin/gib + - name: nvidia + hostPath: + path: /home/kubernetes/bin/nvidia + - name: lib64 + hostPath: + path: /lib64 + - name: shared-memory + emptyDir: + medium: "Memory" + sizeLimit: 250Gi + - name: sys + hostPath: + path: /sys + - name: proc-sys + hostPath: + path: /proc/sys + schedulingGates: + # Set this to a unique name per job. 
+ - name: "gke.io/topology-aware-auto-ag-32" + + initContainers: + - name: gpu-healthcheck + image: alpine:latest + command: ["/bin/sh", "-c"] + args: + - | + apk add --no-cache bash # Install bash + /bin/bash -c "set -ex + NUM_GPUS=$(/usr/local/nvidia/bin/nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits | wc -l) + if [ \${NUM_GPUS} -lt 8 ]; then + echo \"Error: Only \${NUM_GPUS} GPUs and expected 8\" + exit 1 + fi + gpu_errors=(\$(/usr/local/nvidia/bin/nvidia-smi --query-gpu=ecc.errors.uncorrected.volatile.total --format=csv,noheader,nounits)) + for gpu_index in \${!gpu_errors[@]}; do + if [ \${gpu_errors[\$gpu_index]} == '[N/A]' ]; then + echo 'Error: ERR detected in GPU index '\$gpu_index + exit 1 + elif [ \${gpu_errors[\$gpu_index]} -gt 0 ]; then + echo 'Error: Unrecoverable ECC errors detected in GPU index '\$gpu_index + exit 1 + fi + done + echo \${NUM_GPUS} GPUs found with no ERR or Unrecoverable ECC errors" + + volumeMounts: + - name: nvidia + mountPath: /usr/local/nvidia + - name: lib64 + mountPath: /lib64 + securityContext: + privileged: true + env: + - name: LD_LIBRARY_PATH + value: /usr/local/nvidia/lib64 + + containers: + - name: nccl + stdin: true + tty: true + image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 + securityContext: + privileged: true + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OMPI_ALLOW_RUN_AS_ROOT + value: "1" + - name: OMPI_ALLOW_RUN_AS_ROOT_CONFIRM + value: "1" + command: + - bash + - -c + - | + set -x + export N_NODES=32 + echo "Starting workload container on ${MY_NODE_NAME} for $N_NODES benchmark" + + # Load all the cuda libs + /sbin/ldconfig + + # Install ping + apt update -y + apt install -y iputils-ping + + # Start sshd + /scripts/container_entry.sh daemon & + + # Get helper variables to form all hostnames + export POSTFIX=$(hostname | cut -d . -f 2-) + export WORKERS_BASENAME=$(hostname | cut -d . -f 1 | rev | cut -d - -f 2- | rev ) + export NODE_RANK=$JOB_COMPLETION_INDEX + + + # For every worker, wait till online and add to hostfile + for i in `seq 0 $(($N_NODES-1))`; do + OTHER=${WORKERS_BASENAME}-${i}.${POSTFIX} + until ssh -p 222 -o StrictHostKeyChecking=no $OTHER hostname; do + echo Waiting for ${OTHER}... 
+ sleep 10 + done + echo ${OTHER} port=222 slots=8 | tee -a /tmp/hostfile; + done + + cat /tmp/hostfile + + # Launch from head node + if [[ "${NODE_RANK}" -eq "0" ]]; then + + # World Level = 0x0, Rail Aligned = 0x7 + export NCCL_TESTS_SPLIT_MASK="0x0"; + + # Force use of libnccl-gib + export NCCL_NET=gIB + + # Set all the correct libnccl-gib environment variables + source /usr/local/gib/scripts/set_nccl_env.sh + + # Get all relevant NCCL / env vars to pass to all workers + ENV_VARS=$(echo ${!NCCL*} ${!OMPI*} LD_LIBRARY_PATH PATH | sed 's/ / -x /g') + + mpirun --hostfile /tmp/hostfile \ + -x $ENV_VARS \ + -mca plm_rsh_no_tree_spawn 1 \ + --mca orte_keep_fqdn_hostnames 1 \ + --mca btl self,tcp \ + --mca btl_tcp_if_include eth0 \ + --bind-to none \ + --mca plm_rsh_agent "ssh -q -o LogLevel=ERROR -o StrictHostKeyChecking=no -p 222" \ + /third_party/nccl-tests/build/all_gather_perf -b 1K -e 8G -f 2 -g 1 -w 5 --iters 100 -c 1 + + else + while ping -c 1 ${WORKERS_BASENAME}-0.${POSTFIX}; do + sleep 5 + done + fi + + exit 0 + + volumeMounts: + - name: nvidia + mountPath: /usr/local/nvidia + - name: gib + mountPath: /usr/local/gib + - name: shared-memory + mountPath: /dev/shm + resources: + limits: + nvidia.com/gpu: 8 + requests: + nvidia.com/gpu: 8 + restartPolicy: Never diff --git a/examples/gke-a3-ultragpu/nccl-test.yaml b/examples/gke-a3-ultragpu/nccl-test.yaml new file mode 100644 index 0000000000..994601472f --- /dev/null +++ b/examples/gke-a3-ultragpu/nccl-test.yaml @@ -0,0 +1,149 @@ +# Copyright 2024 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v1 +kind: Service +metadata: + name: nccl-host-1 +spec: + selector: + name: nccl-host-1 + clusterIP: None +--- +apiVersion: v1 +kind: Service +metadata: + name: nccl-host-2 +spec: + selector: + name: nccl-host-2 + clusterIP: None +--- +apiVersion: v1 +kind: Pod +metadata: + name: nccl-test-host-1 + labels: + name: nccl-host-1 + annotations: + networking.gke.io/default-interface: 'eth0' + networking.gke.io/interfaces: | + [ + {"interfaceName":"eth0","network":"default"}, + {"interfaceName":"eth1","network":"gke-a3-ultra-sub-1"}, + {"interfaceName":"eth2","network":"gke-a3-ultra-rdma-sub-0"}, + {"interfaceName":"eth3","network":"gke-a3-ultra-rdma-sub-1"}, + {"interfaceName":"eth4","network":"gke-a3-ultra-rdma-sub-2"}, + {"interfaceName":"eth5","network":"gke-a3-ultra-rdma-sub-3"}, + {"interfaceName":"eth6","network":"gke-a3-ultra-rdma-sub-4"}, + {"interfaceName":"eth7","network":"gke-a3-ultra-rdma-sub-5"}, + {"interfaceName":"eth8","network":"gke-a3-ultra-rdma-sub-6"}, + {"interfaceName":"eth9","network":"gke-a3-ultra-rdma-sub-7"} + ] +spec: + volumes: + - name: library-dir-host + hostPath: + path: /home/kubernetes/bin/nvidia + - name: gib + hostPath: + path: /home/kubernetes/bin/gib + - name: shared-memory + emptyDir: + medium: "Memory" + sizeLimit: 250Gi + containers: + - image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 + name: test + resources: + requests: + cpu: 150m + volumeMounts: + - name: library-dir-host + mountPath: /usr/local/nvidia + - name: gib + mountPath: /usr/local/gib + - name: shared-memory + mountPath: /dev/shm + env: + - name: LD_LIBRARY_PATH + value: /usr/local/nvidia/lib64 + resources: + limits: + nvidia.com/gpu: 8 + command: ["/bin/bash", "-c"] + args: + - | + /scripts/container_entry.sh shell + source /usr/local/gib/scripts/set_nccl_env.sh + sleep infinity +--- +apiVersion: v1 +kind: Pod +metadata: + name: nccl-test-host-2 + labels: + name: nccl-host-2 + annotations: + networking.gke.io/default-interface: 'eth0' + networking.gke.io/interfaces: | + [ + {"interfaceName":"eth0","network":"default"}, + {"interfaceName":"eth1","network":"gke-a3-ultra-sub-1"}, + {"interfaceName":"eth2","network":"gke-a3-ultra-rdma-sub-0"}, + {"interfaceName":"eth3","network":"gke-a3-ultra-rdma-sub-1"}, + {"interfaceName":"eth4","network":"gke-a3-ultra-rdma-sub-2"}, + {"interfaceName":"eth5","network":"gke-a3-ultra-rdma-sub-3"}, + {"interfaceName":"eth6","network":"gke-a3-ultra-rdma-sub-4"}, + {"interfaceName":"eth7","network":"gke-a3-ultra-rdma-sub-5"}, + {"interfaceName":"eth8","network":"gke-a3-ultra-rdma-sub-6"}, + {"interfaceName":"eth9","network":"gke-a3-ultra-rdma-sub-7"} + ] +spec: + volumes: + - name: library-dir-host + hostPath: + path: /home/kubernetes/bin/nvidia + - name: gib + hostPath: + path: /home/kubernetes/bin/gib + - name: shared-memory + emptyDir: + medium: "Memory" + sizeLimit: 250Gi + containers: + - image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 + name: test + resources: + requests: + cpu: 150m + volumeMounts: + - name: library-dir-host + mountPath: /usr/local/nvidia + - name: gib + mountPath: /usr/local/gib + - name: shared-memory + mountPath: /dev/shm + env: + - name: LD_LIBRARY_PATH + value: /usr/local/nvidia/lib64 + resources: + limits: + nvidia.com/gpu: 8 + command: ["/bin/bash", "-c"] + args: + - | + /scripts/container_entry.sh shell + source /usr/local/gib/scripts/set_nccl_env.sh + sleep infinity From ab6bc3b0c404d5773b62c6e8157d29ea2d7a6a11 Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: 
Tue, 17 Dec 2024 06:33:34 +0000 Subject: [PATCH 032/140] Updated blueprint name from gke-storage-parallelstore to gke-storage-managed-parallelstore --- examples/README.md | 10 +++++----- ...ore.yaml => gke-storage-managed-parallelstore.yaml} | 4 ++-- modules/file-system/gke-storage/README.md | 2 +- ...ore.yaml => gke-storage-managed-parallelstore.yaml} | 6 +++--- ...store.yml => gke-storage-managed-parallelstore.yml} | 8 ++++---- 5 files changed, 15 insertions(+), 15 deletions(-) rename examples/{gke-storage-parallelstore.yaml => gke-storage-managed-parallelstore.yaml} (98%) rename tools/cloud-build/daily-tests/builds/{gke-storage-parallelstore.yaml => gke-storage-managed-parallelstore.yaml} (93%) rename tools/cloud-build/daily-tests/tests/{gke-storage-parallelstore.yml => gke-storage-managed-parallelstore.yml} (77%) diff --git a/examples/README.md b/examples/README.md index b2a7bd7b3c..73272df3cb 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1518,14 +1518,14 @@ cleaned up when the job is deleted. [storage-gke.yaml]: ../examples/storage-gke.yaml -### [gke-storage-parallelstore.yaml] ![core-badge] ![experimental-badge] +### [gke-storage-managed-parallelstore.yaml] ![core-badge] ![experimental-badge] -This blueprint shows how to use parallelstore storage options with GKE in the toolkit. +This blueprint shows how to use managed parallelstore storage options with GKE in the toolkit. The blueprint contains the following: -* A K8s Job that uses a parallelstore storage volume option. -* A K8s Job that demonstrates ML training workload with parallelstore storage disk ops. +* A K8s Job that uses a managed parallelstore storage volume option. +* A K8s Job that demonstrates ML training workload with managed parallelstore storage disk ops. > **Warning**: In this example blueprint, when storage type `Parallelstore` is specified in `gke-storage` module. > The lifecycle of the parallelstore is managed by the blueprint. @@ -1540,7 +1540,7 @@ The blueprint contains the following: > `--vars authorized_cidr=/32`.** You can use a service like > [whatismyip.com](https://whatismyip.com) to determine your IP address. -[gke-storage-parallelstore.yaml]: ../examples/gke-storage-parallelstore.yaml +[gke-storage-managed-parallelstore.yaml]: ../examples/gke-storage-managed-parallelstore.yaml ### [gke-a3-megagpu.yaml] ![core-badge] ![experimental-badge] diff --git a/examples/gke-storage-parallelstore.yaml b/examples/gke-storage-managed-parallelstore.yaml similarity index 98% rename from examples/gke-storage-parallelstore.yaml rename to examples/gke-storage-managed-parallelstore.yaml index ac8f5773b9..414a2b180d 100644 --- a/examples/gke-storage-parallelstore.yaml +++ b/examples/gke-storage-managed-parallelstore.yaml @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. --- -blueprint_name: gke-storage-parallelstore +blueprint_name: gke-storage-managed-parallelstore vars: project_id: ## Set GCP Project ID Here ## - deployment_name: gke-storage-ps + deployment_name: gke-storage-managed-ps region: us-central1 zone: us-central1-c # Cidr block containing the IP of the machine calling terraform. diff --git a/modules/file-system/gke-storage/README.md b/modules/file-system/gke-storage/README.md index 17c718aa37..f4ebd8add0 100644 --- a/modules/file-system/gke-storage/README.md +++ b/modules/file-system/gke-storage/README.md @@ -39,7 +39,7 @@ then use them in a `gke-job-template` to dynamically provision the resource. 
``` See example -[gke-storage-parallelstore.yaml](../../../examples/README.md#gke-storage-parallelstoreyaml--) blueprint +[gke-storage-managed-parallelstore.yaml](../../../examples/README.md#gke-storage-managed-parallelstoreyaml--) blueprint for a complete example. ### Authorized Network diff --git a/tools/cloud-build/daily-tests/builds/gke-storage-parallelstore.yaml b/tools/cloud-build/daily-tests/builds/gke-storage-managed-parallelstore.yaml similarity index 93% rename from tools/cloud-build/daily-tests/builds/gke-storage-parallelstore.yaml rename to tools/cloud-build/daily-tests/builds/gke-storage-managed-parallelstore.yaml index a51c8cebab..8fbc9c1794 100644 --- a/tools/cloud-build/daily-tests/builds/gke-storage-parallelstore.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-storage-managed-parallelstore.yaml @@ -27,7 +27,7 @@ timeout: 14400s # 4hr steps: ## Test GKE -- id: gke-storage-parallelstore +- id: gke-storage-managed-parallelstore name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: @@ -40,7 +40,7 @@ steps: cd /workspace && make BUILD_ID_FULL=$BUILD_ID BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - SG_EXAMPLE=examples/gke-storage-parallelstore.yaml + SG_EXAMPLE=examples/gke-storage-managed-parallelstore.yaml # adding vm to act as remote node echo ' - id: remote-node' >> $${SG_EXAMPLE} @@ -58,4 +58,4 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-storage-parallelstore.yml" + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-storage-managed-parallelstore.yml" diff --git a/tools/cloud-build/daily-tests/tests/gke-storage-parallelstore.yml b/tools/cloud-build/daily-tests/tests/gke-storage-managed-parallelstore.yml similarity index 77% rename from tools/cloud-build/daily-tests/tests/gke-storage-parallelstore.yml rename to tools/cloud-build/daily-tests/tests/gke-storage-managed-parallelstore.yml index a6de4bf239..bfb8bc32d7 100644 --- a/tools/cloud-build/daily-tests/tests/gke-storage-parallelstore.yml +++ b/tools/cloud-build/daily-tests/tests/gke-storage-managed-parallelstore.yml @@ -12,16 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
--- -test_name: gke-storage-parallelstore -deployment_name: gke-storage-parallelstore-{{ build }} +test_name: gke-storage-managed-parallelstore +deployment_name: gke-storage-managed-parallelstore-{{ build }} zone: us-central1-a # for remote node region: us-central1 workspace: /workspace -blueprint_yaml: "{{ workspace }}/examples/gke-storage-parallelstore.yaml" +blueprint_yaml: "{{ workspace }}/examples/gke-storage-managed-parallelstore.yaml" network: "{{ deployment_name }}-net" remote_node: "{{ deployment_name }}-0" post_deploy_tests: -- test-validation/test-gke-storage-parallelstore.yml +- test-validation/test-gke-storage-managed-parallelstore.yml custom_vars: project: "{{ project }}" cli_deployment_vars: From 270ccb756299a9f01c1a3c8b9829c6195dd368d4 Mon Sep 17 00:00:00 2001 From: ighosh98 Date: Tue, 17 Dec 2024 09:38:42 +0000 Subject: [PATCH 033/140] integrating kueue v0.10.0 to enable TAS with rank ordering support --- .../manifests/kueue-v0.10.0.yaml | 13184 ++++++++++++++++ modules/management/kubectl-apply/variables.tf | 2 +- .../blueprints/gke-a2-highgpu.yaml | 2 +- 3 files changed, 13186 insertions(+), 2 deletions(-) create mode 100644 modules/management/kubectl-apply/manifests/kueue-v0.10.0.yaml diff --git a/modules/management/kubectl-apply/manifests/kueue-v0.10.0.yaml b/modules/management/kubectl-apply/manifests/kueue-v0.10.0.yaml new file mode 100644 index 0000000000..696e9b1ffb --- /dev/null +++ b/modules/management/kubectl-apply/manifests/kueue-v0.10.0.yaml @@ -0,0 +1,13184 @@ +# Copyright 2024 "Google LLC" +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +kind: Namespace +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-system +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: admissionchecks.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: AdmissionCheck + listKind: AdmissionCheckList + plural: admissionchecks + singular: admissioncheck + scope: Cluster + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: AdmissionCheck is the Schema for the admissionchecks API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: AdmissionCheckSpec defines the desired state of AdmissionCheck + properties: + controllerName: + description: |- + controllerName identifies the controller that processes the AdmissionCheck, + not necessarily a Kubernetes Pod or Deployment name. Cannot be empty. + type: string + x-kubernetes-validations: + - message: field is immutable + rule: self == oldSelf + parameters: + description: |- + Parameters identifies a configuration with additional parameters for the + check. + properties: + apiGroup: + description: ApiGroup is the group for the resource being referenced. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + description: Kind is the type of the resource being referenced. + maxLength: 63 + pattern: ^(?i)[a-z]([-a-z0-9]*[a-z0-9])?$ + type: string + name: + description: Name is the name of the resource being referenced. + maxLength: 63 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + required: + - apiGroup + - kind + - name + type: object + retryDelayMinutes: + default: 15 + description: |- + RetryDelayMinutes specifies how long to keep the workload suspended after + a failed check (after it transitioned to False). When the delay period has passed, the check + state goes to "Unknown". The default is 15 min. + Deprecated: retryDelayMinutes has already been deprecated since v0.8 and will be removed in v1beta2. + format: int64 + type: integer + required: + - controllerName + type: object + status: + description: AdmissionCheckStatus defines the observed state of AdmissionCheck + properties: + conditions: + description: |- + conditions hold the latest available observations of the AdmissionCheck + current state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. 
+ enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: clusterqueues.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: ClusterQueue + listKind: ClusterQueueList + plural: clusterqueues + shortNames: + - cq + singular: clusterqueue + scope: Cluster + versions: + - additionalPrinterColumns: + - description: Cohort that this ClusterQueue belongs to + jsonPath: .spec.cohort + name: Cohort + type: string + - description: The queueing strategy used to prioritize workloads + jsonPath: .spec.queueingStrategy + name: Strategy + priority: 1 + type: string + - description: Number of pending workloads + jsonPath: .status.pendingWorkloads + name: Pending Workloads + type: integer + - description: Number of admitted workloads that haven't finished yet + jsonPath: .status.admittedWorkloads + name: Admitted Workloads + priority: 1 + type: integer + name: v1beta1 + schema: + openAPIV3Schema: + description: ClusterQueue is the Schema for the clusterQueue API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ClusterQueueSpec defines the desired state of ClusterQueue + properties: + admissionChecks: + description: |- + admissionChecks lists the AdmissionChecks required by this ClusterQueue. + Cannot be used along with AdmissionCheckStrategy. + items: + type: string + type: array + admissionChecksStrategy: + description: |- + admissionCheckStrategy defines a list of strategies to determine which ResourceFlavors require AdmissionChecks. + This property cannot be used in conjunction with the 'admissionChecks' property. + properties: + admissionChecks: + description: admissionChecks is a list of strategies for AdmissionChecks + items: + description: AdmissionCheckStrategyRule defines rules for a + single AdmissionCheck + properties: + name: + description: name is an AdmissionCheck's name. + type: string + onFlavors: + description: |- + onFlavors is a list of ResourceFlavors' names that this AdmissionCheck should run for. + If empty, the AdmissionCheck will run for all workloads submitted to the ClusterQueue. 
+ items: + description: ResourceFlavorReference is the name of the + ResourceFlavor. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + type: array + required: + - name + type: object + type: array + type: object + cohort: + description: |- + cohort that this ClusterQueue belongs to. CQs that belong to the + same cohort can borrow unused resources from each other. + + A CQ can be a member of a single borrowing cohort. A workload submitted + to a queue referencing this CQ can borrow quota from any CQ in the cohort. + Only quota for the [resource, flavor] pairs listed in the CQ can be + borrowed. + If empty, this ClusterQueue cannot borrow from any other ClusterQueue and + vice versa. + + A cohort is a name that links CQs together, but it doesn't reference any + object. + + Validation of a cohort name is equivalent to that of object names: + subdomain in DNS (RFC 1123). + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + fairSharing: + description: |- + fairSharing defines the properties of the ClusterQueue when participating in fair sharing. + The values are only relevant if fair sharing is enabled in the Kueue configuration. + properties: + weight: + anyOf: + - type: integer + - type: string + default: 1 + description: |- + weight gives a comparative advantage to this ClusterQueue when competing for unused + resources in the cohort against other ClusterQueues. + The share of a ClusterQueue is based on the dominant resource usage above nominal + quotas for each resource, divided by the weight. + Admission prioritizes scheduling workloads from ClusterQueues with the lowest share + and preempting workloads from the ClusterQueues with the highest share. + A zero weight implies infinite share value, meaning that this ClusterQueue will always + be at disadvantage against other ClusterQueues. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + flavorFungibility: + default: {} + description: |- + flavorFungibility defines whether a workload should try the next flavor + before borrowing or preempting in the flavor being evaluated. + properties: + whenCanBorrow: + default: Borrow + description: |- + whenCanBorrow determines whether a workload should try the next flavor + before borrowing in current flavor. The possible values are: + + - `Borrow` (default): allocate in current flavor if borrowing + is possible. + - `TryNextFlavor`: try next flavor even if the current + flavor has enough resources to borrow. + enum: + - Borrow + - TryNextFlavor + type: string + whenCanPreempt: + default: TryNextFlavor + description: |- + whenCanPreempt determines whether a workload should try the next flavor + before borrowing in current flavor. The possible values are: + + - `Preempt`: allocate in current flavor if it's possible to preempt some workloads. + - `TryNextFlavor` (default): try next flavor even if there are enough + candidates for preemption in the current flavor. + enum: + - Preempt + - TryNextFlavor + type: string + type: object + namespaceSelector: + description: |- + namespaceSelector defines which namespaces are allowed to submit workloads to + this clusterQueue. Beyond this basic support for policy, a policy agent like + Gatekeeper should be used to enforce more advanced policies. + Defaults to null which is a nothing selector (no namespaces eligible). 
+ If set to an empty selector `{}`, then all namespaces are eligible. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + preemption: + default: {} + description: |- + preemption describes policies to preempt Workloads from this ClusterQueue + or the ClusterQueue's cohort. + + Preemption can happen in two scenarios: + + - When a Workload fits within the nominal quota of the ClusterQueue, but + the quota is currently borrowed by other ClusterQueues in the cohort. + Preempting Workloads in other ClusterQueues allows this ClusterQueue to + reclaim its nominal quota. + - When a Workload doesn't fit within the nominal quota of the ClusterQueue + and there are admitted Workloads in the ClusterQueue with lower priority. + + The preemption algorithm tries to find a minimal set of Workloads to + preempt to accomomdate the pending Workload, preempting Workloads with + lower priority first. + properties: + borrowWithinCohort: + default: {} + description: |- + borrowWithinCohort provides configuration to allow preemption within + cohort while borrowing. + properties: + maxPriorityThreshold: + description: |- + maxPriorityThreshold allows to restrict the set of workloads which + might be preempted by a borrowing workload, to only workloads with + priority less than or equal to the specified threshold priority. + When the threshold is not specified, then any workload satisfying the + policy can be preempted by the borrowing workload. + format: int32 + type: integer + policy: + default: Never + description: |- + policy determines the policy for preemption to reclaim quota within cohort while borrowing. + Possible values are: + - `Never` (default): do not allow for preemption, in other + ClusterQueues within the cohort, for a borrowing workload. + - `LowerPriority`: allow preemption, in other ClusterQueues + within the cohort, for a borrowing workload, but only if + the preempted workloads are of lower priority. 
+ enum: + - Never + - LowerPriority + type: string + type: object + reclaimWithinCohort: + default: Never + description: |- + reclaimWithinCohort determines whether a pending Workload can preempt + Workloads from other ClusterQueues in the cohort that are using more than + their nominal quota. The possible values are: + + - `Never` (default): do not preempt Workloads in the cohort. + - `LowerPriority`: **Classic Preemption** if the pending Workload + fits within the nominal quota of its ClusterQueue, only preempt + Workloads in the cohort that have lower priority than the pending + Workload. **Fair Sharing** only preempt Workloads in the cohort that + have lower priority than the pending Workload and that satisfy the + fair sharing preemptionStategies. + - `Any`: **Classic Preemption** if the pending Workload fits within + the nominal quota of its ClusterQueue, preempt any Workload in the + cohort, irrespective of priority. **Fair Sharing** preempt Workloads + in the cohort that satisfy the fair sharing preemptionStrategies. + enum: + - Never + - LowerPriority + - Any + type: string + withinClusterQueue: + default: Never + description: |- + withinClusterQueue determines whether a pending Workload that doesn't fit + within the nominal quota for its ClusterQueue, can preempt active Workloads in + the ClusterQueue. The possible values are: + + - `Never` (default): do not preempt Workloads in the ClusterQueue. + - `LowerPriority`: only preempt Workloads in the ClusterQueue that have + lower priority than the pending Workload. + - `LowerOrNewerEqualPriority`: only preempt Workloads in the ClusterQueue that + either have a lower priority than the pending workload or equal priority + and are newer than the pending workload. + enum: + - Never + - LowerPriority + - LowerOrNewerEqualPriority + type: string + type: object + x-kubernetes-validations: + - message: reclaimWithinCohort=Never and borrowWithinCohort.Policy!=Never + rule: '!(self.reclaimWithinCohort == ''Never'' && has(self.borrowWithinCohort) + && self.borrowWithinCohort.policy != ''Never'')' + queueingStrategy: + default: BestEffortFIFO + description: |- + QueueingStrategy indicates the queueing strategy of the workloads + across the queues in this ClusterQueue. + Current Supported Strategies: + + - StrictFIFO: workloads are ordered strictly by creation time. + Older workloads that can't be admitted will block admitting newer + workloads even if they fit available quota. + - BestEffortFIFO: workloads are ordered by creation time, + however older workloads that can't be admitted will not block + admitting newer workloads that fit existing quota. + enum: + - StrictFIFO + - BestEffortFIFO + type: string + resourceGroups: + description: |- + resourceGroups describes groups of resources. + Each resource group defines the list of resources and a list of flavors + that provide quotas for these resources. + Each resource and each flavor can only form part of one resource group. + resourceGroups can be up to 16. + items: + properties: + coveredResources: + description: |- + coveredResources is the list of resources covered by the flavors in this + group. + Examples: cpu, memory, vendor.com/gpu. + The list cannot be empty and it can contain up to 16 resources. + items: + description: ResourceName is the name identifying various + resources in a ResourceList. + type: string + maxItems: 16 + minItems: 1 + type: array + flavors: + description: |- + flavors is the list of flavors that provide the resources of this group. 
+ Typically, different flavors represent different hardware models + (e.g., gpu models, cpu architectures) or pricing models (on-demand vs spot + cpus). + Each flavor MUST list all the resources listed for this group in the same + order as the .resources field. + The list cannot be empty and it can contain up to 16 flavors. + items: + properties: + name: + description: |- + name of this flavor. The name should match the .metadata.name of a + ResourceFlavor. If a matching ResourceFlavor does not exist, the + ClusterQueue will have an Active condition set to False. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resources: + description: |- + resources is the list of quotas for this flavor per resource. + There could be up to 16 resources. + items: + properties: + borrowingLimit: + anyOf: + - type: integer + - type: string + description: |- + borrowingLimit is the maximum amount of quota for the [flavor, resource] + combination that this ClusterQueue is allowed to borrow from the unused + quota of other ClusterQueues in the same cohort. + In total, at a given time, Workloads in a ClusterQueue can consume a + quantity of quota equal to nominalQuota+borrowingLimit, assuming the other + ClusterQueues in the cohort have enough unused quota. + If null, it means that there is no borrowing limit. + If not null, it must be non-negative. + borrowingLimit must be null if spec.cohort is empty. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + lendingLimit: + anyOf: + - type: integer + - type: string + description: |- + lendingLimit is the maximum amount of unused quota for the [flavor, resource] + combination that this ClusterQueue can lend to other ClusterQueues in the same cohort. + In total, at a given time, ClusterQueue reserves for its exclusive use + a quantity of quota equals to nominalQuota - lendingLimit. + If null, it means that there is no lending limit, meaning that + all the nominalQuota can be borrowed by other clusterQueues in the cohort. + If not null, it must be non-negative. + lendingLimit must be null if spec.cohort is empty. + This field is in beta stage and is enabled by default. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + name: + description: name of this resource. + type: string + nominalQuota: + anyOf: + - type: integer + - type: string + description: |- + nominalQuota is the quantity of this resource that is available for + Workloads admitted by this ClusterQueue at a point in time. + The nominalQuota must be non-negative. + nominalQuota should represent the resources in the cluster available for + running jobs (after discounting resources consumed by system components + and pods not managed by kueue). In an autoscaled cluster, nominalQuota + should account for resources that can be provided by a component such as + Kubernetes cluster-autoscaler. + + If the ClusterQueue belongs to a cohort, the sum of the quotas for each + (flavor, resource) combination defines the maximum quantity that can be + allocated by a ClusterQueue in the cohort. 
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - name + - nominalQuota + type: object + maxItems: 16 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - name + - resources + type: object + maxItems: 16 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - coveredResources + - flavors + type: object + x-kubernetes-validations: + - message: flavors must have the same number of resources as the + coveredResources + rule: self.flavors.all(x, size(x.resources) == size(self.coveredResources)) + maxItems: 16 + type: array + x-kubernetes-list-type: atomic + stopPolicy: + default: None + description: |- + stopPolicy - if set to a value different from None, the ClusterQueue is considered Inactive, no new reservation being + made. + + Depending on its value, its associated workloads will: + + - None - Workloads are admitted + - HoldAndDrain - Admitted workloads are evicted and Reserving workloads will cancel the reservation. + - Hold - Admitted workloads will run to completion and Reserving workloads will cancel the reservation. + enum: + - None + - Hold + - HoldAndDrain + type: string + type: object + x-kubernetes-validations: + - message: borrowingLimit must be nil when cohort is empty + rule: '!has(self.cohort) && has(self.resourceGroups) ? self.resourceGroups.all(rg, + rg.flavors.all(f, f.resources.all(r, !has(r.borrowingLimit)))) : true' + status: + description: ClusterQueueStatus defines the observed state of ClusterQueue + properties: + admittedWorkloads: + description: |- + admittedWorkloads is the number of workloads currently admitted to this + clusterQueue and haven't finished yet. + format: int32 + type: integer + conditions: + description: |- + conditions hold the latest available observations of the ClusterQueue + current state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. 
+ enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + fairSharing: + description: FairSharing contains the information about the current + status of fair sharing. + properties: + weightedShare: + description: |- + WeightedShare represent the maximum of the ratios of usage above nominal + quota to the lendable resources in the cohort, among all the resources + provided by the ClusterQueue, and divided by the weight. + If zero, it means that the usage of the ClusterQueue is below the nominal quota. + If the ClusterQueue has a weight of zero, this will return 9223372036854775807, + the maximum possible share value. + format: int64 + type: integer + required: + - weightedShare + type: object + flavorsReservation: + description: |- + flavorsReservation are the reserved quotas, by flavor, currently in use by the + workloads assigned to this ClusterQueue. + items: + properties: + name: + description: name of the flavor. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resources: + description: resources lists the quota usage for the resources + in this flavor. + items: + properties: + borrowed: + anyOf: + - type: integer + - type: string + description: |- + Borrowed is quantity of quota that is borrowed from the cohort. In other + words, it's the used quota that is over the nominalQuota. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + name: + description: name of the resource + type: string + total: + anyOf: + - type: integer + - type: string + description: |- + total is the total quantity of used quota, including the amount borrowed + from the cohort. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - name + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - name + - resources + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + flavorsUsage: + description: |- + flavorsUsage are the used quotas, by flavor, currently in use by the + workloads admitted in this ClusterQueue. + items: + properties: + name: + description: name of the flavor. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resources: + description: resources lists the quota usage for the resources + in this flavor. + items: + properties: + borrowed: + anyOf: + - type: integer + - type: string + description: |- + Borrowed is quantity of quota that is borrowed from the cohort. In other + words, it's the used quota that is over the nominalQuota. 
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + name: + description: name of the resource + type: string + total: + anyOf: + - type: integer + - type: string + description: |- + total is the total quantity of used quota, including the amount borrowed + from the cohort. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - name + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - name + - resources + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + pendingWorkloads: + description: |- + pendingWorkloads is the number of workloads currently waiting to be + admitted to this clusterQueue. + format: int32 + type: integer + pendingWorkloadsStatus: + description: |- + PendingWorkloadsStatus contains the information exposed about the current + status of the pending workloads in the cluster queue. + Deprecated: This field will be removed on v1beta2, use VisibilityOnDemand + (https://kueue.sigs.k8s.io/docs/tasks/manage/monitor_pending_workloads/pending_workloads_on_demand/) + instead. + properties: + clusterQueuePendingWorkload: + description: Head contains the list of top pending workloads. + items: + description: |- + ClusterQueuePendingWorkload contains the information identifying a pending workload + in the cluster queue. + properties: + name: + description: Name indicates the name of the pending workload. + type: string + namespace: + description: Namespace indicates the name of the pending + workload. + type: string + required: + - name + - namespace + type: object + type: array + x-kubernetes-list-type: atomic + lastChangeTime: + description: LastChangeTime indicates the time of the last change + of the structure. + format: date-time + type: string + required: + - lastChangeTime + type: object + reservingWorkloads: + description: |- + reservingWorkloads is the number of workloads currently reserving quota in this + clusterQueue. + format: int32 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: cohorts.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: Cohort + listKind: CohortList + plural: cohorts + singular: cohort + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + Cohort is the Schema for the cohorts API. Using Hierarchical + Cohorts (any Cohort which has a parent) with Fair Sharing + results in undefined behavior in 0.9 + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. 
+ Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: CohortSpec defines the desired state of Cohort + properties: + parent: + description: |- + Parent references the name of the Cohort's parent, if + any. It satisfies one of three cases: + 1) Unset. This Cohort is the root of its Cohort tree. + 2) References a non-existent Cohort. We use default Cohort (no borrowing/lending limits). + 3) References an existent Cohort. + + If a cycle is created, we disable all members of the + Cohort, including ClusterQueues, until the cycle is + removed. We prevent further admission while the cycle + exists. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resourceGroups: + description: |- + ResourceGroups describes groupings of Resources and + Flavors. Each ResourceGroup defines a list of Resources + and a list of Flavors which provide quotas for these + Resources. Each Resource and each Flavor may only form part + of one ResourceGroup. There may be up to 16 ResourceGroups + within a Cohort. + + BorrowingLimit limits how much members of this Cohort + subtree can borrow from the parent subtree. + + LendingLimit limits how much members of this Cohort subtree + can lend to the parent subtree. + + Borrowing and Lending limits must only be set when the + Cohort has a parent. Otherwise, the Cohort create/update + will be rejected by the webhook. + items: + properties: + coveredResources: + description: |- + coveredResources is the list of resources covered by the flavors in this + group. + Examples: cpu, memory, vendor.com/gpu. + The list cannot be empty and it can contain up to 16 resources. + items: + description: ResourceName is the name identifying various + resources in a ResourceList. + type: string + maxItems: 16 + minItems: 1 + type: array + flavors: + description: |- + flavors is the list of flavors that provide the resources of this group. + Typically, different flavors represent different hardware models + (e.g., gpu models, cpu architectures) or pricing models (on-demand vs spot + cpus). + Each flavor MUST list all the resources listed for this group in the same + order as the .resources field. + The list cannot be empty and it can contain up to 16 flavors. + items: + properties: + name: + description: |- + name of this flavor. The name should match the .metadata.name of a + ResourceFlavor. If a matching ResourceFlavor does not exist, the + ClusterQueue will have an Active condition set to False. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resources: + description: |- + resources is the list of quotas for this flavor per resource. + There could be up to 16 resources. + items: + properties: + borrowingLimit: + anyOf: + - type: integer + - type: string + description: |- + borrowingLimit is the maximum amount of quota for the [flavor, resource] + combination that this ClusterQueue is allowed to borrow from the unused + quota of other ClusterQueues in the same cohort. + In total, at a given time, Workloads in a ClusterQueue can consume a + quantity of quota equal to nominalQuota+borrowingLimit, assuming the other + ClusterQueues in the cohort have enough unused quota. + If null, it means that there is no borrowing limit. 
+ If not null, it must be non-negative. + borrowingLimit must be null if spec.cohort is empty. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + lendingLimit: + anyOf: + - type: integer + - type: string + description: |- + lendingLimit is the maximum amount of unused quota for the [flavor, resource] + combination that this ClusterQueue can lend to other ClusterQueues in the same cohort. + In total, at a given time, ClusterQueue reserves for its exclusive use + a quantity of quota equals to nominalQuota - lendingLimit. + If null, it means that there is no lending limit, meaning that + all the nominalQuota can be borrowed by other clusterQueues in the cohort. + If not null, it must be non-negative. + lendingLimit must be null if spec.cohort is empty. + This field is in beta stage and is enabled by default. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + name: + description: name of this resource. + type: string + nominalQuota: + anyOf: + - type: integer + - type: string + description: |- + nominalQuota is the quantity of this resource that is available for + Workloads admitted by this ClusterQueue at a point in time. + The nominalQuota must be non-negative. + nominalQuota should represent the resources in the cluster available for + running jobs (after discounting resources consumed by system components + and pods not managed by kueue). In an autoscaled cluster, nominalQuota + should account for resources that can be provided by a component such as + Kubernetes cluster-autoscaler. + + If the ClusterQueue belongs to a cohort, the sum of the quotas for each + (flavor, resource) combination defines the maximum quantity that can be + allocated by a ClusterQueue in the cohort. 
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - name + - nominalQuota + type: object + maxItems: 16 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - name + - resources + type: object + maxItems: 16 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - coveredResources + - flavors + type: object + x-kubernetes-validations: + - message: flavors must have the same number of resources as the + coveredResources + rule: self.flavors.all(x, size(x.resources) == size(self.coveredResources)) + maxItems: 16 + type: array + x-kubernetes-list-type: atomic + type: object + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: localqueues.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: LocalQueue + listKind: LocalQueueList + plural: localqueues + shortNames: + - queue + - queues + - lq + singular: localqueue + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Backing ClusterQueue + jsonPath: .spec.clusterQueue + name: ClusterQueue + type: string + - description: Number of pending workloads + jsonPath: .status.pendingWorkloads + name: Pending Workloads + type: integer + - description: Number of admitted workloads that haven't finished yet. + jsonPath: .status.admittedWorkloads + name: Admitted Workloads + type: integer + name: v1beta1 + schema: + openAPIV3Schema: + description: LocalQueue is the Schema for the localQueues API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: LocalQueueSpec defines the desired state of LocalQueue + properties: + clusterQueue: + description: clusterQueue is a reference to a clusterQueue that backs + this localQueue. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + x-kubernetes-validations: + - message: field is immutable + rule: self == oldSelf + stopPolicy: + default: None + description: |- + stopPolicy - if set to a value different from None, the LocalQueue is considered Inactive, + no new reservation being made. + + Depending on its value, its associated workloads will: + + - None - Workloads are admitted + - HoldAndDrain - Admitted workloads are evicted and Reserving workloads will cancel the reservation. 
+ - Hold - Admitted workloads will run to completion and Reserving workloads will cancel the reservation. + enum: + - None + - Hold + - HoldAndDrain + type: string + type: object + status: + description: LocalQueueStatus defines the observed state of LocalQueue + properties: + admittedWorkloads: + description: |- + admittedWorkloads is the number of workloads in this LocalQueue + admitted to a ClusterQueue and that haven't finished yet. + format: int32 + type: integer + conditions: + description: |- + Conditions hold the latest available observations of the LocalQueue + current state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + flavorUsage: + description: |- + flavorsUsage are the used quotas, by flavor currently in use by the + workloads assigned to this LocalQueue. + items: + properties: + name: + description: name of the flavor. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resources: + description: resources lists the quota usage for the resources + in this flavor. + items: + properties: + name: + description: name of the resource. + type: string + total: + anyOf: + - type: integer + - type: string + description: total is the total quantity of used quota. 
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - name + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - name + - resources + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + flavors: + description: flavors lists all currently available ResourceFlavors + in specified ClusterQueue. + items: + properties: + name: + description: name of the flavor. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + nodeLabels: + additionalProperties: + type: string + description: |- + nodeLabels are labels that associate the ResourceFlavor with Nodes that + have the same labels. + maxProperties: 8 + type: object + x-kubernetes-map-type: atomic + nodeTaints: + description: |- + nodeTaints are taints that the nodes associated with this ResourceFlavor + have. + items: + description: |- + The node this Taint is attached to has the "effect" on + any pod that does not tolerate the Taint. + properties: + effect: + description: |- + Required. The effect of the taint on pods + that do not tolerate the taint. + Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Required. The taint key to be applied to + a node. + type: string + timeAdded: + description: |- + TimeAdded represents the time at which the taint was added. + It is only written for NoExecute taints. + format: date-time + type: string + value: + description: The taint value corresponding to the taint + key. + type: string + required: + - effect + - key + type: object + maxItems: 8 + type: array + x-kubernetes-list-type: atomic + resources: + description: resources used in the flavor. + items: + description: ResourceName is the name identifying various + resources in a ResourceList. + type: string + maxItems: 16 + type: array + x-kubernetes-list-type: set + required: + - name + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + flavorsReservation: + description: |- + flavorsReservation are the reserved quotas, by flavor currently in use by the + workloads assigned to this LocalQueue. + items: + properties: + name: + description: name of the flavor. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + resources: + description: resources lists the quota usage for the resources + in this flavor. + items: + properties: + name: + description: name of the resource. + type: string + total: + anyOf: + - type: integer + - type: string + description: total is the total quantity of used quota. 
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - name + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - name + - resources + type: object + maxItems: 16 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + pendingWorkloads: + description: PendingWorkloads is the number of Workloads in the LocalQueue + not yet admitted to a ClusterQueue + format: int32 + type: integer + reservingWorkloads: + description: |- + reservingWorkloads is the number of workloads in this LocalQueue + reserving quota in a ClusterQueue and that haven't finished yet. + format: int32 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: multikueueclusters.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: MultiKueueCluster + listKind: MultiKueueClusterList + plural: multikueueclusters + singular: multikueuecluster + scope: Cluster + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: MultiKueueCluster is the Schema for the multikueue API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + kubeConfig: + description: Information how to connect to the cluster. + properties: + location: + description: |- + Location of the KubeConfig. + + If LocationType is Secret then Location is the name of the secret inside the namespace in + which the kueue controller manager is running. The config should be stored in the "kubeconfig" key. + type: string + locationType: + default: Secret + description: Type of the KubeConfig location. + enum: + - Secret + - Path + type: string + required: + - location + - locationType + type: object + required: + - kubeConfig + type: object + status: + properties: + conditions: + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. 
+ maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: multikueueconfigs.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: MultiKueueConfig + listKind: MultiKueueConfigList + plural: multikueueconfigs + singular: multikueueconfig + scope: Cluster + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: MultiKueueConfig is the Schema for the multikueue API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: MultiKueueConfigSpec defines the desired state of MultiKueueConfig + properties: + clusters: + description: List of MultiKueueClusters names where the workloads + from the ClusterQueue should be distributed. 
+ items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + required: + - clusters + type: object + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: provisioningrequestconfigs.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: ProvisioningRequestConfig + listKind: ProvisioningRequestConfigList + plural: provisioningrequestconfigs + singular: provisioningrequestconfig + scope: Cluster + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: ProvisioningRequestConfig is the Schema for the provisioningrequestconfig + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ProvisioningRequestConfigSpec defines the desired state of + ProvisioningRequestConfig + properties: + managedResources: + description: |- + managedResources contains the list of resources managed by the autoscaling. + + If empty, all resources are considered managed. + + If not empty, the ProvisioningRequest will contain only the podsets that are + requesting at least one of them. + + If none of the workloads podsets is requesting at least a managed resource, + the workload is considered ready. + items: + description: ResourceName is the name identifying various resources + in a ResourceList. + type: string + maxItems: 100 + type: array + x-kubernetes-list-type: set + parameters: + additionalProperties: + description: Parameter is limited to 255 characters. + maxLength: 255 + type: string + description: Parameters contains all other parameters classes may + require. + maxProperties: 100 + type: object + provisioningClassName: + description: |- + ProvisioningClassName describes the different modes of provisioning the resources. + Check autoscaling.x-k8s.io ProvisioningRequestSpec.ProvisioningClassName for details. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + retryStrategy: + default: + backoffBaseSeconds: 60 + backoffLimitCount: 3 + backoffMaxSeconds: 1800 + description: |- + retryStrategy defines strategy for retrying ProvisioningRequest. + If null, then the default configuration is applied with the following parameter values: + backoffLimitCount: 3 + backoffBaseSeconds: 60 - 1 min + backoffMaxSeconds: 1800 - 30 mins + + To switch off retry mechanism + set retryStrategy.backoffLimitCount to 0. + properties: + backoffBaseSeconds: + default: 60 + description: |- + BackoffBaseSeconds defines the base for the exponential backoff for + re-queuing an evicted workload. + + Defaults to 60. 
+ format: int32 + type: integer + backoffLimitCount: + default: 3 + description: |- + BackoffLimitCount defines the maximum number of re-queuing retries. + Once the number is reached, the workload is deactivated (`.spec.activate`=`false`). + + Every backoff duration is about "b*2^(n-1)+Rand" where: + - "b" represents the base set by "BackoffBaseSeconds" parameter, + - "n" represents the "workloadStatus.requeueState.count", + - "Rand" represents the random jitter. + During this time, the workload is taken as an inadmissible and + other workloads will have a chance to be admitted. + By default, the consecutive requeue delays are around: (60s, 120s, 240s, ...). + + Defaults to 3. + format: int32 + type: integer + backoffMaxSeconds: + default: 1800 + description: |- + BackoffMaxSeconds defines the maximum backoff time to re-queue an evicted workload. + + Defaults to 1800. + format: int32 + type: integer + type: object + required: + - provisioningClassName + type: object + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: resourceflavors.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: ResourceFlavor + listKind: ResourceFlavorList + plural: resourceflavors + shortNames: + - flavor + - flavors + - rf + singular: resourceflavor + scope: Cluster + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: ResourceFlavor is the Schema for the resourceflavors API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ResourceFlavorSpec defines the desired state of the ResourceFlavor + properties: + nodeLabels: + additionalProperties: + type: string + description: |- + nodeLabels are labels that associate the ResourceFlavor with Nodes that + have the same labels. + When a Workload is admitted, its podsets can only get assigned + ResourceFlavors whose nodeLabels match the nodeSelector and nodeAffinity + fields. + Once a ResourceFlavor is assigned to a podSet, the ResourceFlavor's + nodeLabels should be injected into the pods of the Workload by the + controller that integrates with the Workload object. + + nodeLabels can be up to 8 elements. + maxProperties: 8 + type: object + x-kubernetes-map-type: atomic + nodeTaints: + description: |- + nodeTaints are taints that the nodes associated with this ResourceFlavor + have. + Workloads' podsets must have tolerations for these nodeTaints in order to + get assigned this ResourceFlavor during admission. + + An example of a nodeTaint is + cloud.provider.com/preemptible="true":NoSchedule + + nodeTaints can be up to 8 elements. 
+ items: + description: |- + The node this Taint is attached to has the "effect" on + any pod that does not tolerate the Taint. + properties: + effect: + description: |- + Required. The effect of the taint on pods + that do not tolerate the taint. + Valid effects are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Required. The taint key to be applied to a node. + type: string + timeAdded: + description: |- + TimeAdded represents the time at which the taint was added. + It is only written for NoExecute taints. + format: date-time + type: string + value: + description: The taint value corresponding to the taint key. + type: string + required: + - effect + - key + type: object + maxItems: 8 + type: array + x-kubernetes-list-type: atomic + x-kubernetes-validations: + - message: 'supported taint effect values: ''NoSchedule'', ''PreferNoSchedule'', + ''NoExecute''' + rule: self.all(x, x.effect in ['NoSchedule', 'PreferNoSchedule', + 'NoExecute']) + tolerations: + description: |- + tolerations are extra tolerations that will be added to the pods admitted in + the quota associated with this resource flavor. + + An example of a toleration is + cloud.provider.com/preemptible="true":NoSchedule + + tolerations can be up to 8 elements. + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + maxItems: 8 + type: array + x-kubernetes-list-type: atomic + x-kubernetes-validations: + - message: operator must be Exists when 'key' is empty, which means + 'match all values and all keys' + rule: 'self.all(x, !has(x.key) ? x.operator == ''Exists'' : true)' + - message: effect must be 'NoExecute' when 'tolerationSeconds' is + set + rule: 'self.all(x, has(x.tolerationSeconds) ? x.effect == ''NoExecute'' + : true)' + - message: 'supported toleration values: ''Equal''(default), ''Exists''' + rule: self.all(x, !has(x.operator) || x.operator in ['Equal', 'Exists']) + - message: a value must be empty when 'operator' is 'Exists' + rule: 'self.all(x, has(x.operator) && x.operator == ''Exists'' ? 
+ !has(x.value) : true)' + - message: 'supported taint effect values: ''NoSchedule'', ''PreferNoSchedule'', + ''NoExecute''' + rule: self.all(x, !has(x.effect) || x.effect in ['NoSchedule', 'PreferNoSchedule', + 'NoExecute']) + topologyName: + description: |- + topologyName indicates topology for the TAS ResourceFlavor. + When specified, it enables scraping of the topology information from the + nodes matching to the Resource Flavor node labels. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + type: object + x-kubernetes-validations: + - message: at least one nodeLabel is required when topology is set + rule: '!has(self.topologyName) || self.nodeLabels.size() >= 1' + - message: resourceFlavorSpec are immutable when topologyName is set + rule: '!has(oldSelf.topologyName) || self == oldSelf' + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: topologies.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: Topology + listKind: TopologyList + plural: topologies + singular: topology + scope: Cluster + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: Topology is the Schema for the topology API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: TopologySpec defines the desired state of Topology + properties: + levels: + description: levels define the levels of topology. + items: + description: TopologyLevel defines the desired state of TopologyLevel + properties: + nodeLabel: + description: |- + nodeLabel indicates the name of the node label for a specific topology + level. 
+ + Examples: + - cloud.provider.com/topology-block + - cloud.provider.com/topology-rack + maxLength: 316 + minLength: 1 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - nodeLabel + type: object + maxItems: 8 + minItems: 1 + type: array + x-kubernetes-list-type: atomic + x-kubernetes-validations: + - message: field is immutable + rule: self == oldSelf + - message: must be unique + rule: size(self.filter(i, size(self.filter(j, j == i)) > 1)) == + 0 + - message: the kubernetes.io/hostname label can only be used at the + lowest level of topology + rule: size(self.filter(i, i.nodeLabel == 'kubernetes.io/hostname')) + == 0 || self[size(self) - 1].nodeLabel == 'kubernetes.io/hostname' + required: + - levels + type: object + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: workloadpriorityclasses.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: WorkloadPriorityClass + listKind: WorkloadPriorityClassList + plural: workloadpriorityclasses + singular: workloadpriorityclass + scope: Cluster + versions: + - additionalPrinterColumns: + - description: Value of workloadPriorityClass's Priority + jsonPath: .value + name: Value + type: integer + name: v1beta1 + schema: + openAPIV3Schema: + description: WorkloadPriorityClass is the Schema for the workloadPriorityClass + API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + description: + description: |- + description is an arbitrary string that usually provides guidelines on + when this workloadPriorityClass should be used. + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + value: + description: |- + value represents the integer value of this workloadPriorityClass. This is the actual priority that workloads + receive when jobs have the name of this class in their workloadPriorityClass label. + Changing the value of workloadPriorityClass doesn't affect the priority of workloads that were already created. 
+ format: int32 + type: integer + required: + - value + type: object + served: true + storage: true + subresources: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) + controller-gen.kubebuilder.io/version: v0.16.5 + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: workloads.kueue.x-k8s.io +spec: + group: kueue.x-k8s.io + names: + kind: Workload + listKind: WorkloadList + plural: workloads + shortNames: + - wl + singular: workload + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: Name of the queue this workload was submitted to + jsonPath: .spec.queueName + name: Queue + type: string + - description: Name of the ClusterQueue where the workload is reserving quota + jsonPath: .status.admission.clusterQueue + name: Reserved in + type: string + - description: Admission status + jsonPath: .status.conditions[?(@.type=='Admitted')].status + name: Admitted + type: string + - description: Workload finished + jsonPath: .status.conditions[?(@.type=='Finished')].status + name: Finished + type: string + - description: Time this workload was created + jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1beta1 + schema: + openAPIV3Schema: + description: Workload is the Schema for the workloads API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: WorkloadSpec defines the desired state of Workload + properties: + active: + default: true + description: |- + Active determines if a workload can be admitted into a queue. + Changing active from true to false will evict any running workloads. + Possible values are: + + - false: indicates that a workload should never be admitted and evicts running workloads + - true: indicates that a workload can be evaluated for admission into it's respective queue. + + Defaults to true + type: boolean + maximumExecutionTimeSeconds: + description: |- + maximumExecutionTimeSeconds if provided, determines the maximum time, in seconds, + the workload can be admitted before it's automatically deactivated. + + If unspecified, no execution time limit is enforced on the Workload. + format: int32 + minimum: 1 + type: integer + podSets: + description: |- + podSets is a list of sets of homogeneous pods, each described by a Pod spec + and a count. + There must be at least one element and at most 8. + podSets cannot be changed. + items: + properties: + count: + default: 1 + description: count is the number of pods for the spec. + format: int32 + minimum: 0 + type: integer + minCount: + description: |- + minCount is the minimum number of pods for the spec acceptable + if the workload supports partial admission. 
+ + If not provided, partial admission for the current PodSet is not + enabled. + + Only one podSet within the workload can use this. + + This is an alpha field and requires enabling PartialAdmission feature gate. + format: int32 + minimum: 1 + type: integer + name: + default: main + description: name is the PodSet name. + maxLength: 63 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + template: + description: |- + template is the Pod template. + + The only allowed fields in template.metadata are labels and annotations. + + If requests are omitted for a container or initContainer, + they default to the limits if they are explicitly specified for the + container or initContainer. + + During admission, the rules in nodeSelector and + nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution that match + the keys in the nodeLabels from the ResourceFlavors considered for this + Workload are used to filter the ResourceFlavors that can be assigned to + this podSet. + properties: + metadata: + description: |- + Standard object's metadata. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata + properties: + annotations: + additionalProperties: + type: string + type: object + finalizers: + items: + type: string + type: array + labels: + additionalProperties: + type: string + type: object + name: + type: string + namespace: + type: string + type: object + spec: + description: |- + Specification of the desired behavior of the pod. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status + properties: + activeDeadlineSeconds: + description: |- + Optional duration in seconds the pod may be active on the node relative to + StartTime before the system will actively try to mark it failed and kill associated containers. + Value must be a positive integer. + format: int64 + type: integer + affinity: + description: If specified, the pod's scheduling constraints + properties: + nodeAffinity: + description: Describes node affinity scheduling + rules for the pod. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). + properties: + preference: + description: A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. 
+ type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: Weight associated with matching + the corresponding nodeSelectorTerm, + in the range 1-100. + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: Required. A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. 
If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + description: Describes pod affinity scheduling rules + (e.g. co-locate this pod in the same node, zone, + etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added + per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity + term, associated with the corresponding + weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions + is a list of label selector + requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the + label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. 
If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions + is a list of label selector + requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the + label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. 
If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a + list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. 
If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a + list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. 
If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + description: Describes pod anti-affinity scheduling + rules (e.g. avoid putting this pod in the same + node, zone, etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the anti-affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added + per-node to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity + term, associated with the corresponding + weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions + is a list of label selector + requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the + label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. 
+ type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions + is a list of label selector + requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the + label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. 
If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the anti-affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the anti-affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a + list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. 
+ type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + This is a beta field and requires enabling MatchLabelKeysInPodAffinity feature gate (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a + list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. 
If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + automountServiceAccountToken: + description: AutomountServiceAccountToken indicates + whether a service account token should be automatically + mounted. + type: boolean + containers: + description: |- + List of containers belonging to the pod. + Containers cannot currently be added or removed. + There must be at least one container in a Pod. + Cannot be updated. + items: + description: A single application container that you + want to run within a pod. + properties: + args: + description: |- + Arguments to the entrypoint. + The container image's CMD is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + command: + description: |- + Entrypoint array. Not executed within a shell. + The container image's ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. 
+ More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: + description: |- + List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment + variable present in a Container. + properties: + name: + description: Name of the environment variable. + Must be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment + variable's value. Cannot be used if value + is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the + ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema + the FieldPath is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to + select in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env + vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output + format of the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource + to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret + in the pod's namespace + properties: + key: + description: The key of the secret + to select from. Must be a valid + secret key. + type: string + name: + default: "" + description: |- + Name of the referent. 
+ This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the + Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + description: |- + List of sources to populate environment variables in the container. + The keys defined within a source must be a C_IDENTIFIER. All invalid keys + will be reported as an event when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take precedence. + Values defined by an Env with a duplicate key will take precedence. + Cannot be updated. + items: + description: EnvFromSource represents the source + of a set of ConfigMaps + properties: + configMapRef: + description: The ConfigMap to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + description: An optional identifier to prepend + to each key in the ConfigMap. Must be + a C_IDENTIFIER. + type: string + secretRef: + description: The Secret to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + image: + description: |- + Container image name. + More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management to default or override + container images in workload controllers like Deployments and StatefulSets. + type: string + imagePullPolicy: + description: |- + Image pull policy. + One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + type: string + lifecycle: + description: |- + Actions that the management system should take in response to container lifecycle events. + Cannot be updated. + properties: + postStart: + description: |- + PostStart is called immediately after a container is created. If the handler fails, + the container is terminated and restarted according to its restart policy. + Other management of the container blocks until the hook completes. 
+ More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action + to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set + in the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in + HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the + HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number + of seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name + to connect to, defaults to the pod + IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + description: |- + PreStop is called immediately before a container is terminated due to an + API request or management event such as liveness/startup probe failure, + preemption, resource contention, etc. The handler is not called if the + container crashes or exits. The Pod's termination grace period countdown begins before the + PreStop hook is executed. Regardless of the outcome of the handler, the + container will eventually terminate within the Pod's termination grace + period (unless delayed by finalizers). Other management of the container blocks until the hook completes + or until the termination grace period is reached. 
+ More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action + to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set + in the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in + HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the + HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number + of seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name + to connect to, defaults to the pod + IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: + description: |- + Periodic probe of container liveness. + Container will be restarted if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. 
To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. 
+ The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + name: + description: |- + Name of the container specified as a DNS_LABEL. + Each container in a pod must have a unique name (DNS_LABEL). + Cannot be updated. + type: string + ports: + description: |- + List of ports to expose from the container. Not specifying a port here + DOES NOT prevent that port from being exposed. Any port which is + listening on the default "0.0.0.0" address inside a container will be + accessible from the network. + Modifying this array with strategic merge patch may corrupt the data. + For more information See https://github.com/kubernetes/kubernetes/issues/108255. + Cannot be updated. + items: + description: ContainerPort represents a network + port in a single container. + properties: + containerPort: + description: |- + Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external + port to. + type: string + hostPort: + description: |- + Number of port to expose on the host. + If specified, this must be a valid port number, 0 < x < 65536. + If HostNetwork is specified, this must match ContainerPort. + Most containers do not need this. + format: int32 + type: integer + name: + description: |- + If specified, this must be an IANA_SVC_NAME and unique within the pod. Each + named port in a pod must have a unique name. Name for the port that can be + referred to by services. + type: string + protocol: + default: TCP + description: |- + Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + description: |- + Periodic probe of container service readiness. + Container will be removed from service endpoints if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. 
+ items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. 
+ Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + resizePolicy: + description: Resources resize policy for the container. + items: + description: ContainerResizePolicy represents + resource resize policy for the container. + properties: + resourceName: + description: |- + Name of the resource to which this resource resize policy applies. + Supported values: cpu, memory. + type: string + restartPolicy: + description: |- + Restart policy to apply when specified resource is resized. + If not specified, it defaults to NotRequired. + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + description: |- + Compute Resources required by this container. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one + entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. 
+ More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + restartPolicy: + description: |- + RestartPolicy defines the restart behavior of individual containers in a pod. + This field may only be set for init containers, and the only allowed value is "Always". + For non-init containers or when this field is not specified, + the restart behavior is defined by the Pod's restart policy and the container type. + Setting the RestartPolicy as "Always" for the init container will have the following effect: + this init container will be continually restarted on + exit until all regular containers have terminated. Once all regular + containers have completed, all init containers with restartPolicy "Always" + will be shut down. This lifecycle differs from normal init containers and + is often referred to as a "sidecar" container. Although this init + container still starts in the init container sequence, it does not wait + for the container to complete before proceeding to the next init + container. Instead, the next init container starts immediately after this + init container is started, or after any startupProbe has successfully + completed. + type: string + securityContext: + description: |- + SecurityContext defines the security options the container should be run with. + If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. + More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. 
+ Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default value is Default which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label + that applies to the container. + type: string + role: + description: Role is a SELinux role label + that applies to the container. + type: string + type: + description: Type is a SELinux type label + that applies to the container. + type: string + user: + description: User is a SELinux user label + that applies to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. 
+ type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is + the name of the GMSA credential spec + to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + startupProbe: + description: |- + StartupProbe indicates that the Pod has successfully initialized. + If specified, no other probes are executed until this completes successfully. + If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. + This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, + when it might take a long time to load data or warm a cache, than during steady-state operation. + This cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. 
+ format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. 
+ format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + stdin: + description: |- + Whether this container should allocate a buffer for stdin in the container runtime. If this + is not set, reads from stdin in the container will always result in EOF. + Default is false. + type: boolean + stdinOnce: + description: |- + Whether the container runtime should close the stdin channel after it has been opened by + a single attach. When stdin is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the + first client attaches to stdin, and then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container is restarted. If this + flag is false, a container processes that reads from stdin will never receive an EOF. + Default is false + type: boolean + terminationMessagePath: + description: |- + Optional: Path at which the file to which the container's termination message + will be written is mounted into the container's filesystem. + Message written is intended to be brief final status, such as an assertion failure message. + Will be truncated by the node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. + Defaults to /dev/termination-log. + Cannot be updated. + type: string + terminationMessagePolicy: + description: |- + Indicate how the termination message should be populated. File will use the contents of + terminationMessagePath to populate the container status message on both success and failure. + FallbackToLogsOnError will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever is smaller. + Defaults to File. + Cannot be updated. + type: string + tty: + description: |- + Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. + Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block + devices to be used by the container. + items: + description: volumeDevice describes a mapping + of a raw block device within a container. + properties: + devicePath: + description: devicePath is the path inside + of the container that the device will + be mapped to. + type: string + name: + description: name must match the name of + a persistentVolumeClaim in the pod + type: string + required: + - devicePath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map + volumeMounts: + description: |- + Pod volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting + of a Volume within a container. + properties: + mountPath: + description: |- + Path within the container at which the volume should be mounted. Must + not contain ':'. + type: string + mountPropagation: + description: |- + mountPropagation determines how mounts are propagated from the host + to container and the other way around. + When not set, MountPropagationNone is used. + This field is beta in 1.10. 
+ When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified + (which defaults to None). + type: string + name: + description: This must match the Name of + a Volume. + type: string + readOnly: + description: |- + Mounted read-only if true, read-write otherwise (false or unspecified). + Defaults to false. + type: boolean + recursiveReadOnly: + description: |- + RecursiveReadOnly specifies whether read-only mounts should be handled + recursively. + + If ReadOnly is false, this field has no meaning and must be unspecified. + + If ReadOnly is true, and this field is set to Disabled, the mount is not made + recursively read-only. If this field is set to IfPossible, the mount is made + recursively read-only, if it is supported by the container runtime. If this + field is set to Enabled, the mount is made recursively read-only if it is + supported by the container runtime, otherwise the pod will not be started and + an error will be generated to indicate the reason. + + If this field is set to IfPossible or Enabled, MountPropagation must be set to + None (or be unspecified, which defaults to None). + + If this field is not specified, it is treated as an equivalent of Disabled. + type: string + subPath: + description: |- + Path within the volume from which the container's volume should be mounted. + Defaults to "" (volume's root). + type: string + subPathExpr: + description: |- + Expanded path within the volume from which the container's volume should be mounted. + Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. + Defaults to "" (volume's root). + SubPathExpr and SubPath are mutually exclusive. + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + workingDir: + description: |- + Container's working directory. + If not specified, the container runtime's default will be used, which + might be configured in the container image. + Cannot be updated. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + dnsConfig: + description: |- + Specifies the DNS parameters of a pod. + Parameters specified here will be merged to the generated DNS + configuration based on DNSPolicy. + properties: + nameservers: + description: |- + A list of DNS name server IP addresses. + This will be appended to the base nameservers generated from DNSPolicy. + Duplicated nameservers will be removed. + items: + type: string + type: array + x-kubernetes-list-type: atomic + options: + description: |- + A list of DNS resolver options. + This will be merged with the base options generated from DNSPolicy. + Duplicated entries will be removed. Resolution options given in Options + will override those that appear in the base DNSPolicy. + items: + description: PodDNSConfigOption defines DNS resolver + options of a pod. + properties: + name: + description: Required. + type: string + value: + type: string + type: object + type: array + x-kubernetes-list-type: atomic + searches: + description: |- + A list of DNS search domains for host-name lookup. + This will be appended to the base search paths generated from DNSPolicy. + Duplicated search paths will be removed. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + dnsPolicy: + description: |- + Set DNS policy for the pod. + Defaults to "ClusterFirst". 
+ Valid values are 'ClusterFirstWithHostNet', 'ClusterFirst', 'Default' or 'None'. + DNS parameters given in DNSConfig will be merged with the policy selected with DNSPolicy. + To have DNS options set along with hostNetwork, you have to specify DNS policy + explicitly to 'ClusterFirstWithHostNet'. + type: string + enableServiceLinks: + description: |- + EnableServiceLinks indicates whether information about services should be injected into pod's + environment variables, matching the syntax of Docker links. + Optional: Defaults to true. + type: boolean + ephemeralContainers: + description: |- + List of ephemeral containers run in this pod. Ephemeral containers may be run in an existing + pod to perform user-initiated actions such as debugging. This list cannot be specified when + creating a pod, and it cannot be modified by updating the pod spec. In order to add an + ephemeral container to an existing pod, use the pod's ephemeralcontainers subresource. + items: + description: |- + An EphemeralContainer is a temporary container that you may add to an existing Pod for + user-initiated activities such as debugging. Ephemeral containers have no resource or + scheduling guarantees, and they will not be restarted when they exit or when a Pod is + removed or restarted. The kubelet may evict a Pod if an ephemeral container causes the + Pod to exceed its resource allocation. + + To add an ephemeral container, use the ephemeralcontainers subresource of an existing + Pod. Ephemeral containers may not be removed or restarted. + properties: + args: + description: |- + Arguments to the entrypoint. + The image's CMD is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + command: + description: |- + Entrypoint array. Not executed within a shell. + The image's ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: + description: |- + List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment + variable present in a Container. + properties: + name: + description: Name of the environment variable. + Must be a C_IDENTIFIER. 
+ type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment + variable's value. Cannot be used if value + is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the + ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema + the FieldPath is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to + select in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env + vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output + format of the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource + to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret + in the pod's namespace + properties: + key: + description: The key of the secret + to select from. Must be a valid + secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the + Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + description: |- + List of sources to populate environment variables in the container. + The keys defined within a source must be a C_IDENTIFIER. All invalid keys + will be reported as an event when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take precedence. + Values defined by an Env with a duplicate key will take precedence. + Cannot be updated. + items: + description: EnvFromSource represents the source + of a set of ConfigMaps + properties: + configMapRef: + description: The ConfigMap to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + description: An optional identifier to prepend + to each key in the ConfigMap. Must be + a C_IDENTIFIER. + type: string + secretRef: + description: The Secret to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + image: + description: |- + Container image name. + More info: https://kubernetes.io/docs/concepts/containers/images + type: string + imagePullPolicy: + description: |- + Image pull policy. + One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + type: string + lifecycle: + description: Lifecycle is not allowed for ephemeral + containers. + properties: + postStart: + description: |- + PostStart is called immediately after a container is created. If the handler fails, + the container is terminated and restarted according to its restart policy. + Other management of the container blocks until the hook completes. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action + to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. 
To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set + in the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in + HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the + HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number + of seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name + to connect to, defaults to the pod + IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + description: |- + PreStop is called immediately before a container is terminated due to an + API request or management event such as liveness/startup probe failure, + preemption, resource contention, etc. The handler is not called if the + container crashes or exits. The Pod's termination grace period countdown begins before the + PreStop hook is executed. Regardless of the outcome of the handler, the + container will eventually terminate within the Pod's termination grace + period (unless delayed by finalizers). Other management of the container blocks until the hook completes + or until the termination grace period is reached. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action + to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. 
To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set + in the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in + HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the + HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number + of seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name + to connect to, defaults to the pod + IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: + description: Probes are not allowed for ephemeral + containers. + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. 
+ format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. 
+ format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + name: + description: |- + Name of the ephemeral container specified as a DNS_LABEL. + This name must be unique among all containers, init containers and ephemeral containers. + type: string + ports: + description: Ports are not allowed for ephemeral + containers. + items: + description: ContainerPort represents a network + port in a single container. + properties: + containerPort: + description: |- + Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external + port to. + type: string + hostPort: + description: |- + Number of port to expose on the host. + If specified, this must be a valid port number, 0 < x < 65536. + If HostNetwork is specified, this must match ContainerPort. + Most containers do not need this. + format: int32 + type: integer + name: + description: |- + If specified, this must be an IANA_SVC_NAME and unique within the pod. Each + named port in a pod must have a unique name. Name for the port that can be + referred to by services. + type: string + protocol: + default: TCP + description: |- + Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + description: Probes are not allowed for ephemeral + containers. + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. 
+ items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + resizePolicy: + description: Resources resize policy for the container. + items: + description: ContainerResizePolicy represents + resource resize policy for the container. + properties: + resourceName: + description: |- + Name of the resource to which this resource resize policy applies. + Supported values: cpu, memory. 
+ type: string + restartPolicy: + description: |- + Restart policy to apply when specified resource is resized. + If not specified, it defaults to NotRequired. + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + description: |- + Resources are not allowed for ephemeral containers. Ephemeral containers use spare resources + already allocated to the pod. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one + entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + restartPolicy: + description: |- + Restart policy for the container to manage the restart behavior of each + container within a pod. + This may only be set for init containers. You cannot set this field on + ephemeral containers. + type: string + securityContext: + description: |- + Optional: SecurityContext defines the security options the ephemeral container should be run with. + If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. 
+ Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default value is Default which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. 
+ If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label + that applies to the container. + type: string + role: + description: Role is a SELinux role label + that applies to the container. + type: string + type: + description: Type is a SELinux type label + that applies to the container. + type: string + user: + description: User is a SELinux user label + that applies to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is + the name of the GMSA credential spec + to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + startupProbe: + description: Probes are not allowed for ephemeral + containers. + properties: + exec: + description: Exec specifies the action to + take. 
+ properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. 
+ x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + stdin: + description: |- + Whether this container should allocate a buffer for stdin in the container runtime. If this + is not set, reads from stdin in the container will always result in EOF. + Default is false. + type: boolean + stdinOnce: + description: |- + Whether the container runtime should close the stdin channel after it has been opened by + a single attach. When stdin is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the + first client attaches to stdin, and then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container is restarted. If this + flag is false, a container processes that reads from stdin will never receive an EOF. + Default is false + type: boolean + targetContainerName: + description: |- + If set, the name of the container from PodSpec that this ephemeral container targets. + The ephemeral container will be run in the namespaces (IPC, PID, etc) of this container. + If not set then the ephemeral container uses the namespaces configured in the Pod spec. + + The container runtime must implement support for this feature. If the runtime does not + support namespace targeting then the result of setting this field is undefined. + type: string + terminationMessagePath: + description: |- + Optional: Path at which the file to which the container's termination message + will be written is mounted into the container's filesystem. + Message written is intended to be brief final status, such as an assertion failure message. + Will be truncated by the node if greater than 4096 bytes. The total message length across + all containers will be limited to 12kb. + Defaults to /dev/termination-log. + Cannot be updated. + type: string + terminationMessagePolicy: + description: |- + Indicate how the termination message should be populated. File will use the contents of + terminationMessagePath to populate the container status message on both success and failure. + FallbackToLogsOnError will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever is smaller. + Defaults to File. 
+ Cannot be updated. + type: string + tty: + description: |- + Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. + Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block + devices to be used by the container. + items: + description: volumeDevice describes a mapping + of a raw block device within a container. + properties: + devicePath: + description: devicePath is the path inside + of the container that the device will + be mapped to. + type: string + name: + description: name must match the name of + a persistentVolumeClaim in the pod + type: string + required: + - devicePath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map + volumeMounts: + description: |- + Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers. + Cannot be updated. + items: + description: VolumeMount describes a mounting + of a Volume within a container. + properties: + mountPath: + description: |- + Path within the container at which the volume should be mounted. Must + not contain ':'. + type: string + mountPropagation: + description: |- + mountPropagation determines how mounts are propagated from the host + to container and the other way around. + When not set, MountPropagationNone is used. + This field is beta in 1.10. + When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified + (which defaults to None). + type: string + name: + description: This must match the Name of + a Volume. + type: string + readOnly: + description: |- + Mounted read-only if true, read-write otherwise (false or unspecified). + Defaults to false. + type: boolean + recursiveReadOnly: + description: |- + RecursiveReadOnly specifies whether read-only mounts should be handled + recursively. + + If ReadOnly is false, this field has no meaning and must be unspecified. + + If ReadOnly is true, and this field is set to Disabled, the mount is not made + recursively read-only. If this field is set to IfPossible, the mount is made + recursively read-only, if it is supported by the container runtime. If this + field is set to Enabled, the mount is made recursively read-only if it is + supported by the container runtime, otherwise the pod will not be started and + an error will be generated to indicate the reason. + + If this field is set to IfPossible or Enabled, MountPropagation must be set to + None (or be unspecified, which defaults to None). + + If this field is not specified, it is treated as an equivalent of Disabled. + type: string + subPath: + description: |- + Path within the volume from which the container's volume should be mounted. + Defaults to "" (volume's root). + type: string + subPathExpr: + description: |- + Expanded path within the volume from which the container's volume should be mounted. + Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. + Defaults to "" (volume's root). + SubPathExpr and SubPath are mutually exclusive. + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + workingDir: + description: |- + Container's working directory. + If not specified, the container runtime's default will be used, which + might be configured in the container image. + Cannot be updated. 
+ type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + hostAliases: + description: |- + HostAliases is an optional list of hosts and IPs that will be injected into the pod's hosts + file if specified. + items: + description: |- + HostAlias holds the mapping between IP and hostnames that will be injected as an entry in the + pod's hosts file. + properties: + hostnames: + description: Hostnames for the above IP address. + items: + type: string + type: array + x-kubernetes-list-type: atomic + ip: + description: IP address of the host file entry. + type: string + required: + - ip + type: object + type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map + hostIPC: + description: |- + Use the host's ipc namespace. + Optional: Default to false. + type: boolean + hostNetwork: + description: |- + Host networking requested for this pod. Use the host's network namespace. + If this option is set, the ports that will be used must be specified. + Default to false. + type: boolean + hostPID: + description: |- + Use the host's pid namespace. + Optional: Default to false. + type: boolean + hostUsers: + description: |- + Use the host's user namespace. + Optional: Default to true. + If set to true or not present, the pod will be run in the host user namespace, useful + for when the pod needs a feature only available to the host user namespace, such as + loading a kernel module with CAP_SYS_MODULE. + When set to false, a new userns is created for the pod. Setting false is useful for + mitigating container breakout vulnerabilities even allowing users to run their + containers as root without actually having root privileges on the host. + This field is alpha-level and is only honored by servers that enable the UserNamespacesSupport feature. + type: boolean + hostname: + description: |- + Specifies the hostname of the Pod + If not specified, the pod's hostname will be set to a system-defined value. + type: string + imagePullSecrets: + description: |- + ImagePullSecrets is an optional list of references to secrets in the same namespace to use for pulling any of the images used by this PodSpec. + If specified, these secrets will be passed to individual puller implementations for them to use. + More info: https://kubernetes.io/docs/concepts/containers/images#specifying-imagepullsecrets-on-a-pod + items: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + initContainers: + description: |- + List of initialization containers belonging to the pod. + Init containers are executed in order prior to containers being started. If any + init container fails, the pod is considered to have failed and is handled according + to its restartPolicy. The name for an init container or normal container must be + unique among all containers. + Init containers may not have Lifecycle actions, Readiness probes, Liveness probes, or Startup probes. 
+ The resourceRequirements of an init container are taken into account during scheduling + by finding the highest request/limit for each resource type, and then using the max of + of that value or the sum of the normal containers. Limits are applied to init containers + in a similar fashion. + Init containers cannot currently be added or removed. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/init-containers/ + items: + description: A single application container that you + want to run within a pod. + properties: + args: + description: |- + Arguments to the entrypoint. + The container image's CMD is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + command: + description: |- + Entrypoint array. Not executed within a shell. + The container image's ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's environment. If a variable + cannot be resolved, the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. "$$(VAR_NAME)" will + produce the string literal "$(VAR_NAME)". Escaped references will never be expanded, regardless + of whether the variable exists or not. Cannot be updated. + More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: + description: |- + List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment + variable present in a Container. + properties: + name: + description: Name of the environment variable. + Must be a C_IDENTIFIER. + type: string + value: + description: |- + Variable references $(VAR_NAME) are expanded + using the previously defined environment variables in the container and + any service environment variables. If a variable cannot be resolved, + the reference in the input string will be unchanged. Double $$ are reduced + to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e. + "$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)". + Escaped references will never be expanded, regardless of whether the variable + exists or not. + Defaults to "". + type: string + valueFrom: + description: Source for the environment + variable's value. Cannot be used if value + is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the + ConfigMap or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + description: |- + Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['']`, `metadata.annotations['']`, + spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs. + properties: + apiVersion: + description: Version of the schema + the FieldPath is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to + select in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env + vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output + format of the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource + to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + description: Selects a key of a secret + in the pod's namespace + properties: + key: + description: The key of the secret + to select from. Must be a valid + secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the + Secret or its key must be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + description: |- + List of sources to populate environment variables in the container. + The keys defined within a source must be a C_IDENTIFIER. All invalid keys + will be reported as an event when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take precedence. + Values defined by an Env with a duplicate key will take precedence. + Cannot be updated. + items: + description: EnvFromSource represents the source + of a set of ConfigMaps + properties: + configMapRef: + description: The ConfigMap to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the ConfigMap + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + description: An optional identifier to prepend + to each key in the ConfigMap. Must be + a C_IDENTIFIER. + type: string + secretRef: + description: The Secret to select from + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + image: + description: |- + Container image name. + More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management to default or override + container images in workload controllers like Deployments and StatefulSets. + type: string + imagePullPolicy: + description: |- + Image pull policy. + One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/containers/images#updating-images + type: string + lifecycle: + description: |- + Actions that the management system should take in response to container lifecycle events. + Cannot be updated. + properties: + postStart: + description: |- + PostStart is called immediately after a container is created. If the handler fails, + the container is terminated and restarted according to its restart policy. + Other management of the container blocks until the hook completes. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action + to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set + in the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in + HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the + HTTP server. 
+ type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number + of seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name + to connect to, defaults to the pod + IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + description: |- + PreStop is called immediately before a container is terminated due to an + API request or management event such as liveness/startup probe failure, + preemption, resource contention, etc. The handler is not called if the + container crashes or exits. The Pod's termination grace period countdown begins before the + PreStop hook is executed. Regardless of the outcome of the handler, the + container will eventually terminate within the Pod's termination grace + period (unless delayed by finalizers). Other management of the container blocks until the hook completes + or until the termination grace period is reached. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks + properties: + exec: + description: Exec specifies the action + to take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + description: HTTPGet specifies the http + request to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set + in the request. HTTP allows repeated + headers. + items: + description: HTTPHeader describes + a custom header to be used in + HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field + value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the + HTTP server. 
+ type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + sleep: + description: Sleep represents the duration + that the container should sleep before + being terminated. + properties: + seconds: + description: Seconds is the number + of seconds to sleep. + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + description: |- + Deprecated. TCPSocket is NOT supported as a LifecycleHandler and kept + for the backward compatibility. There are no validation of this field and + lifecycle hooks will fail in runtime when tcp handler is specified. + properties: + host: + description: 'Optional: Host name + to connect to, defaults to the pod + IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + type: object + livenessProbe: + description: |- + Periodic probe of container liveness. + Container will be restarted if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. 
+ type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + name: + description: |- + Name of the container specified as a DNS_LABEL. + Each container in a pod must have a unique name (DNS_LABEL). + Cannot be updated. + type: string + ports: + description: |- + List of ports to expose from the container. Not specifying a port here + DOES NOT prevent that port from being exposed. Any port which is + listening on the default "0.0.0.0" address inside a container will be + accessible from the network. + Modifying this array with strategic merge patch may corrupt the data. + For more information See https://github.com/kubernetes/kubernetes/issues/108255. + Cannot be updated. 
+ items: + description: ContainerPort represents a network + port in a single container. + properties: + containerPort: + description: |- + Number of port to expose on the pod's IP address. + This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external + port to. + type: string + hostPort: + description: |- + Number of port to expose on the host. + If specified, this must be a valid port number, 0 < x < 65536. + If HostNetwork is specified, this must match ContainerPort. + Most containers do not need this. + format: int32 + type: integer + name: + description: |- + If specified, this must be an IANA_SVC_NAME and unique within the pod. Each + named port in a pod must have a unique name. Name for the port that can be + referred to by services. + type: string + protocol: + default: TCP + description: |- + Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + description: |- + Periodic probe of container service readiness. + Container will be removed from service endpoints if the probe fails. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. 
+ type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + resizePolicy: + description: Resources resize policy for the container. + items: + description: ContainerResizePolicy represents + resource resize policy for the container. + properties: + resourceName: + description: |- + Name of the resource to which this resource resize policy applies. + Supported values: cpu, memory. + type: string + restartPolicy: + description: |- + Restart policy to apply when specified resource is resized. + If not specified, it defaults to NotRequired. 
+ type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + description: |- + Compute Resources required by this container. + Cannot be updated. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one + entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + restartPolicy: + description: |- + RestartPolicy defines the restart behavior of individual containers in a pod. + This field may only be set for init containers, and the only allowed value is "Always". + For non-init containers or when this field is not specified, + the restart behavior is defined by the Pod's restart policy and the container type. + Setting the RestartPolicy as "Always" for the init container will have the following effect: + this init container will be continually restarted on + exit until all regular containers have terminated. Once all regular + containers have completed, all init containers with restartPolicy "Always" + will be shut down. This lifecycle differs from normal init containers and + is often referred to as a "sidecar" container. Although this init + container still starts in the init container sequence, it does not wait + for the container to complete before proceeding to the next init + container. Instead, the next init container starts immediately after this + init container is started, or after any startupProbe has successfully + completed. + type: string + securityContext: + description: |- + SecurityContext defines the security options the container should be run with. 
+ If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. + More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX + capabilities type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default value is Default which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. 
+ If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label + that applies to the container. + type: string + role: + description: Role is a SELinux role label + that applies to the container. + type: string + type: + description: Type is a SELinux type label + that applies to the container. + type: string + user: + description: User is a SELinux user label + that applies to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is + the name of the GMSA credential spec + to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). 
+ In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + startupProbe: + description: |- + StartupProbe indicates that the Pod has successfully initialized. + If specified, no other probes are executed until this completes successfully. + If this probe fails, the Pod will be restarted, just as if the livenessProbe failed. + This can be used to provide different probe parameters at the beginning of a Pod's lifecycle, + when it might take a long time to load data or warm a cache, than during steady-state operation. + This cannot be updated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + properties: + exec: + description: Exec specifies the action to + take. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies an action involving + a GRPC port. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies the http request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in + the request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a + custom header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. 
+ Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies an action + involving a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + stdin: + description: |- + Whether this container should allocate a buffer for stdin in the container runtime. If this + is not set, reads from stdin in the container will always result in EOF. + Default is false. + type: boolean + stdinOnce: + description: |- + Whether the container runtime should close the stdin channel after it has been opened by + a single attach. When stdin is true the stdin stream will remain open across multiple attach + sessions. If stdinOnce is set to true, stdin is opened on container start, is empty until the + first client attaches to stdin, and then remains open and accepts data until the client disconnects, + at which time stdin is closed and remains closed until the container is restarted. If this + flag is false, a container processes that reads from stdin will never receive an EOF. + Default is false + type: boolean + terminationMessagePath: + description: |- + Optional: Path at which the file to which the container's termination message + will be written is mounted into the container's filesystem. + Message written is intended to be brief final status, such as an assertion failure message. + Will be truncated by the node if greater than 4096 bytes. 
The total message length across + all containers will be limited to 12kb. + Defaults to /dev/termination-log. + Cannot be updated. + type: string + terminationMessagePolicy: + description: |- + Indicate how the termination message should be populated. File will use the contents of + terminationMessagePath to populate the container status message on both success and failure. + FallbackToLogsOnError will use the last chunk of container log output if the termination + message file is empty and the container exited with an error. + The log output is limited to 2048 bytes or 80 lines, whichever is smaller. + Defaults to File. + Cannot be updated. + type: string + tty: + description: |- + Whether this container should allocate a TTY for itself, also requires 'stdin' to be true. + Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block + devices to be used by the container. + items: + description: volumeDevice describes a mapping + of a raw block device within a container. + properties: + devicePath: + description: devicePath is the path inside + of the container that the device will + be mapped to. + type: string + name: + description: name must match the name of + a persistentVolumeClaim in the pod + type: string + required: + - devicePath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map + volumeMounts: + description: |- + Pod volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting + of a Volume within a container. + properties: + mountPath: + description: |- + Path within the container at which the volume should be mounted. Must + not contain ':'. + type: string + mountPropagation: + description: |- + mountPropagation determines how mounts are propagated from the host + to container and the other way around. + When not set, MountPropagationNone is used. + This field is beta in 1.10. + When RecursiveReadOnly is set to IfPossible or to Enabled, MountPropagation must be None or unspecified + (which defaults to None). + type: string + name: + description: This must match the Name of + a Volume. + type: string + readOnly: + description: |- + Mounted read-only if true, read-write otherwise (false or unspecified). + Defaults to false. + type: boolean + recursiveReadOnly: + description: |- + RecursiveReadOnly specifies whether read-only mounts should be handled + recursively. + + If ReadOnly is false, this field has no meaning and must be unspecified. + + If ReadOnly is true, and this field is set to Disabled, the mount is not made + recursively read-only. If this field is set to IfPossible, the mount is made + recursively read-only, if it is supported by the container runtime. If this + field is set to Enabled, the mount is made recursively read-only if it is + supported by the container runtime, otherwise the pod will not be started and + an error will be generated to indicate the reason. + + If this field is set to IfPossible or Enabled, MountPropagation must be set to + None (or be unspecified, which defaults to None). + + If this field is not specified, it is treated as an equivalent of Disabled. + type: string + subPath: + description: |- + Path within the volume from which the container's volume should be mounted. + Defaults to "" (volume's root). + type: string + subPathExpr: + description: |- + Expanded path within the volume from which the container's volume should be mounted. 
+ Behaves similarly to SubPath but environment variable references $(VAR_NAME) are expanded using the container's environment. + Defaults to "" (volume's root). + SubPathExpr and SubPath are mutually exclusive. + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + workingDir: + description: |- + Container's working directory. + If not specified, the container runtime's default will be used, which + might be configured in the container image. + Cannot be updated. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + nodeName: + description: |- + NodeName indicates in which node this pod is scheduled. + If empty, this pod is a candidate for scheduling by the scheduler defined in schedulerName. + Once this field is set, the kubelet for this node becomes responsible for the lifecycle of this pod. + This field should not be used to express a desire for the pod to be scheduled on a specific node. + https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodename + type: string + nodeSelector: + additionalProperties: + type: string + description: |- + NodeSelector is a selector which must be true for the pod to fit on a node. + Selector which must match a node's labels for the pod to be scheduled on that node. + More info: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ + type: object + x-kubernetes-map-type: atomic + os: + description: |- + Specifies the OS of the containers in the pod. + Some pod and container fields are restricted if this is set. + + If the OS field is set to linux, the following fields must be unset: + -securityContext.windowsOptions + + If the OS field is set to windows, following fields must be unset: + - spec.hostPID + - spec.hostIPC + - spec.hostUsers + - spec.securityContext.appArmorProfile + - spec.securityContext.seLinuxOptions + - spec.securityContext.seccompProfile + - spec.securityContext.fsGroup + - spec.securityContext.fsGroupChangePolicy + - spec.securityContext.sysctls + - spec.shareProcessNamespace + - spec.securityContext.runAsUser + - spec.securityContext.runAsGroup + - spec.securityContext.supplementalGroups + - spec.securityContext.supplementalGroupsPolicy + - spec.containers[*].securityContext.appArmorProfile + - spec.containers[*].securityContext.seLinuxOptions + - spec.containers[*].securityContext.seccompProfile + - spec.containers[*].securityContext.capabilities + - spec.containers[*].securityContext.readOnlyRootFilesystem + - spec.containers[*].securityContext.privileged + - spec.containers[*].securityContext.allowPrivilegeEscalation + - spec.containers[*].securityContext.procMount + - spec.containers[*].securityContext.runAsUser + - spec.containers[*].securityContext.runAsGroup + properties: + name: + description: |- + Name is the name of the operating system. The currently supported values are linux and windows. 
+ Additional value may be defined in future and can be one of: + https://github.com/opencontainers/runtime-spec/blob/master/config.md#platform-specific-configuration + Clients should expect to handle additional values and treat unrecognized values in this field as os: null + type: string + required: + - name + type: object + overhead: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Overhead represents the resource overhead associated with running a pod for a given RuntimeClass. + This field will be autopopulated at admission time by the RuntimeClass admission controller. If + the RuntimeClass admission controller is enabled, overhead must not be set in Pod create requests. + The RuntimeClass admission controller will reject Pod create requests which have the overhead already + set. If RuntimeClass is configured and selected in the PodSpec, Overhead will be set to the value + defined in the corresponding RuntimeClass, otherwise it will remain unset and treated as zero. + More info: https://git.k8s.io/enhancements/keps/sig-node/688-pod-overhead/README.md + type: object + preemptionPolicy: + description: |- + PreemptionPolicy is the Policy for preempting pods with lower priority. + One of Never, PreemptLowerPriority. + Defaults to PreemptLowerPriority if unset. + type: string + priority: + description: |- + The priority value. Various system components use this field to find the + priority of the pod. When Priority Admission Controller is enabled, it + prevents users from setting this field. The admission controller populates + this field from PriorityClassName. + The higher the value, the higher the priority. + format: int32 + type: integer + priorityClassName: + description: |- + If specified, indicates the pod's priority. "system-node-critical" and + "system-cluster-critical" are two special keywords which indicate the + highest priorities with the former being the highest priority. Any other + name must be defined by creating a PriorityClass object with that name. + If not specified, the pod priority will be default or zero if there is no + default. + type: string + readinessGates: + description: |- + If specified, all readiness gates will be evaluated for pod readiness. + A pod is ready when all its containers are ready AND + all conditions specified in the readiness gates have status equal to "True" + More info: https://git.k8s.io/enhancements/keps/sig-network/580-pod-readiness-gates + items: + description: PodReadinessGate contains the reference + to a pod condition + properties: + conditionType: + description: ConditionType refers to a condition + in the pod's condition list with matching type. + type: string + required: + - conditionType + type: object + type: array + x-kubernetes-list-type: atomic + resourceClaims: + description: |- + ResourceClaims defines which ResourceClaims must be allocated + and reserved before the Pod is allowed to start. The resources + will be made available to those containers which consume them + by name. + + This is an alpha field and requires enabling the + DynamicResourceAllocation feature gate. + + This field is immutable. + items: + description: |- + PodResourceClaim references exactly one ResourceClaim, either directly + or by naming a ResourceClaimTemplate which is then turned into a ResourceClaim + for the pod. 
+ + It adds a name to it that uniquely identifies the ResourceClaim inside the Pod. + Containers that need access to the ResourceClaim reference it with this name. + properties: + name: + description: |- + Name uniquely identifies this resource claim inside the pod. + This must be a DNS_LABEL. + type: string + resourceClaimName: + description: |- + ResourceClaimName is the name of a ResourceClaim object in the same + namespace as this pod. + + Exactly one of ResourceClaimName and ResourceClaimTemplateName must + be set. + type: string + resourceClaimTemplateName: + description: |- + ResourceClaimTemplateName is the name of a ResourceClaimTemplate + object in the same namespace as this pod. + + The template will be used to create a new ResourceClaim, which will + be bound to this pod. When this pod is deleted, the ResourceClaim + will also be deleted. The pod name and resource name, along with a + generated component, will be used to form a unique name for the + ResourceClaim, which will be recorded in pod.status.resourceClaimStatuses. + + This field is immutable and no changes will be made to the + corresponding ResourceClaim by the control plane after creating the + ResourceClaim. + + Exactly one of ResourceClaimName and ResourceClaimTemplateName must + be set. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + restartPolicy: + description: |- + Restart policy for all containers within the pod. + One of Always, OnFailure, Never. In some contexts, only a subset of those values may be permitted. + Default to Always. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#restart-policy + type: string + runtimeClassName: + description: |- + RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used + to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run. + If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an + empty definition that uses the default runtime handler. + More info: https://git.k8s.io/enhancements/keps/sig-node/585-runtime-class + type: string + schedulerName: + description: |- + If specified, the pod will be dispatched by specified scheduler. + If not specified, the pod will be dispatched by default scheduler. + type: string + schedulingGates: + description: |- + SchedulingGates is an opaque list of values that if specified will block scheduling the pod. + If schedulingGates is not empty, the pod will stay in the SchedulingGated state and the + scheduler will not attempt to schedule the pod. + + SchedulingGates can only be set at pod creation time, and be removed only afterwards. + items: + description: PodSchedulingGate is associated to a + Pod to guard its scheduling. + properties: + name: + description: |- + Name of the scheduling gate. + Each scheduling gate must have a unique name field. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + securityContext: + description: |- + SecurityContext holds pod-level security attributes and common container settings. + Optional: Defaults to empty. See type description for default values of each field. + properties: + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. 
+ properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. + Some volume types allow the Kubelet to change the ownership of that volume + to be owned by the pod: + + 1. The owning GID will be the FSGroup + 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) + 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + fsGroupChangePolicy: + description: |- + fsGroupChangePolicy defines behavior of changing ownership and permission of the volume + before being exposed inside Pod. This field will only apply to + volume types which support fsGroup based ownership(and permissions). + It will have no effect on ephemeral volume types such as: secret, configmaps + and emptydir. + Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. + Note that this field cannot be set when spec.os.name is windows. + type: string + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to all containers. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in SecurityContext. If set in + both SecurityContext and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that + applies to the container. 
+ type: string + role: + description: Role is a SELinux role label that + applies to the container. + type: string + type: + description: Type is a SELinux type label that + applies to the container. + type: string + user: + description: User is a SELinux user label that + applies to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + supplementalGroups: + description: |- + A list of groups applied to the first process run in each container, in + addition to the container's primary GID and fsGroup (if specified). If + the SupplementalGroupsPolicy feature is enabled, the + supplementalGroupsPolicy field determines whether these are in addition + to or instead of any group memberships defined in the container image. + If unspecified, no additional groups are added, though group memberships + defined in the container image may still be used, depending on the + supplementalGroupsPolicy field. + Note that this field cannot be set when spec.os.name is windows. + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + supplementalGroupsPolicy: + description: |- + Defines how supplemental groups of the first container processes are calculated. + Valid values are "Merge" and "Strict". If not specified, "Merge" is used. + (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled + and the container runtime must implement support for this feature. + Note that this field cannot be set when spec.os.name is windows. + type: string + sysctls: + description: |- + Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + sysctls (by the container runtime) might fail to launch. + Note that this field cannot be set when spec.os.name is windows. + items: + description: Sysctl defines a kernel parameter + to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options within a container's SecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. 
+ type: string
+ gmsaCredentialSpecName:
+ description: GMSACredentialSpecName is the name
+ of the GMSA credential spec to use.
+ type: string
+ hostProcess:
+ description: |-
+ HostProcess determines if a container should be run as a 'Host Process' container.
+ All of a Pod's containers must have the same effective HostProcess value
+ (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers).
+ In addition, if HostProcess is true then HostNetwork must also be set to true.
+ type: boolean
+ runAsUserName:
+ description: |-
+ The UserName in Windows to run the entrypoint of the container process.
+ Defaults to the user specified in image metadata if unspecified.
+ May also be set in PodSecurityContext. If set in both SecurityContext and
+ PodSecurityContext, the value specified in SecurityContext takes precedence.
+ type: string
+ type: object
+ type: object
+ serviceAccount:
+ description: |-
+ DeprecatedServiceAccount is a deprecated alias for ServiceAccountName.
+ Deprecated: Use serviceAccountName instead.
+ type: string
+ serviceAccountName:
+ description: |-
+ ServiceAccountName is the name of the ServiceAccount to use to run this pod.
+ More info: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
+ type: string
+ setHostnameAsFQDN:
+ description: |-
+ If true the pod's hostname will be configured as the pod's FQDN, rather than the leaf name (the default).
+ In Linux containers, this means setting the FQDN in the hostname field of the kernel (the nodename field of struct utsname).
+ In Windows containers, this means setting the registry value of hostname for the registry key HKEY_LOCAL_MACHINE\\SYSTEM\\CurrentControlSet\\Services\\Tcpip\\Parameters to FQDN.
+ If a pod does not have FQDN, this has no effect.
+ Default to false.
+ type: boolean
+ shareProcessNamespace:
+ description: |-
+ Share a single process namespace between all of the containers in a pod.
+ When this is set containers will be able to view and signal processes from other containers
+ in the same pod, and the first process in each container will not be assigned PID 1.
+ HostPID and ShareProcessNamespace cannot both be set.
+ Optional: Default to false.
+ type: boolean
+ subdomain:
+ description: |-
+ If specified, the fully qualified Pod hostname will be "<hostname>.<subdomain>.<pod namespace>.svc.<cluster domain>".
+ If not specified, the pod will not have a domainname at all.
+ type: string
+ terminationGracePeriodSeconds:
+ description: |-
+ Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request.
+ Value must be non-negative integer. The value zero indicates stop immediately via
+ the kill signal (no opportunity to shut down).
+ If this value is nil, the default grace period will be used instead.
+ The grace period is the duration in seconds after the processes running in the pod are sent
+ a termination signal and the time when the processes are forcibly halted with a kill signal.
+ Set this value longer than the expected cleanup time for your process.
+ Defaults to 30 seconds.
+ format: int64
+ type: integer
+ tolerations:
+ description: If specified, the pod's tolerations.
+ items:
+ description: |-
+ The pod this Toleration is attached to tolerates any taint that matches
+ the triple <key,value,effect> using the matching operator <operator>.
+ properties:
+ effect:
+ description: |-
+ Effect indicates the taint effect to match. Empty means match all taint effects.
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + x-kubernetes-list-type: atomic + topologySpreadConstraints: + description: |- + TopologySpreadConstraints describes how a group of pods ought to spread across topology + domains. Scheduler will schedule pods in a way which abides by the constraints. + All topologySpreadConstraints are ANDed. + items: + description: TopologySpreadConstraint specifies how + to spread matching pods among the given topology. + properties: + labelSelector: + description: |- + LabelSelector is used to find matching pods. + Pods that match this label selector are counted to determine the number of pods + in their corresponding topology domain. + properties: + matchExpressions: + description: matchExpressions is a list of + label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that + the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select the pods over which + spreading will be calculated. The keys are used to lookup values from the + incoming pod labels, those key-value labels are ANDed with labelSelector + to select the group of existing pods over which spreading will be calculated + for the incoming pod. 
The same key is forbidden to exist in both MatchLabelKeys and LabelSelector. + MatchLabelKeys cannot be set when LabelSelector isn't set. + Keys that don't exist in the incoming pod labels will + be ignored. A null or empty list means only match against labelSelector. + + This is a beta field and requires the MatchLabelKeysInPodTopologySpread feature gate to be enabled (enabled by default). + items: + type: string + type: array + x-kubernetes-list-type: atomic + maxSkew: + description: |- + MaxSkew describes the degree to which pods may be unevenly distributed. + When `whenUnsatisfiable=DoNotSchedule`, it is the maximum permitted difference + between the number of matching pods in the target topology and the global minimum. + The global minimum is the minimum number of matching pods in an eligible domain + or zero if the number of eligible domains is less than MinDomains. + For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same + labelSelector spread as 2/2/1: + In this case, the global minimum is 1. + | zone1 | zone2 | zone3 | + | P P | P P | P | + - if MaxSkew is 1, incoming pod can only be scheduled to zone3 to become 2/2/2; + scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) + violate MaxSkew(1). + - if MaxSkew is 2, incoming pod can be scheduled onto any zone. + When `whenUnsatisfiable=ScheduleAnyway`, it is used to give higher precedence + to topologies that satisfy it. + It's a required field. Default value is 1 and 0 is not allowed. + format: int32 + type: integer + minDomains: + description: |- + MinDomains indicates a minimum number of eligible domains. + When the number of eligible domains with matching topology keys is less than minDomains, + Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed. + And when the number of eligible domains with matching topology keys equals or greater than minDomains, + this value has no effect on scheduling. + As a result, when the number of eligible domains is less than minDomains, + scheduler won't schedule more than maxSkew Pods to those domains. + If value is nil, the constraint behaves as if MinDomains is equal to 1. + Valid values are integers greater than 0. + When value is not nil, WhenUnsatisfiable must be DoNotSchedule. + + For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same + labelSelector spread as 2/2/2: + | zone1 | zone2 | zone3 | + | P P | P P | P P | + The number of domains is less than 5(MinDomains), so "global minimum" is treated as 0. + In this situation, new pod with the same labelSelector cannot be scheduled, + because computed skew will be 3(3 - 0) if new Pod is scheduled to any of the three zones, + it will violate MaxSkew. + format: int32 + type: integer + nodeAffinityPolicy: + description: |- + NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector + when calculating pod topology spread skew. Options are: + - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. + - Ignore: nodeAffinity/nodeSelector are ignored. All nodes are included in the calculations. + + If this value is nil, the behavior is equivalent to the Honor policy. + This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag. + type: string + nodeTaintsPolicy: + description: |- + NodeTaintsPolicy indicates how we will treat node taints when calculating + pod topology spread skew. 
Options are:
+ - Honor: nodes without taints, along with tainted nodes for which the incoming pod
+ has a toleration, are included.
+ - Ignore: node taints are ignored. All nodes are included.
+
+ If this value is nil, the behavior is equivalent to the Ignore policy.
+ This is a beta-level feature default enabled by the NodeInclusionPolicyInPodTopologySpread feature flag.
+ type: string
+ topologyKey:
+ description: |-
+ TopologyKey is the key of node labels. Nodes that have a label with this key
+ and identical values are considered to be in the same topology.
+ We consider each <key, value> as a "bucket", and try to put balanced number
+ of pods into each bucket.
+ We define a domain as a particular instance of a topology.
+ Also, we define an eligible domain as a domain whose nodes meet the requirements of
+ nodeAffinityPolicy and nodeTaintsPolicy.
+ e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology.
+ And, if TopologyKey is "topology.kubernetes.io/zone", each zone is a domain of that topology.
+ It's a required field.
+ type: string
+ whenUnsatisfiable:
+ description: |-
+ WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy
+ the spread constraint.
+ - DoNotSchedule (default) tells the scheduler not to schedule it.
+ - ScheduleAnyway tells the scheduler to schedule the pod in any location,
+ but giving higher precedence to topologies that would help reduce the
+ skew.
+ A constraint is considered "Unsatisfiable" for an incoming pod
+ if and only if every possible node assignment for that pod would violate
+ "MaxSkew" on some topology.
+ For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same
+ labelSelector spread as 3/1/1:
+ | zone1 | zone2 | zone3 |
+ | P P P | P | P |
+ If WhenUnsatisfiable is set to DoNotSchedule, incoming pod can only be scheduled
+ to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies
+ MaxSkew(1). In other words, the cluster can still be imbalanced, but scheduler
+ won't make it *more* imbalanced.
+ It's a required field.
+ type: string
+ required:
+ - maxSkew
+ - topologyKey
+ - whenUnsatisfiable
+ type: object
+ type: array
+ x-kubernetes-list-map-keys:
+ - topologyKey
+ - whenUnsatisfiable
+ x-kubernetes-list-type: map
+ volumes:
+ description: |-
+ List of volumes that can be mounted by containers belonging to the pod.
+ More info: https://kubernetes.io/docs/concepts/storage/volumes
+ items:
+ description: Volume represents a named volume in a
+ pod that may be accessed by any container in the
+ pod.
+ properties:
+ awsElasticBlockStore:
+ description: |-
+ awsElasticBlockStore represents an AWS Disk resource that is attached to a
+ kubelet's host machine and then exposed to the pod.
+ More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore
+ properties:
+ fsType:
+ description: |-
+ fsType is the filesystem type of the volume that you want to mount.
+ Tip: Ensure that the filesystem type is supported by the host operating system.
+ Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified.
+ More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore
+ type: string
+ partition:
+ description: |-
+ partition is the partition in the volume that you want to mount.
+ If omitted, the default is to mount by volume name.
+ Examples: For volume /dev/sda1, you specify the partition as "1".
+ Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty).
+ format: int32 + type: integer + readOnly: + description: |- + readOnly value true will force the readOnly setting in VolumeMounts. + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + type: boolean + volumeID: + description: |- + volumeID is unique ID of the persistent disk resource in AWS (Amazon EBS volume). + More info: https://kubernetes.io/docs/concepts/storage/volumes#awselasticblockstore + type: string + required: + - volumeID + type: object + azureDisk: + description: azureDisk represents an Azure Data + Disk mount on the host and bind mount to the + pod. + properties: + cachingMode: + description: 'cachingMode is the Host Caching + mode: None, Read Only, Read Write.' + type: string + diskName: + description: diskName is the Name of the data + disk in the blob storage + type: string + diskURI: + description: diskURI is the URI of data disk + in the blob storage + type: string + fsType: + default: ext4 + description: |- + fsType is Filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + kind: + description: 'kind expected values are Shared: + multiple blob disks per storage account Dedicated: + single blob disk per storage account Managed: + azure managed data disk (only in managed + availability set). defaults to shared' + type: string + readOnly: + default: false + description: |- + readOnly Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + required: + - diskName + - diskURI + type: object + azureFile: + description: azureFile represents an Azure File + Service mount on the host and bind mount to + the pod. + properties: + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretName: + description: secretName is the name of secret + that contains Azure Storage Account Name + and Key + type: string + shareName: + description: shareName is the azure share + Name + type: string + required: + - secretName + - shareName + type: object + cephfs: + description: cephFS represents a Ceph FS mount + on the host that shares a pod's lifetime + properties: + monitors: + description: |- + monitors is Required: Monitors is a collection of Ceph monitors + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + items: + type: string + type: array + x-kubernetes-list-type: atomic + path: + description: 'path is Optional: Used as the + mounted root, rather than the full Ceph + tree, default is /' + type: string + readOnly: + description: |- + readOnly is Optional: Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + type: boolean + secretFile: + description: |- + secretFile is Optional: SecretFile is the path to key ring for User, default is /etc/ceph/user.secret + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + type: string + secretRef: + description: |- + secretRef is Optional: SecretRef is reference to the authentication secret for User, default is empty. + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + properties: + name: + default: "" + description: |- + Name of the referent. 
+ This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + user: + description: |- + user is optional: User is the rados user name, default is admin + More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it + type: string + required: + - monitors + type: object + cinder: + description: |- + cinder represents a cinder volume attached and mounted on kubelets host machine. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + type: boolean + secretRef: + description: |- + secretRef is optional: points to a secret object containing parameters used to connect + to OpenStack. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + volumeID: + description: |- + volumeID used to identify the volume in cinder. + More info: https://examples.k8s.io/mysql-cinder-pd/README.md + type: string + required: + - volumeID + type: object + configMap: + description: configMap represents a configMap + that should populate this volume + properties: + defaultMode: + description: |- + defaultMode is optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Defaults to 0644. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + ConfigMap will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the ConfigMap, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path + within a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. 
+ YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: optional specify whether the + ConfigMap or its keys must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + csi: + description: csi (Container Storage Interface) + represents ephemeral storage that is handled + by certain external CSI drivers (Beta feature). + properties: + driver: + description: |- + driver is the name of the CSI driver that handles this volume. + Consult with your admin for the correct name as registered in the cluster. + type: string + fsType: + description: |- + fsType to mount. Ex. "ext4", "xfs", "ntfs". + If not provided, the empty value is passed to the associated CSI driver + which will determine the default filesystem to apply. + type: string + nodePublishSecretRef: + description: |- + nodePublishSecretRef is a reference to the secret object containing + sensitive information to pass to the CSI driver to complete the CSI + NodePublishVolume and NodeUnpublishVolume calls. + This field is optional, and may be empty if no secret is required. If the + secret object contains more than one secret, all secret references are passed. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + readOnly: + description: |- + readOnly specifies a read-only configuration for the volume. + Defaults to false (read/write). + type: boolean + volumeAttributes: + additionalProperties: + type: string + description: |- + volumeAttributes stores driver-specific properties that are passed to the CSI + driver. Consult your driver's documentation for supported values. + type: object + required: + - driver + type: object + downwardAPI: + description: downwardAPI represents downward API + about the pod that should populate this volume + properties: + defaultMode: + description: |- + Optional: mode bits to use on created files by default. Must be a + Optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Defaults to 0644. + Directories within the path are not affected by this setting. 
+ This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: Items is a list of downward API + volume file + items: + description: DownwardAPIVolumeFile represents + information to create the file containing + the pod field + properties: + fieldRef: + description: 'Required: Selects a field + of the pod: only annotations, labels, + name, namespace and uid are supported.' + properties: + apiVersion: + description: Version of the schema + the FieldPath is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to + select in the specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + mode: + description: |- + Optional: mode bits used to set permissions on this file, must be an octal value + between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: 'Required: Path is the + relative path name of the file to + be created. Must not be absolute or + contain the ''..'' path. Must be utf-8 + encoded. The first item of the relative + path must not start with ''..''' + type: string + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. + properties: + containerName: + description: 'Container name: required + for volumes, optional for env + vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output + format of the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource + to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + required: + - path + type: object + type: array + x-kubernetes-list-type: atomic + type: object + emptyDir: + description: |- + emptyDir represents a temporary directory that shares a pod's lifetime. + More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir + properties: + medium: + description: |- + medium represents what type of storage medium should back this directory. + The default is "" which means to use the node's default medium. + Must be an empty string (default) or Memory. + More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir + type: string + sizeLimit: + anyOf: + - type: integer + - type: string + description: |- + sizeLimit is the total amount of local storage required for this EmptyDir volume. + The size limit is also applicable for memory medium. + The maximum usage on memory medium EmptyDir would be the minimum value between + the SizeLimit specified here and the sum of memory limits of all containers in a pod. + The default is nil which means that the limit is undefined. 
+ More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir
+ pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+ x-kubernetes-int-or-string: true
+ type: object
+ ephemeral:
+ description: |-
+ ephemeral represents a volume that is handled by a cluster storage driver.
+ The volume's lifecycle is tied to the pod that defines it - it will be created before the pod starts,
+ and deleted when the pod is removed.
+
+ Use this if:
+ a) the volume is only needed while the pod runs,
+ b) features of normal volumes like restoring from snapshot or capacity
+ tracking are needed,
+ c) the storage driver is specified through a storage class, and
+ d) the storage driver supports dynamic volume provisioning through
+ a PersistentVolumeClaim (see EphemeralVolumeSource for more
+ information on the connection between this volume type
+ and PersistentVolumeClaim).
+
+ Use PersistentVolumeClaim or one of the vendor-specific
+ APIs for volumes that persist for longer than the lifecycle
+ of an individual pod.
+
+ Use CSI for light-weight local ephemeral volumes if the CSI driver is meant to
+ be used that way - see the documentation of the driver for
+ more information.
+
+ A pod can use both types of ephemeral volumes and
+ persistent volumes at the same time.
+ properties:
+ volumeClaimTemplate:
+ description: |-
+ Will be used to create a stand-alone PVC to provision the volume.
+ The pod in which this EphemeralVolumeSource is embedded will be the
+ owner of the PVC, i.e. the PVC will be deleted together with the
+ pod. The name of the PVC will be `<pod name>-<volume name>` where
+ `<volume name>` is the name from the `PodSpec.Volumes` array
+ entry. Pod validation will reject the pod if the concatenated name
+ is not valid for a PVC (for example, too long).
+
+ An existing PVC with that name that is not owned by the pod
+ will *not* be used for the pod to avoid using an unrelated
+ volume by mistake. Starting the pod is then blocked until
+ the unrelated PVC is removed. If such a pre-created PVC is
+ meant to be used by the pod, the PVC has to updated with an
+ owner reference to the pod once the pod exists. Normally
+ this should not be necessary, but it may be useful when
+ manually reconstructing a broken cluster.
+
+ This field is read-only and no changes will be made by Kubernetes
+ to the PVC after it has been created.
+
+ Required, must not be nil.
+ properties:
+ metadata:
+ description: |-
+ May contain labels and annotations that will be copied into the PVC
+ when creating it. No other fields are allowed and will be rejected during
+ validation.
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ type: object
+ finalizers:
+ items:
+ type: string
+ type: array
+ labels:
+ additionalProperties:
+ type: string
+ type: object
+ name:
+ type: string
+ namespace:
+ type: string
+ type: object
+ spec:
+ description: |-
+ The specification for the PersistentVolumeClaim. The entire content is
+ copied unchanged into the PVC that gets created from this
+ template. The same fields as in a PersistentVolumeClaim
+ are also valid here.
+ properties:
+ accessModes:
+ description: |-
+ accessModes contains the desired access modes the volume should have.
+ More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 + items: + type: string + type: array + x-kubernetes-list-type: atomic + dataSource: + description: |- + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type + of resource being referenced + type: string + name: + description: Name is the name + of resource being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. + When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. + When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. + (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. 
+ type: string + kind: + description: Kind is the type + of resource being referenced + type: string + name: + description: Name is the name + of resource being referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query + over volumes to consider for binding. + properties: + matchExpressions: + description: matchExpressions + is a list of label selector + requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the + label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". 
The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. + If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, + it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass + will be applied to the claim but it's not allowed to reset this field to empty string once it is set. + If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass + will be set by the persistentvolume controller if it exists. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ + (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding + reference to the PersistentVolume + backing this claim. + type: string + type: object + required: + - spec + type: object + type: object + fc: + description: fc represents a Fibre Channel resource + that is attached to a kubelet's host machine + and then exposed to the pod. + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + lun: + description: 'lun is Optional: FC target lun + number' + format: int32 + type: integer + readOnly: + description: |- + readOnly is Optional: Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + targetWWNs: + description: 'targetWWNs is Optional: FC target + worldwide names (WWNs)' + items: + type: string + type: array + x-kubernetes-list-type: atomic + wwids: + description: |- + wwids Optional: FC volume world wide identifiers (wwids) + Either wwids or combination of targetWWNs and lun must be set, but not both simultaneously. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + flexVolume: + description: |- + flexVolume represents a generic volume resource that is + provisioned/attached using an exec based plugin. + properties: + driver: + description: driver is the name of the driver + to use for this volume. + type: string + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". The default filesystem depends on FlexVolume script. + type: string + options: + additionalProperties: + type: string + description: 'options is Optional: this field + holds extra command options if any.' 
+ type: object + readOnly: + description: |- + readOnly is Optional: defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef is Optional: secretRef is reference to the secret object containing + sensitive information to pass to the plugin scripts. This may be + empty if no secret object is specified. If the secret object + contains more than one secret, all secrets are passed to the plugin + scripts. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + required: + - driver + type: object + flocker: + description: flocker represents a Flocker volume + attached to a kubelet's host machine. This depends + on the Flocker control service being running + properties: + datasetName: + description: |- + datasetName is Name of the dataset stored as metadata -> name on the dataset for Flocker + should be considered as deprecated + type: string + datasetUUID: + description: datasetUUID is the UUID of the + dataset. This is unique identifier of a + Flocker dataset + type: string + type: object + gcePersistentDisk: + description: |- + gcePersistentDisk represents a GCE Disk resource that is attached to a + kubelet's host machine and then exposed to the pod. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + properties: + fsType: + description: |- + fsType is filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + type: string + partition: + description: |- + partition is the partition in the volume that you want to mount. + If omitted, the default is to mount by volume name. + Examples: For volume /dev/sda1, you specify the partition as "1". + Similarly, the volume partition for /dev/sda is "0" (or you can leave the property empty). + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + format: int32 + type: integer + pdName: + description: |- + pdName is unique name of the PD resource in GCE. Used to identify the disk in GCE. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + type: string + readOnly: + description: |- + readOnly here will force the ReadOnly setting in VolumeMounts. + Defaults to false. + More info: https://kubernetes.io/docs/concepts/storage/volumes#gcepersistentdisk + type: boolean + required: + - pdName + type: object + gitRepo: + description: |- + gitRepo represents a git repository at a particular revision. + DEPRECATED: GitRepo is deprecated. To provision a container with a git repo, mount an + EmptyDir into an InitContainer that clones the repo using git, then mount the EmptyDir + into the Pod's container. + properties: + directory: + description: |- + directory is the target directory name. + Must not contain or start with '..'. If '.' is supplied, the volume directory will be the + git repository. 
Otherwise, if specified, the volume will contain the git repository in + the subdirectory with the given name. + type: string + repository: + description: repository is the URL + type: string + revision: + description: revision is the commit hash for + the specified revision. + type: string + required: + - repository + type: object + glusterfs: + description: |- + glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. + More info: https://examples.k8s.io/volumes/glusterfs/README.md + properties: + endpoints: + description: |- + endpoints is the endpoint name that details Glusterfs topology. + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: string + path: + description: |- + path is the Glusterfs volume path. + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: string + readOnly: + description: |- + readOnly here will force the Glusterfs volume to be mounted with read-only permissions. + Defaults to false. + More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + type: boolean + required: + - endpoints + - path + type: object + hostPath: + description: |- + hostPath represents a pre-existing file or directory on the host + machine that is directly exposed to the container. This is generally + used for system agents or other privileged things that are allowed + to see the host machine. Most containers will NOT need this. + More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath + properties: + path: + description: |- + path of the directory on the host. + If the path is a symlink, it will follow the link to the real path. + More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath + type: string + type: + description: |- + type for HostPath Volume + Defaults to "" + More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath + type: string + required: + - path + type: object + image: + description: |- + image represents an OCI object (a container image or artifact) pulled and mounted on the kubelet's host machine. + The volume is resolved at pod startup depending on which PullPolicy value is provided: + + - Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails. + - Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present. + - IfNotPresent: the kubelet pulls if the reference isn't already present on disk. Container creation will fail if the reference isn't present and the pull fails. + + The volume gets re-resolved if the pod gets deleted and recreated, which means that new remote content will become available on pod recreation. + A failure to resolve or pull the image during pod startup will block containers from starting and may add significant latency. Failures will be retried using normal volume backoff and will be reported on the pod reason and message. + The types of objects that may be mounted by this volume are defined by the container runtime implementation on a host machine and at minimum must include all valid types supported by the container image field. + The OCI object gets mounted in a single directory (spec.containers[*].volumeMounts.mountPath) by merging the manifest layers in the same way as for container images. + The volume will be mounted read-only (ro) and non-executable files (noexec). 
+ Sub path mounts for containers are not supported (spec.containers[*].volumeMounts.subpath).
+ The field spec.securityContext.fsGroupChangePolicy has no effect on this volume type.
+ properties:
+ pullPolicy:
+ description: |-
+ Policy for pulling OCI objects. Possible values are:
+ Always: the kubelet always attempts to pull the reference. Container creation will fail If the pull fails.
+ Never: the kubelet never pulls the reference and only uses a local image or artifact. Container creation will fail if the reference isn't present.
+ IfNotPresent: the kubelet pulls if the reference isn't already present on disk. Container creation will fail if the reference isn't present and the pull fails.
+ Defaults to Always if :latest tag is specified, or IfNotPresent otherwise.
+ type: string
+ reference:
+ description: |-
+ Required: Image or artifact reference to be used.
+ Behaves in the same way as pod.spec.containers[*].image.
+ Pull secrets will be assembled in the same way as for the container image by looking up node credentials, SA image pull secrets, and pod spec image pull secrets.
+ More info: https://kubernetes.io/docs/concepts/containers/images
+ This field is optional to allow higher level config management to default or override
+ container images in workload controllers like Deployments and StatefulSets.
+ type: string
+ type: object
+ iscsi:
+ description: |-
+ iscsi represents an ISCSI Disk resource that is attached to a
+ kubelet's host machine and then exposed to the pod.
+ More info: https://examples.k8s.io/volumes/iscsi/README.md
+ properties:
+ chapAuthDiscovery:
+ description: chapAuthDiscovery defines whether
+ support iSCSI Discovery CHAP authentication
+ type: boolean
+ chapAuthSession:
+ description: chapAuthSession defines whether
+ support iSCSI Session CHAP authentication
+ type: boolean
+ fsType:
+ description: |-
+ fsType is the filesystem type of the volume that you want to mount.
+ Tip: Ensure that the filesystem type is supported by the host operating system.
+ Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified.
+ More info: https://kubernetes.io/docs/concepts/storage/volumes#iscsi
+ type: string
+ initiatorName:
+ description: |-
+ initiatorName is the custom iSCSI Initiator Name.
+ If initiatorName is specified with iscsiInterface simultaneously, new iSCSI interface
+ <target portal>:<volume name> will be created for the connection.
+ type: string
+ iqn:
+ description: iqn is the target iSCSI Qualified
+ Name.
+ type: string
+ iscsiInterface:
+ default: default
+ description: |-
+ iscsiInterface is the interface Name that uses an iSCSI transport.
+ Defaults to 'default' (tcp).
+ type: string
+ lun:
+ description: lun represents iSCSI Target Lun
+ number.
+ format: int32
+ type: integer
+ portals:
+ description: |-
+ portals is the iSCSI Target Portal List. The portal is either an IP or ip_addr:port if the port
+ is other than default (typically TCP ports 860 and 3260).
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ readOnly:
+ description: |-
+ readOnly here will force the ReadOnly setting in VolumeMounts.
+ Defaults to false.
+ type: boolean
+ secretRef:
+ description: secretRef is the CHAP Secret
+ for iSCSI target and initiator authentication
+ properties:
+ name:
+ default: ""
+ description: |-
+ Name of the referent.
+ This field is effectively required, but due to backwards compatibility is
+ allowed to be empty. Instances of this type with an empty value here are
+ almost certainly wrong.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + targetPortal: + description: |- + targetPortal is iSCSI Target Portal. The Portal is either an IP or ip_addr:port if the port + is other than default (typically TCP ports 860 and 3260). + type: string + required: + - iqn + - lun + - targetPortal + type: object + name: + description: |- + name of the volume. + Must be a DNS_LABEL and unique within the pod. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + nfs: + description: |- + nfs represents an NFS mount on the host that shares a pod's lifetime + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + properties: + path: + description: |- + path that is exported by the NFS server. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: string + readOnly: + description: |- + readOnly here will force the NFS export to be mounted with read-only permissions. + Defaults to false. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: boolean + server: + description: |- + server is the hostname or IP address of the NFS server. + More info: https://kubernetes.io/docs/concepts/storage/volumes#nfs + type: string + required: + - path + - server + type: object + persistentVolumeClaim: + description: |- + persistentVolumeClaimVolumeSource represents a reference to a + PersistentVolumeClaim in the same namespace. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims + properties: + claimName: + description: |- + claimName is the name of a PersistentVolumeClaim in the same namespace as the pod using this volume. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims + type: string + readOnly: + description: |- + readOnly Will force the ReadOnly setting in VolumeMounts. + Default false. + type: boolean + required: + - claimName + type: object + photonPersistentDisk: + description: photonPersistentDisk represents a + PhotonController persistent disk attached and + mounted on kubelets host machine + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + pdID: + description: pdID is the ID that identifies + Photon Controller persistent disk + type: string + required: + - pdID + type: object + portworxVolume: + description: portworxVolume represents a portworx + volume attached and mounted on kubelets host + machine + properties: + fsType: + description: |- + fSType represents the filesystem type to mount + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs". Implicitly inferred to be "ext4" if unspecified. + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + volumeID: + description: volumeID uniquely identifies + a Portworx volume + type: string + required: + - volumeID + type: object + projected: + description: projected items for all in one resources + secrets, configmaps, and downward API + properties: + defaultMode: + description: |- + defaultMode are the mode bits used to set permissions on created files by default. 
+ Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + sources: + description: |- + sources is the list of volume projections. Each entry in this list + handles one source. + items: + description: |- + Projection that may be projected along with other supported volume types. + Exactly one of these fields must be set. + properties: + clusterTrustBundle: + description: |- + ClusterTrustBundle allows a pod to access the `.spec.trustBundle` field + of ClusterTrustBundle objects in an auto-updating file. + + Alpha, gated by the ClusterTrustBundleProjection feature gate. + + ClusterTrustBundle objects can either be selected by name, or by the + combination of signer name and a label selector. + + Kubelet performs aggressive normalization of the PEM contents written + into the pod filesystem. Esoteric PEM features such as inter-block + comments and block headers are stripped. Certificates are deduplicated. + The ordering of certificates within the file is arbitrary, and Kubelet + may change the order over time. + properties: + labelSelector: + description: |- + Select all ClusterTrustBundles that match this label selector. Only has + effect if signerName is set. Mutually-exclusive with name. If unset, + interpreted as "match nothing". If set but empty, interpreted as "match + everything". + properties: + matchExpressions: + description: matchExpressions + is a list of label selector + requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the + label key that the selector + applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + name: + description: |- + Select a single ClusterTrustBundle by object name. Mutually-exclusive + with signerName and labelSelector. + type: string + optional: + description: |- + If true, don't block pod startup if the referenced ClusterTrustBundle(s) + aren't available. If using name, then the named ClusterTrustBundle is + allowed not to exist. If using signerName, then the combination of + signerName and labelSelector is allowed to match zero + ClusterTrustBundles. 
+ type: boolean + path: + description: Relative path from + the volume root to write the bundle. + type: string + signerName: + description: |- + Select all ClusterTrustBundles that match this signer name. + Mutually-exclusive with name. The contents of all selected + ClusterTrustBundles will be unified and deduplicated. + type: string + required: + - path + type: object + configMap: + description: configMap information about + the configMap data to project + properties: + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + ConfigMap will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the ConfigMap, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key + to a path within a volume. + properties: + key: + description: key is the key + to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: optional specify whether + the ConfigMap or its keys must + be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + downwardAPI: + description: downwardAPI information + about the downwardAPI data to project + properties: + items: + description: Items is a list of + DownwardAPIVolume file + items: + description: DownwardAPIVolumeFile + represents information to create + the file containing the pod + field + properties: + fieldRef: + description: 'Required: Selects + a field of the pod: only + annotations, labels, name, + namespace and uid are supported.' + properties: + apiVersion: + description: Version of + the schema the FieldPath + is written in terms + of, defaults to "v1". + type: string + fieldPath: + description: Path of the + field to select in the + specified API version. + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + mode: + description: |- + Optional: mode bits used to set permissions on this file, must be an octal value + between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. 
+ This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: 'Required: Path + is the relative path name + of the file to be created. + Must not be absolute or + contain the ''..'' path. + Must be utf-8 encoded. The + first item of the relative + path must not start with + ''..''' + type: string + resourceFieldRef: + description: |- + Selects a resource of the container: only resources limits and requests + (limits.cpu, limits.memory, requests.cpu and requests.memory) are currently supported. + properties: + containerName: + description: 'Container + name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies + the output format of + the exposed resources, + defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: + resource to select' + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + required: + - path + type: object + type: array + x-kubernetes-list-type: atomic + type: object + secret: + description: secret information about + the secret data to project + properties: + items: + description: |- + items if unspecified, each key-value pair in the Data field of the referenced + Secret will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the Secret, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key + to a path within a volume. + properties: + key: + description: key is the key + to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: optional field specify + whether the Secret or its key + must be defined + type: boolean + type: object + x-kubernetes-map-type: atomic + serviceAccountToken: + description: serviceAccountToken is + information about the serviceAccountToken + data to project + properties: + audience: + description: |- + audience is the intended audience of the token. A recipient of a token + must identify itself with an identifier specified in the audience of the + token, and otherwise should reject the token. The audience defaults to the + identifier of the apiserver. + type: string + expirationSeconds: + description: |- + expirationSeconds is the requested duration of validity of the service + account token. As the token approaches expiration, the kubelet volume + plugin will proactively rotate the service account token. The kubelet will + start trying to rotate the token if the token is older than 80 percent of + its time to live or if the token is older than 24 hours.Defaults to 1 hour + and must be at least 10 minutes. + format: int64 + type: integer + path: + description: |- + path is the path relative to the mount point of the file to project the + token into. + type: string + required: + - path + type: object + type: object + type: array + x-kubernetes-list-type: atomic + type: object + quobyte: + description: quobyte represents a Quobyte mount + on the host that shares a pod's lifetime + properties: + group: + description: |- + group to map volume access to + Default is no group + type: string + readOnly: + description: |- + readOnly here will force the Quobyte volume to be mounted with read-only permissions. + Defaults to false. + type: boolean + registry: + description: |- + registry represents a single or multiple Quobyte Registry services + specified as a string as host:port pair (multiple entries are separated with commas) + which acts as the central registry for volumes + type: string + tenant: + description: |- + tenant owning the given Quobyte volume in the Backend + Used with dynamically provisioned Quobyte volumes, value is set by the plugin + type: string + user: + description: |- + user to map volume access to + Defaults to serivceaccount user + type: string + volume: + description: volume is a string that references + an already created Quobyte volume by name. + type: string + required: + - registry + - volume + type: object + rbd: + description: |- + rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. + More info: https://examples.k8s.io/volumes/rbd/README.md + properties: + fsType: + description: |- + fsType is the filesystem type of the volume that you want to mount. + Tip: Ensure that the filesystem type is supported by the host operating system. + Examples: "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + More info: https://kubernetes.io/docs/concepts/storage/volumes#rbd + type: string + image: + description: |- + image is the rados image name. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + keyring: + default: /etc/ceph/keyring + description: |- + keyring is the path to key ring for RBDUser. + Default is /etc/ceph/keyring. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + monitors: + description: |- + monitors is a collection of Ceph monitors. 
+ More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + items: + type: string + type: array + x-kubernetes-list-type: atomic + pool: + default: rbd + description: |- + pool is the rados pool name. + Default is rbd. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + readOnly: + description: |- + readOnly here will force the ReadOnly setting in VolumeMounts. + Defaults to false. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: boolean + secretRef: + description: |- + secretRef is name of the authentication secret for RBDUser. If provided + overrides keyring. + Default is nil. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + user: + default: admin + description: |- + user is the rados user name. + Default is admin. + More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it + type: string + required: + - image + - monitors + type: object + scaleIO: + description: scaleIO represents a ScaleIO persistent + volume attached and mounted on Kubernetes nodes. + properties: + fsType: + default: xfs + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". + Default is "xfs". + type: string + gateway: + description: gateway is the host address of + the ScaleIO API Gateway. + type: string + protectionDomain: + description: protectionDomain is the name + of the ScaleIO Protection Domain for the + configured storage. + type: string + readOnly: + description: |- + readOnly Defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef references to the secret for ScaleIO user and other + sensitive information. If this is not provided, Login operation will fail. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + sslEnabled: + description: sslEnabled Flag enable/disable + SSL communication with Gateway, default + false + type: boolean + storageMode: + default: ThinProvisioned + description: |- + storageMode indicates whether the storage for a volume should be ThickProvisioned or ThinProvisioned. + Default is ThinProvisioned. + type: string + storagePool: + description: storagePool is the ScaleIO Storage + Pool associated with the protection domain. + type: string + system: + description: system is the name of the storage + system as configured in ScaleIO. + type: string + volumeName: + description: |- + volumeName is the name of a volume already created in the ScaleIO system + that is associated with this volume source. 
+ type: string + required: + - gateway + - secretRef + - system + type: object + secret: + description: |- + secret represents a secret that should populate this volume. + More info: https://kubernetes.io/docs/concepts/storage/volumes#secret + properties: + defaultMode: + description: |- + defaultMode is Optional: mode bits used to set permissions on created files by default. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values + for mode bits. Defaults to 0644. + Directories within the path are not affected by this setting. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + items: + description: |- + items If unspecified, each key-value pair in the Data field of the referenced + Secret will be projected into the volume as a file whose name is the + key and content is the value. If specified, the listed keys will be + projected into the specified paths, and unlisted keys will not be + present. If a key is specified which is not present in the Secret, + the volume setup will error unless it is marked optional. Paths must be + relative and may not contain the '..' path or start with '..'. + items: + description: Maps a string key to a path + within a volume. + properties: + key: + description: key is the key to project. + type: string + mode: + description: |- + mode is Optional: mode bits used to set permissions on this file. + Must be an octal value between 0000 and 0777 or a decimal value between 0 and 511. + YAML accepts both octal and decimal values, JSON requires decimal values for mode bits. + If not specified, the volume defaultMode will be used. + This might be in conflict with other options that affect the file + mode, like fsGroup, and the result can be other mode bits set. + format: int32 + type: integer + path: + description: |- + path is the relative path of the file to map the key to. + May not be an absolute path. + May not contain the path element '..'. + May not start with the string '..'. + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + optional: + description: optional field specify whether + the Secret or its keys must be defined + type: boolean + secretName: + description: |- + secretName is the name of the secret in the pod's namespace to use. + More info: https://kubernetes.io/docs/concepts/storage/volumes#secret + type: string + type: object + storageos: + description: storageOS represents a StorageOS + volume attached and mounted on Kubernetes nodes. + properties: + fsType: + description: |- + fsType is the filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + readOnly: + description: |- + readOnly defaults to false (read/write). ReadOnly here will force + the ReadOnly setting in VolumeMounts. + type: boolean + secretRef: + description: |- + secretRef specifies the secret to use for obtaining the StorageOS API + credentials. If not specified, default values will be attempted. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + volumeName: + description: |- + volumeName is the human-readable name of the StorageOS volume. Volume + names are only unique within a namespace. + type: string + volumeNamespace: + description: |- + volumeNamespace specifies the scope of the volume within StorageOS. If no + namespace is specified then the Pod's namespace will be used. This allows the + Kubernetes name scoping to be mirrored within StorageOS for tighter integration. + Set VolumeName to any name to override the default behaviour. + Set to "default" if you are not using namespaces within StorageOS. + Namespaces that do not pre-exist within StorageOS will be created. + type: string + type: object + vsphereVolume: + description: vsphereVolume represents a vSphere + volume attached and mounted on kubelets host + machine + properties: + fsType: + description: |- + fsType is filesystem type to mount. + Must be a filesystem type supported by the host operating system. + Ex. "ext4", "xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified. + type: string + storagePolicyID: + description: storagePolicyID is the storage + Policy Based Management (SPBM) profile ID + associated with the StoragePolicyName. + type: string + storagePolicyName: + description: storagePolicyName is the storage + Policy Based Management (SPBM) profile name. + type: string + volumePath: + description: volumePath is the path that identifies + vSphere volume vmdk + type: string + required: + - volumePath + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - containers + type: object + type: object + topologyRequest: + description: topologyRequest defines the topology request for + the PodSet. + properties: + podIndexLabel: + description: |- + PodIndexLabel indicates the name of the label indexing the pods. + For example, in the context of + - kubernetes job this is: kubernetes.io/job-completion-index + - JobSet: kubernetes.io/job-completion-index (inherited from Job) + - Kubeflow: training.kubeflow.org/replica-index + type: string + preferred: + description: |- + preferred indicates the topology level preferred by the PodSet, as + indicated by the `kueue.x-k8s.io/podset-preferred-topology` PodSet + annotation. + type: string + required: + description: |- + required indicates the topology level required by the PodSet, as + indicated by the `kueue.x-k8s.io/podset-required-topology` PodSet + annotation. + type: string + subGroupCount: + description: |- + SubGroupIndexLabel indicates the count of replicated Jobs (groups) within a PodSet. + For example, in the context of JobSet this value is read from jobset.sigs.k8s.io/replicatedjob-replicas. + format: int32 + type: integer + subGroupIndexLabel: + description: |- + SubGroupIndexLabel indicates the name of the label indexing the instances of replicated Jobs (groups) + within a PodSet. For example, in the context of JobSet this is jobset.sigs.k8s.io/job-index. + type: string + type: object + required: + - count + - template + type: object + x-kubernetes-validations: + - message: minCount should be positive and less or equal to count + rule: 'has(self.minCount) ? 
self.minCount <= self.count : true' + maxItems: 8 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + priority: + description: |- + Priority determines the order of access to the resources managed by the + ClusterQueue where the workload is queued. + The priority value is populated from PriorityClassName. + The higher the value, the higher the priority. + If priorityClassName is specified, priority must not be null. + format: int32 + type: integer + priorityClassName: + description: |- + If specified, indicates the workload's priority. + "system-node-critical" and "system-cluster-critical" are two special + keywords which indicate the highest priorities with the former being + the highest priority. Any other name must be defined by creating a + PriorityClass object with that name. If not specified, the workload + priority will be default or zero if there is no default. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + priorityClassSource: + default: "" + description: |- + priorityClassSource determines whether the priorityClass field refers to a pod PriorityClass or kueue.x-k8s.io/workloadpriorityclass. + Workload's PriorityClass can accept the name of a pod priorityClass or a workloadPriorityClass. + When using pod PriorityClass, a priorityClassSource field has the scheduling.k8s.io/priorityclass value. + enum: + - kueue.x-k8s.io/workloadpriorityclass + - scheduling.k8s.io/priorityclass + - "" + type: string + queueName: + description: |- + queueName is the name of the LocalQueue the Workload is associated with. + queueName cannot be changed while .status.admission is not null. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + required: + - podSets + type: object + x-kubernetes-validations: + - message: priority should not be nil when priorityClassName is set + rule: 'has(self.priorityClassName) ? has(self.priority) : true' + status: + description: WorkloadStatus defines the observed state of Workload + properties: + accumulatedPastExexcutionTimeSeconds: + description: |- + accumulatedPastExexcutionTimeSeconds holds the total time, in seconds, the workload spent + in Admitted state, in the previous `Admit` - `Evict` cycles. + format: int32 + type: integer + admission: + description: |- + admission holds the parameters of the admission of the workload by a + ClusterQueue. admission can be set back to null, but its fields cannot be + changed once set. + properties: + clusterQueue: + description: clusterQueue is the name of the ClusterQueue that + admitted this workload. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + podSetAssignments: + description: PodSetAssignments hold the admission results for + each of the .spec.podSets entries. + items: + properties: + count: + description: |- + count is the number of pods taken into account at admission time. + This field will not change in case of quota reclaim. + Value could be missing for Workloads created before this field was added, + in that case spec.podSets[*].count value will be used. + format: int32 + minimum: 0 + type: integer + flavors: + additionalProperties: + description: ResourceFlavorReference is the name of the + ResourceFlavor. 
+ maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + description: Flavors are the flavors assigned to the workload + for each resource. + type: object + name: + default: main + description: Name is the name of the podSet. It should match + one of the names in .spec.podSets. + maxLength: 63 + pattern: ^(?i)[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + resourceUsage: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + resourceUsage keeps track of the total resources all the pods in the podset need to run. + + Beside what is provided in podSet's specs, this calculation takes into account + the LimitRange defaults and RuntimeClass overheads at the moment of admission. + This field will not change in case of quota reclaim. + type: object + topologyAssignment: + description: |- + topologyAssignment indicates the topology assignment divided into + topology domains corresponding to the lowest level of the topology. + The assignment specifies the number of Pods to be scheduled per topology + domain and specifies the node selectors for each topology domain, in the + following way: the node selector keys are specified by the levels field + (same for all domains), and the corresponding node selector value is + specified by the domains.values subfield. If the TopologySpec.Levels field contains + "kubernetes.io/hostname" label, topologyAssignment will contain data only for + this label, and omit higher levels in the topology + + Example: + + topologyAssignment: + levels: + - cloud.provider.com/topology-block + - cloud.provider.com/topology-rack + domains: + - values: [block-1, rack-1] + count: 4 + - values: [block-1, rack-2] + count: 2 + + Here: + - 4 Pods are to be scheduled on nodes matching the node selector: + cloud.provider.com/topology-block: block-1 + cloud.provider.com/topology-rack: rack-1 + - 2 Pods are to be scheduled on nodes matching the node selector: + cloud.provider.com/topology-block: block-1 + cloud.provider.com/topology-rack: rack-2 + + Example: + Below there is an equivalent of the above example assuming, Topology + object defines kubernetes.io/hostname as the lowest level in topology. + Hence we omit higher level of topologies, since the hostname label + is sufficient to explicitly identify a proper node. + + topologyAssignment: + levels: + - kubernetes.io/hostname + domains: + - values: [hostname-1] + count: 4 + - values: [hostname-2] + count: 2 + properties: + domains: + description: |- + domains is a list of topology assignments split by topology domains at + the lowest level of the topology. + items: + properties: + count: + description: |- + count indicates the number of Pods to be scheduled in the topology + domain indicated by the values field. + format: int32 + minimum: 1 + type: integer + values: + description: |- + values is an ordered list of node selector values describing a topology + domain. The values correspond to the consecutive topology levels, from + the highest to the lowest. + items: + type: string + maxItems: 8 + minItems: 1 + type: array + x-kubernetes-list-type: atomic + required: + - count + - values + type: object + type: array + levels: + description: |- + levels is an ordered list of keys denoting the levels of the assigned + topology (i.e. 
node label keys), from the highest to the lowest level of + the topology. + items: + type: string + maxItems: 8 + minItems: 1 + type: array + x-kubernetes-list-type: atomic + required: + - domains + - levels + type: object + required: + - name + type: object + maxItems: 8 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + required: + - clusterQueue + - podSetAssignments + type: object + admissionChecks: + description: admissionChecks list all the admission checks required + by the workload and the current status + items: + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + name: + description: name identifies the admission check. + maxLength: 316 + type: string + podSetUpdates: + items: + description: |- + PodSetUpdate contains a list of pod set modifications suggested by AdmissionChecks. + The modifications should be additive only - modifications of already existing keys + or having the same key provided by multiple AdmissionChecks is not allowed and will + result in failure during workload admission. + properties: + annotations: + additionalProperties: + type: string + type: object + labels: + additionalProperties: + type: string + type: object + name: + description: Name of the PodSet to modify. Should match + to one of the Workload's PodSets. + type: string + nodeSelector: + additionalProperties: + type: string + type: object + tolerations: + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + maxItems: 8 + type: array + x-kubernetes-validations: + - message: operator must be Exists when 'key' is empty, + which means 'match all values and all keys' + rule: 'self.all(x, !has(x.key) ? 
x.operator == ''Exists'' + : true)' + - message: effect must be 'NoExecute' when 'tolerationSeconds' + is set + rule: 'self.all(x, has(x.tolerationSeconds) ? x.effect + == ''NoExecute'' : true)' + - message: 'supported toleration values: ''Equal''(default), + ''Exists''' + rule: self.all(x, !has(x.operator) || x.operator in + ['Equal', 'Exists']) + - message: a value must be empty when 'operator' is 'Exists' + rule: 'self.all(x, has(x.operator) && x.operator == + ''Exists'' ? !has(x.value) : true)' + - message: 'supported taint effect values: ''NoSchedule'', + ''PreferNoSchedule'', ''NoExecute''' + rule: self.all(x, !has(x.effect) || x.effect in ['NoSchedule', + 'PreferNoSchedule', 'NoExecute']) + required: + - name + type: object + maxItems: 8 + type: array + x-kubernetes-list-type: atomic + state: + description: state of the admissionCheck, one of Pending, Ready, + Retry, Rejected + enum: + - Pending + - Ready + - Retry + - Rejected + type: string + required: + - lastTransitionTime + - message + - name + - state + type: object + maxItems: 8 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + conditions: + description: |- + conditions hold the latest available observations of the Workload + current state. + + The type of the condition could be: + + - Admitted: the Workload was admitted through a ClusterQueue. + - Finished: the associated workload finished running (failed or succeeded). + - PodsReady: at least `.spec.podSets[*].count` Pods are ready or have + succeeded. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + reclaimablePods: + description: |- + reclaimablePods keeps track of the number pods within a podset for which + the resource reservation is no longer needed. + items: + properties: + count: + description: count is the number of pods for which the requested + resources are no longer needed. + format: int32 + minimum: 0 + type: integer + name: + description: name is the PodSet name. + type: string + required: + - count + - name + type: object + maxItems: 8 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + requeueState: + description: |- + requeueState holds the re-queue state + when a workload meets Eviction with PodsReadyTimeout reason. + properties: + count: + description: |- + count records the number of times a workload has been re-queued + When a deactivated (`.spec.activate`=`false`) workload is reactivated (`.spec.activate`=`true`), + this count would be reset to null. + format: int32 + minimum: 0 + type: integer + requeueAt: + description: |- + requeueAt records the time when a workload will be re-queued. + When a deactivated (`.spec.activate`=`false`) workload is reactivated (`.spec.activate`=`true`), + this time would be reset to null. + format: date-time + type: string + type: object + resourceRequests: + description: |- + resourceRequests provides a detailed view of the resources that were + requested by a non-admitted workload when it was considered for admission. + If admission is non-null, resourceRequests will be empty because + admission.resourceUsage contains the detailed information. + items: + properties: + name: + default: main + description: name is the name of the podSet. It should match + one of the names in .spec.podSets. + maxLength: 63 + pattern: ^(?i)[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + resources: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + resources is the total resources all the pods in the podset need to run. + + Beside what is provided in podSet's specs, this value also takes into account + the LimitRange defaults and RuntimeClass overheads at the moment of consideration + and the application of resource.excludeResourcePrefixes and resource.transformations. + type: object + required: + - name + type: object + maxItems: 8 + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + type: object + x-kubernetes-validations: + - message: podSetAssignments must have the same number of podSets as the spec + rule: 'has(self.status) && has(self.status.conditions) && self.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'') && has(self.status.admission) + ? size(self.spec.podSets) == size(self.status.admission.podSetAssignments) + : true' + - message: field is immutable + rule: '(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'')) ? 
(oldSelf.spec.priorityClassSource + == self.spec.priorityClassSource) : true' + - message: field is immutable + rule: '(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'') && has(oldSelf.spec.priorityClassName) + && has(self.spec.priorityClassName)) ? (oldSelf.spec.priorityClassName + == self.spec.priorityClassName) : true' + - message: field is immutable + rule: '(has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == ''QuotaReserved'' && c.status == ''True'')) && (has(self.status) + && has(self.status.conditions) && self.status.conditions.exists(c, c.type + == ''QuotaReserved'' && c.status == ''True'')) && has(oldSelf.spec.queueName) + && has(self.spec.queueName) ? oldSelf.spec.queueName == self.spec.queueName + : true' + - message: maximumExecutionTimeSeconds is immutable while admitted + rule: ((has(oldSelf.status) && has(oldSelf.status.conditions) && oldSelf.status.conditions.exists(c, + c.type == 'Admitted' && c.status == 'True')) && (has(self.status) && has(self.status.conditions) + && self.status.conditions.exists(c, c.type == 'Admitted' && c.status == + 'True')))?((has(oldSelf.spec.maximumExecutionTimeSeconds)?oldSelf.spec.maximumExecutionTimeSeconds:0) + == (has(self.spec.maximumExecutionTimeSeconds)?self.spec.maximumExecutionTimeSeconds:0)):true + served: true + storage: true + subresources: + status: {} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-controller-manager + namespace: kueue-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-leader-election-role + namespace: kueue-system +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +--- +aggregationRule: + clusterRoleSelectors: + - matchLabels: + rbac.kueue.x-k8s.io/batch-admin: "true" +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-batch-admin-role +--- +aggregationRule: + clusterRoleSelectors: + - matchLabels: + rbac.kueue.x-k8s.io/batch-user: "true" +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-batch-user-role +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-clusterqueue-editor-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - clusterqueues + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - clusterqueues/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: 
ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-clusterqueue-viewer-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - clusterqueues + verbs: + - get + - list + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - clusterqueues/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-job-editor-role +rules: +- apiGroups: + - batch + resources: + - jobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - batch + resources: + - jobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-job-viewer-role +rules: +- apiGroups: + - batch + resources: + - jobs + verbs: + - get + - list + - watch +- apiGroups: + - batch + resources: + - jobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-jobset-editor-role +rules: +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-jobset-viewer-role +rules: +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets + verbs: + - get + - list + - watch +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-localqueue-editor-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - localqueues + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - localqueues/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-localqueue-viewer-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - localqueues + verbs: + - get + - list + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - localqueues/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + 
app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-manager-role +rules: +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update + - watch +- apiGroups: + - "" + resources: + - limitranges + - namespaces + - nodes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + verbs: + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: + - pods/finalizers + verbs: + - get + - update +- apiGroups: + - "" + resources: + - pods/status + verbs: + - get + - patch +- apiGroups: + - "" + resources: + - podtemplates + verbs: + - create + - delete + - get + - list + - update + - watch +- apiGroups: + - "" + resources: + - secrets + verbs: + - get + - list + - update + - watch +- apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + - validatingwebhookconfigurations + verbs: + - get + - list + - update + - watch +- apiGroups: + - admissionregistration.k8s.io + resources: + - validatingadmissionpolicies + - validatingadmissionpolicybindings + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - replicasets + - statefulsets + verbs: + - get + - list + - watch +- apiGroups: + - autoscaling.x-k8s.io + resources: + - provisioningrequests + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - autoscaling.x-k8s.io + resources: + - provisioningrequests/status + verbs: + - get +- apiGroups: + - batch + resources: + - jobs + verbs: + - get + - list + - patch + - update + - watch +- apiGroups: + - batch + resources: + - jobs/finalizers + - jobs/status + verbs: + - get + - patch + - update +- apiGroups: + - flowcontrol.apiserver.k8s.io + resources: + - flowschemas + - prioritylevelconfigurations + verbs: + - list + - watch +- apiGroups: + - flowcontrol.apiserver.k8s.io + resources: + - flowschemas/status + verbs: + - patch +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets + verbs: + - get + - list + - patch + - update + - watch +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets/finalizers + verbs: + - get + - update +- apiGroups: + - jobset.x-k8s.io + resources: + - jobsets/status + verbs: + - get + - patch + - update +- apiGroups: + - kubeflow.org + resources: + - mpijobs + - mxjobs + - paddlejobs + - pytorchjobs + - tfjobs + - xgboostjobs + verbs: + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - mpijobs/finalizers + - mxjobs/finalizers + - mxjobs/status + - paddlejobs/finalizers + - pytorchjobs/finalizers + - tfjobs/finalizers + - xgboostjobs/finalizers + verbs: + - get + - update +- apiGroups: + - kubeflow.org + resources: + - mpijobs/status + - paddlejobs/status + - pytorchjobs/status + - tfjobs/status + - xgboostjobs/status + verbs: + - get + - patch + - update +- apiGroups: + - kueue.x-k8s.io + resources: + - admissionchecks + - clusterqueues + - cohorts + - localqueues + - workloads + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - admissionchecks/finalizers + - clusterqueues/finalizers + - localqueues/finalizers + - resourceflavors/finalizers + - topology/finalizers + - workloads/finalizers + verbs: + - update +- apiGroups: + - kueue.x-k8s.io + resources: + - admissionchecks/status + - clusterqueues/status + - localqueues/status + - multikueueclusters/status + - workloads/status + verbs: + - get + - patch + - 
update +- apiGroups: + - kueue.x-k8s.io + resources: + - multikueueclusters + - multikueueconfigs + - provisioningrequestconfigs + - topologies + - workloadpriorityclasses + verbs: + - get + - list + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - resourceflavors + verbs: + - delete + - get + - list + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - topology + verbs: + - get + - list + - update + - watch +- apiGroups: + - node.k8s.io + resources: + - runtimeclasses + verbs: + - get + - list + - watch +- apiGroups: + - ray.io + resources: + - rayclusters + - rayjobs + verbs: + - get + - list + - patch + - update + - watch +- apiGroups: + - ray.io + resources: + - rayclusters/finalizers + - rayclusters/status + - rayjobs/finalizers + - rayjobs/status + verbs: + - get + - update +- apiGroups: + - scheduling.k8s.io + resources: + - priorityclasses + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-metrics-reader +rules: +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-mpijob-editor-role +rules: +- apiGroups: + - kubeflow.org + resources: + - mpijobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - mpijobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-mpijob-viewer-role +rules: +- apiGroups: + - kubeflow.org + resources: + - mpijobs + verbs: + - get + - list + - watch +- apiGroups: + - kubeflow.org + resources: + - mpijobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-mxjob-editor-role +rules: +- apiGroups: + - kubeflow.org + resources: + - mxjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - mxjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-mxjob-viewer-role +rules: +- apiGroups: + - kubeflow.org + resources: + - mxjobs + verbs: + - get + - list + - watch +- apiGroups: + - kubeflow.org + resources: + - mxjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: 
kueue-paddlejob-editor-role +rules: +- apiGroups: + - kubeflow.org + resources: + - paddlejobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - paddlejobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-paddlejob-viewer-role +rules: +- apiGroups: + - kubeflow.org + resources: + - paddlejobs + verbs: + - get + - list + - watch +- apiGroups: + - kubeflow.org + resources: + - paddlejobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-pending-workloads-cq-viewer-role +rules: +- apiGroups: + - visibility.kueue.x-k8s.io + resources: + - clusterqueues/pendingworkloads + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-pending-workloads-lq-viewer-role +rules: +- apiGroups: + - visibility.kueue.x-k8s.io + resources: + - localqueues/pendingworkloads + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-proxy-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-pytorchjob-editor-role +rules: +- apiGroups: + - kubeflow.org + resources: + - pytorchjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - pytorchjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-pytorchjob-viewer-role +rules: +- apiGroups: + - kubeflow.org + resources: + - pytorchjobs + verbs: + - get + - list + - watch +- apiGroups: + - kubeflow.org + resources: + - pytorchjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-raycluster-editor-role +rules: +- apiGroups: + - ray.io + resources: + - rayclusters + verbs: + - create + - delete + - get + - list + - patch + - 
update + - watch +- apiGroups: + - ray.io + resources: + - rayclusters/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-raycluster-viewer-role +rules: +- apiGroups: + - ray.io + resources: + - rayclusters + verbs: + - get + - list + - watch +- apiGroups: + - ray.io + resources: + - rayclusters/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-rayjob-editor-role +rules: +- apiGroups: + - ray.io + resources: + - rayjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - ray.io + resources: + - rayjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-rayjob-viewer-role +rules: +- apiGroups: + - ray.io + resources: + - rayjobs + verbs: + - get + - list + - watch +- apiGroups: + - ray.io + resources: + - rayjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-resourceflavor-editor-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - resourceflavors + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-resourceflavor-viewer-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - resourceflavors + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-tfjob-editor-role +rules: +- apiGroups: + - kubeflow.org + resources: + - tfjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - tfjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-tfjob-viewer-role +rules: +- apiGroups: + - kubeflow.org + resources: + - tfjobs + verbs: + - get + - list + - watch +- apiGroups: + - kubeflow.org + resources: + - tfjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: 
kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + name: kueue-workload-editor-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - workloads + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - workloads/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-workload-viewer-role +rules: +- apiGroups: + - kueue.x-k8s.io + resources: + - workloads + verbs: + - get + - list + - watch +- apiGroups: + - kueue.x-k8s.io + resources: + - workloads/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-xgboostjob-editor-role +rules: +- apiGroups: + - kubeflow.org + resources: + - xgboostjobs + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - kubeflow.org + resources: + - xgboostjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + rbac.kueue.x-k8s.io/batch-admin: "true" + rbac.kueue.x-k8s.io/batch-user: "true" + name: kueue-xgboostjob-viewer-role +rules: +- apiGroups: + - kubeflow.org + resources: + - xgboostjobs + verbs: + - get + - list + - watch +- apiGroups: + - kubeflow.org + resources: + - xgboostjobs/status + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-visibility-server-auth-reader + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: extension-apiserver-authentication-reader +subjects: +- kind: ServiceAccount + name: kueue-controller-manager + namespace: kueue-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-leader-election-rolebinding + namespace: kueue-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: kueue-leader-election-role +subjects: +- kind: ServiceAccount + name: kueue-controller-manager + namespace: kueue-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kueue-manager-role +subjects: +- kind: ServiceAccount + name: kueue-controller-manager + namespace: kueue-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-proxy-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: 
ClusterRole + name: kueue-proxy-role +subjects: +- kind: ServiceAccount + name: kueue-controller-manager + namespace: kueue-system +--- +apiVersion: v1 +data: + controller_manager_config.yaml: | + apiVersion: config.kueue.x-k8s.io/v1beta1 + kind: Configuration + health: + healthProbeBindAddress: :8081 + metrics: + bindAddress: :8080 + # enableClusterQueueResources: true + webhook: + port: 9443 + leaderElection: + leaderElect: true + resourceName: c1f6bfd2.kueue.x-k8s.io + controller: + groupKindConcurrency: + Job.batch: 5 + Pod: 5 + Workload.kueue.x-k8s.io: 5 + LocalQueue.kueue.x-k8s.io: 1 + Cohort.kueue.x-k8s.io: 1 + ClusterQueue.kueue.x-k8s.io: 1 + ResourceFlavor.kueue.x-k8s.io: 1 + clientConnection: + qps: 50 + burst: 100 + #pprofBindAddress: :8083 + #waitForPodsReady: + # enable: false + # timeout: 5m + # blockAdmission: false + # requeuingStrategy: + # timestamp: Eviction + # backoffLimitCount: null # null indicates infinite requeuing + # backoffBaseSeconds: 60 + # backoffMaxSeconds: 3600 + #manageJobsWithoutQueueName: true + #managedJobsNamespaceSelector: + # matchLabels: + # kueue-managed: "true" + #internalCertManagement: + # enable: false + # webhookServiceName: "" + # webhookSecretName: "" + integrations: + frameworks: + - "batch/job" + - "kubeflow.org/mpijob" + - "ray.io/rayjob" + - "ray.io/raycluster" + - "jobset.x-k8s.io/jobset" + - "kubeflow.org/mxjob" + - "kubeflow.org/paddlejob" + - "kubeflow.org/pytorchjob" + - "kubeflow.org/tfjob" + - "kubeflow.org/xgboostjob" + # - "pod" + # - "deployment" # requires enabling pod integration + # - "statefulset" # requires enabling pod integration + # externalFrameworks: + # - "Foo.v1.example.com" + # podOptions: + # namespaceSelector: + # matchExpressions: + # - key: kubernetes.io/metadata.name + # operator: NotIn + # values: [ kube-system, kueue-system ] + #fairSharing: + # enable: true + # preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare] + #resources: + # excludeResourcePrefixes: [] + # transformations: + # - input: nvidia.com/mig-4g.5gb + # strategy: Replace | Retain + # outputs: + # example.com/accelerator-memory: 5Gi + # example.com/accelerator-gpc: 4 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-manager-config + namespace: kueue-system +--- +apiVersion: v1 +kind: Secret +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-webhook-server-cert + namespace: kueue-system +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-controller-manager-metrics-service + namespace: kueue-system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: https + selector: + control-plane: controller-manager +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-visibility-server + namespace: kueue-system +spec: + ports: + - name: https + port: 443 + protocol: TCP + targetPort: 8082 + selector: + control-plane: controller-manager +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-webhook-service + namespace: kueue-system +spec: + 
ports: + - port: 443 + protocol: TCP + targetPort: 9443 + selector: + control-plane: controller-manager +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-controller-manager + namespace: kueue-system +spec: + replicas: 1 + selector: + matchLabels: + control-plane: controller-manager + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + spec: + containers: + - args: + - --config=/controller_manager_config.yaml + - --zap-log-level=2 + - --feature-gates=TopologyAwareScheduling=true + command: + - /manager + image: registry.k8s.io/kueue/kueue:v0.10.0 + imagePullPolicy: Always + livenessProbe: + httpGet: + path: /healthz + port: 8081 + initialDelaySeconds: 15 + periodSeconds: 20 + name: manager + ports: + - containerPort: 8082 + name: visibility + protocol: TCP + - containerPort: 9443 + name: webhook-server + protocol: TCP + readinessProbe: + httpGet: + path: /readyz + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 500m + memory: 512Mi + securityContext: + allowPrivilegeEscalation: false + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + - mountPath: /controller_manager_config.yaml + name: manager-config + subPath: controller_manager_config.yaml + - args: + - --secure-listen-address=0.0.0.0:8443 + - --upstream=http://127.0.0.1:8080/ + - --logtostderr=true + - --v=10 + image: registry.k8s.io/kubebuilder/kube-rbac-proxy:v0.16.0 + name: kube-rbac-proxy + ports: + - containerPort: 8443 + name: https + protocol: TCP + securityContext: + runAsNonRoot: true + serviceAccountName: kueue-controller-manager + terminationGracePeriodSeconds: 10 + volumes: + - name: cert + secret: + defaultMode: 420 + secretName: kueue-webhook-server-cert + - configMap: + name: kueue-manager-config + name: manager-config + - effect: NoSchedule + key: components.gke.io/gke-managed-components + operator: Equal + value: "true" +--- +apiVersion: apiregistration.k8s.io/v1 +kind: APIService +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: v1beta1.visibility.kueue.x-k8s.io +spec: + group: visibility.kueue.x-k8s.io + groupPriorityMinimum: 100 + insecureSkipTLSVerify: true + service: + name: kueue-visibility-server + namespace: kueue-system + version: v1beta1 + versionPriority: 100 +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-mutating-webhook-configuration +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate--v1-pod + failurePolicy: Fail + name: mpod.kb.io + namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: NotIn + values: + - kube-system + - kueue-system + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + resources: + - pods + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: 
/mutate-apps-v1-deployment + failurePolicy: Fail + name: mdeployment.kb.io + namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: NotIn + values: + - kube-system + - kueue-system + rules: + - apiGroups: + - apps + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - deployments + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-batch-v1-job + failurePolicy: Fail + name: mjob.kb.io + rules: + - apiGroups: + - batch + apiVersions: + - v1 + operations: + - CREATE + resources: + - jobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-jobset-x-k8s-io-v1alpha2-jobset + failurePolicy: Fail + name: mjobset.kb.io + rules: + - apiGroups: + - jobset.x-k8s.io + apiVersions: + - v1alpha2 + operations: + - CREATE + resources: + - jobsets + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kubeflow-org-v1-mxjob + failurePolicy: Fail + name: mmxjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + resources: + - mxjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kubeflow-org-v1-paddlejob + failurePolicy: Fail + name: mpaddlejob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + resources: + - paddlejobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kubeflow-org-v1-pytorchjob + failurePolicy: Fail + name: mpytorchjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + resources: + - pytorchjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kubeflow-org-v1-tfjob + failurePolicy: Fail + name: mtfjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + resources: + - tfjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kubeflow-org-v1-xgboostjob + failurePolicy: Fail + name: mxgboostjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + resources: + - xgboostjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kubeflow-org-v2beta1-mpijob + failurePolicy: Fail + name: mmpijob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v2beta1 + operations: + - CREATE + resources: + - mpijobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-ray-io-v1-raycluster + failurePolicy: Fail + name: mraycluster.kb.io + rules: + - apiGroups: + - ray.io + apiVersions: + - v1 + operations: + - CREATE + resources: + - rayclusters + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-ray-io-v1-rayjob + 
failurePolicy: Fail + name: mrayjob.kb.io + rules: + - apiGroups: + - ray.io + apiVersions: + - v1 + operations: + - CREATE + resources: + - rayjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-apps-v1-statefulset + failurePolicy: Fail + name: mstatefulset.kb.io + rules: + - apiGroups: + - apps + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - statefulsets + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kueue-x-k8s-io-v1beta1-clusterqueue + failurePolicy: Fail + name: mclusterqueue.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1beta1 + operations: + - CREATE + resources: + - clusterqueues + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kueue-x-k8s-io-v1beta1-resourceflavor + failurePolicy: Fail + name: mresourceflavor.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1beta1 + operations: + - CREATE + resources: + - resourceflavors + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /mutate-kueue-x-k8s-io-v1beta1-workload + failurePolicy: Fail + name: mworkload.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1beta1 + operations: + - CREATE + resources: + - workloads + sideEffects: None +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + labels: + app.kubernetes.io/component: controller + app.kubernetes.io/name: kueue + control-plane: controller-manager + name: kueue-validating-webhook-configuration +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate--v1-pod + failurePolicy: Fail + name: vpod.kb.io + namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: NotIn + values: + - kube-system + - kueue-system + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - pods + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-apps-v1-deployment + failurePolicy: Fail + name: vdeployment.kb.io + namespaceSelector: + matchExpressions: + - key: kubernetes.io/metadata.name + operator: NotIn + values: + - kube-system + - kueue-system + rules: + - apiGroups: + - apps + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - deployments + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-batch-v1-job + failurePolicy: Fail + name: vjob.kb.io + rules: + - apiGroups: + - batch + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - jobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-jobset-x-k8s-io-v1alpha2-jobset + failurePolicy: Fail + name: vjobset.kb.io + rules: + - apiGroups: + - jobset.x-k8s.io + apiVersions: + - v1alpha2 + operations: + - CREATE + - UPDATE + resources: + - jobsets + sideEffects: None +- admissionReviewVersions: + - v1 + 
clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kubeflow-org-v1-mxjob + failurePolicy: Fail + name: vmxjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - mxjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kubeflow-org-v1-paddlejob + failurePolicy: Fail + name: vpaddlejob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - paddlejobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kubeflow-org-v1-pytorchjob + failurePolicy: Fail + name: vpytorchjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - pytorchjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kubeflow-org-v1-tfjob + failurePolicy: Fail + name: vtfjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - tfjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kubeflow-org-v1-xgboostjob + failurePolicy: Fail + name: vxgboostjob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - xgboostjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kubeflow-org-v2beta1-mpijob + failurePolicy: Fail + name: vmpijob.kb.io + rules: + - apiGroups: + - kubeflow.org + apiVersions: + - v2beta1 + operations: + - CREATE + - UPDATE + resources: + - mpijobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-ray-io-v1-raycluster + failurePolicy: Fail + name: vraycluster.kb.io + rules: + - apiGroups: + - ray.io + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - rayclusters + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-ray-io-v1-rayjob + failurePolicy: Fail + name: vrayjob.kb.io + rules: + - apiGroups: + - ray.io + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - rayjobs + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-apps-v1-statefulset + failurePolicy: Fail + name: vstatefulset.kb.io + rules: + - apiGroups: + - apps + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - statefulsets + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kueue-x-k8s-io-v1beta1-clusterqueue + failurePolicy: Fail + name: vclusterqueue.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - clusterqueues + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + 
name: kueue-webhook-service + namespace: kueue-system + path: /validate-kueue-x-k8s-io-v1alpha1-cohort + failurePolicy: Fail + name: vcohort.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - cohorts + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kueue-x-k8s-io-v1beta1-resourceflavor + failurePolicy: Fail + name: vresourceflavor.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - resourceflavors + sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: kueue-webhook-service + namespace: kueue-system + path: /validate-kueue-x-k8s-io-v1beta1-workload + failurePolicy: Fail + name: vworkload.kb.io + rules: + - apiGroups: + - kueue.x-k8s.io + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - workloads + - workloads/status + sideEffects: None diff --git a/modules/management/kubectl-apply/variables.tf b/modules/management/kubectl-apply/variables.tf index c493332e7c..6c65342e65 100644 --- a/modules/management/kubectl-apply/variables.tf +++ b/modules/management/kubectl-apply/variables.tf @@ -15,7 +15,7 @@ */ locals { - kueue_supported_versions = ["v0.9.1", "v0.9.0", "v0.8.1"] + kueue_supported_versions = ["v0.10.0", "v0.9.1", "v0.9.0", "v0.8.1"] jobset_supported_versions = ["v0.7.1", "v0.5.2"] } diff --git a/tools/cloud-build/daily-tests/blueprints/gke-a2-highgpu.yaml b/tools/cloud-build/daily-tests/blueprints/gke-a2-highgpu.yaml index 726768ffa8..a09e180732 100644 --- a/tools/cloud-build/daily-tests/blueprints/gke-a2-highgpu.yaml +++ b/tools/cloud-build/daily-tests/blueprints/gke-a2-highgpu.yaml @@ -93,6 +93,6 @@ deployment_groups: settings: kueue: install: true - version: v0.9.0 + version: v0.10.0 jobset: install: true From 539711e463759d760a32f6c51d37b7ba5f0f2c91 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Tue, 17 Dec 2024 20:27:08 +0000 Subject: [PATCH 034/140] Fix clean_up placement policy bug --- .../modules/slurm_files/scripts/slurmsync.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py index 21d9324e79..b06d093e78 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py @@ -303,13 +303,11 @@ def _seconds_since_timestamp(timestamp): def delete_placement_groups(placement_groups): - def delete_placement_request(pg_name, region): - return lookup().compute.resourcePolicies().delete( - project=lookup().project, region=region, resourcePolicy=pg_name - ) - requests = { - pg.name: delete_placement_request(pg["name"], util.trim_self_link(pg["region"])) + pg["name"]: lookup().compute.resourcePolicies().delete( + project=lookup().project, + region=util.trim_self_link(pg["region"]), + resourcePolicy=pg["name"]) for pg in placement_groups } From e675d5541e94838cc9825bcbb50397d70c171b32 Mon Sep 17 00:00:00 2001 From: Ankit Kinra <1037624+ankitkinra@users.noreply.github.com> Date: Tue, 17 Dec 2024 23:00:39 +0000 Subject: [PATCH 035/140] Update validation script to skip the new a3u blueprint --- 
tools/validate_configs/validate_configs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/validate_configs/validate_configs.sh b/tools/validate_configs/validate_configs.sh index 996b25006f..a414f2eb33 100755 --- a/tools/validate_configs/validate_configs.sh +++ b/tools/validate_configs/validate_configs.sh @@ -120,7 +120,7 @@ check_background() { fi } -CONFIGS=$(find examples/ community/examples/ tools/validate_configs/test_configs/ docs/tutorials/ docs/videos/build-your-own-blueprint/ -name "*.yaml" -type f -not -path 'examples/machine-learning/a3-megagpu-8g/*') +CONFIGS=$(find examples/ community/examples/ tools/validate_configs/test_configs/ docs/tutorials/ docs/videos/build-your-own-blueprint/ -name "*.yaml" -type f -not -path 'examples/machine-learning/a3-megagpu-8g/*' -not -path 'examples/gke-a3-ultragpu/*') cwd=$(pwd) NPROCS=${NPROCS:-$(nproc)} echo "Running tests in $NPROCS processes" From bb3640291926ef9de917a716f7b437e2765a407a Mon Sep 17 00:00:00 2001 From: Ankit Kinra <1037624+ankitkinra@users.noreply.github.com> Date: Wed, 18 Dec 2024 00:18:26 +0000 Subject: [PATCH 036/140] resolve linter errors --- examples/gke-a3-ultragpu/README.md | 2 +- .../gke-a3-ultragpu-deployment.yaml | 42 ++++++++++++------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/examples/gke-a3-ultragpu/README.md b/examples/gke-a3-ultragpu/README.md index 73b37bbfcb..a7831af889 100644 --- a/examples/gke-a3-ultragpu/README.md +++ b/examples/gke-a3-ultragpu/README.md @@ -1 +1 @@ -Refer to [AI Hypercomputer Documentation](https://cloud.google.com/ai-hypercomputer/docs/create/gke-ai-hypercompute#create-cluster) for instructions. \ No newline at end of file +Refer to [AI Hypercomputer Documentation](https://cloud.google.com/ai-hypercomputer/docs/create/gke-ai-hypercompute#create-cluster) for instructions. diff --git a/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml b/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml index b7a8d24071..0e475ec2d6 100644 --- a/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml +++ b/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml @@ -1,16 +1,30 @@ +# Copyright 2024 "Google LLC" +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ --- - terraform_backend_defaults: - type: gcs - configuration: - bucket: BUCKET_NAME +terraform_backend_defaults: + type: gcs + configuration: + bucket: BUCKET_NAME - vars: - deployment_name: gke-a3-ultra - project_id: PROJECT_ID - region: COMPUTE_REGION - zone: COMPUTE_ZONE - authorized_cidr: / - # In order to not target a BLOCK_NAME, extended_reservation can be inputed as - # extended_reservation: RESERVATION_NAME - extended_reservation: RESERVATION_NAME/reservationBlocks/BLOCK_NAME - static_node_count: NODE_COUNT +vars: + deployment_name: gke-a3-ultra + project_id: PROJECT_ID + region: COMPUTE_REGION + zone: COMPUTE_ZONE + authorized_cidr: / + # In order to not target a BLOCK_NAME, extended_reservation can be inputted as + # extended_reservation: RESERVATION_NAME + extended_reservation: RESERVATION_NAME/reservationBlocks/BLOCK_NAME + static_node_count: NODE_COUNT From 9f505af82e0bf84ed069edef7589b4fc1ad3f169 Mon Sep 17 00:00:00 2001 From: Harsh Thakkar Date: Wed, 18 Dec 2024 11:57:40 +0000 Subject: [PATCH 037/140] Update terraform provider from google-beta to beta for parallelstore --- modules/file-system/parallelstore/README.md | 6 +++--- modules/file-system/parallelstore/main.tf | 1 - modules/file-system/parallelstore/versions.tf | 6 +++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/modules/file-system/parallelstore/README.md b/modules/file-system/parallelstore/README.md index 46f0969b93..745affcc64 100644 --- a/modules/file-system/parallelstore/README.md +++ b/modules/file-system/parallelstore/README.md @@ -114,7 +114,7 @@ limitations under the License. | Name | Version | |------|---------| | [terraform](#requirement\_terraform) | >= 0.13 | -| [google-beta](#requirement\_google-beta) | >= 5.25.0 | +| [google](#requirement\_google) | >= 6.13.0 | | [null](#requirement\_null) | ~> 3.0 | | [random](#requirement\_random) | ~> 3.0 | @@ -122,7 +122,7 @@ limitations under the License. | Name | Version | |------|---------| -| [google-beta](#provider\_google-beta) | >= 5.25.0 | +| [google](#provider\_google) | >= 6.13.0 | | [null](#provider\_null) | ~> 3.0 | | [random](#provider\_random) | ~> 3.0 | @@ -134,7 +134,7 @@ No modules. 
| Name | Type | |------|------| -| [google-beta_google_parallelstore_instance.instance](https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/google_parallelstore_instance) | resource | +| [google_parallelstore_instance.instance](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/parallelstore_instance) | resource | | [null_resource.hydration](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [random_id.resource_name_suffix](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/id) | resource | diff --git a/modules/file-system/parallelstore/main.tf b/modules/file-system/parallelstore/main.tf index 3de3b94f3a..acf3b07beb 100644 --- a/modules/file-system/parallelstore/main.tf +++ b/modules/file-system/parallelstore/main.tf @@ -54,7 +54,6 @@ resource "google_parallelstore_instance" "instance" { labels = local.labels - provider = google-beta depends_on = [var.private_vpc_connection_peering] } diff --git a/modules/file-system/parallelstore/versions.tf b/modules/file-system/parallelstore/versions.tf index 24069a479c..174b5281e4 100644 --- a/modules/file-system/parallelstore/versions.tf +++ b/modules/file-system/parallelstore/versions.tf @@ -18,9 +18,9 @@ terraform { required_version = ">= 0.13" required_providers { - google-beta = { - source = "hashicorp/google-beta" - version = ">= 5.25.0" + google = { + source = "hashicorp/google" + version = ">= 6.13.0" } random = { From 7d8061d5607531eb4f2b80ad76c24a5fb4de490c Mon Sep 17 00:00:00 2001 From: Rachael Tamakloe Date: Tue, 17 Dec 2024 01:07:06 +0000 Subject: [PATCH 038/140] Adding integration test for ansible os coverage --- .../startup-script/files/install_ansible.sh | 5 +- .../test-validation/test-ansible-vm.yml | 24 ++++ .../daily-tests/blueprints/ansible-vm.yaml | 115 ++++++++++++++++++ .../daily-tests/builds/ansible-vm.yaml | 41 +++++++ .../daily-tests/tests/ansible-vm.yml | 25 ++++ 5 files changed, 208 insertions(+), 2 deletions(-) create mode 100644 tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-ansible-vm.yml create mode 100644 tools/cloud-build/daily-tests/blueprints/ansible-vm.yaml create mode 100644 tools/cloud-build/daily-tests/builds/ansible-vm.yaml create mode 100644 tools/cloud-build/daily-tests/tests/ansible-vm.yml diff --git a/modules/scripts/startup-script/files/install_ansible.sh b/modules/scripts/startup-script/files/install_ansible.sh index 41c483307f..7146ecea36 100644 --- a/modules/scripts/startup-script/files/install_ansible.sh +++ b/modules/scripts/startup-script/files/install_ansible.sh @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+set -e REQ_ANSIBLE_VERSION=2.11 REQ_ANSIBLE_PIP_VERSION=4.10.0 REQ_PIP_WHEEL_VERSION=0.37.1 @@ -197,13 +198,13 @@ main() { fi # upgrade wheel if necessary - wheel_pkg=$(${venv_python_path} -m pip list --format=freeze | grep "^wheel") + wheel_pkg=$(${venv_python_path} -m pip list --format=freeze | grep "^wheel" || true) if [ "$wheel_pkg" != "wheel==${REQ_PIP_WHEEL_VERSION}" ]; then ${venv_python_path} -m pip install -U wheel==${REQ_PIP_WHEEL_VERSION} fi # upgrade setuptools if necessary - setuptools_pkg=$(${venv_python_path} -m pip list --format=freeze | grep "^setuptools") + setuptools_pkg=$(${venv_python_path} -m pip list --format=freeze | grep "^setuptools" || true) if [ "$setuptools_pkg" != "setuptools==${REQ_PIP_SETUPTOOLS_VERSION}" ]; then ${venv_python_path} -m pip install -U setuptools==${REQ_PIP_SETUPTOOLS_VERSION} fi diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-ansible-vm.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-ansible-vm.yml new file mode 100644 index 0000000000..da73958dd5 --- /dev/null +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-ansible-vm.yml @@ -0,0 +1,24 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +- name: Check if Ansible is installed + ansible.builtin.shell: | + command -v ansible >/dev/null 2>&1 && echo "Ansible is installed" || echo "Ansible is not installed" + register: ansible_check_result + +- name: Assert Ansible is installed + ansible.builtin.assert: + that: + - ansible_check_result.stdout == "Ansible is installed" diff --git a/tools/cloud-build/daily-tests/blueprints/ansible-vm.yaml b/tools/cloud-build/daily-tests/blueprints/ansible-vm.yaml new file mode 100644 index 0000000000..2b5ac93131 --- /dev/null +++ b/tools/cloud-build/daily-tests/blueprints/ansible-vm.yaml @@ -0,0 +1,115 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +--- + +blueprint_name: test-workstation-ansible + +vars: + project_id: ## Set GCP Project ID Here ## + deployment_name: test-workstation-ansible + region: us-central1 + zone: us-central1-a + machine_type: n2-standard-2 + disk_type: pd-ssd + instance_count: 1 + +deployment_groups: +- group: primary + modules: + + - id: network1 + source: modules/network/pre-existing-vpc + + - id: startup-script + source: modules/scripts/startup-script + settings: + install_ansible: true + runners: + - type: shell + destination: startup.sh + content: | + #!/bin/bash + set -ex + echo \$(ansible --version) + + - id: workstation-centos + source: modules/compute/vm-instance + use: + - network1 + - startup-script + settings: + name_prefix: centos + add_deployment_name_before_prefix: true + instance_image: + name: centos-7-v20240611 + project: centos-cloud + + - id: workstation-ubuntu-2004 + source: modules/compute/vm-instance + use: + - network1 + - startup-script + settings: + name_prefix: ubuntu2004 + add_deployment_name_before_prefix: true + instance_image: + family: ubuntu-2004-lts + project: ubuntu-os-cloud + + - id: workstation-ubuntu-2204 + source: modules/compute/vm-instance + use: + - network1 + - startup-script + settings: + name_prefix: ubuntu2204 + add_deployment_name_before_prefix: true + instance_image: + family: ubuntu-2204-lts + project: ubuntu-os-cloud + + - id: workstation-debian + source: modules/compute/vm-instance + use: + - network1 + - startup-script + settings: + name_prefix: debian + instance_image: + family: debian-11 + project: debian-cloud + + - id: workstation-rocky-8 + source: modules/compute/vm-instance + use: + - network1 + - startup-script + settings: + name_prefix: rocky8 + add_deployment_name_before_prefix: true + instance_image: + family: rocky-linux-8-optimized-gcp + project: rocky-linux-cloud + + - id: wait-for-startup + source: community/modules/scripts/wait-for-startup + settings: + instance_names: + - $(workstation-centos.name[0]) + - $(workstation-ubuntu-2004.name[0]) + - $(workstation-ubuntu-2204.name[0]) + - $(workstation-debian.name[0]) + - $(workstation-rocky-8.name[0]) + timeout: 7200 diff --git a/tools/cloud-build/daily-tests/builds/ansible-vm.yaml b/tools/cloud-build/daily-tests/builds/ansible-vm.yaml new file mode 100644 index 0000000000..a3aba07522 --- /dev/null +++ b/tools/cloud-build/daily-tests/builds/ansible-vm.yaml @@ -0,0 +1,41 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +--- +tags: +- m.pre-existing-vpc +- m.startup-script +- m.vm-instance +- m.wait-for-startup +- vm + +timeout: 14400s # 4hr +steps: +- id: anisble-vm + name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner + entrypoint: /bin/bash + env: + - "ANSIBLE_HOST_KEY_CHECKING=false" + - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" + args: + - -c + - | + set -x -e + cd /workspace && make + BUILD_ID_FULL=$BUILD_ID + BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} + + ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/ansible-vm.yml" diff --git a/tools/cloud-build/daily-tests/tests/ansible-vm.yml b/tools/cloud-build/daily-tests/tests/ansible-vm.yml new file mode 100644 index 0000000000..39f773d544 --- /dev/null +++ b/tools/cloud-build/daily-tests/tests/ansible-vm.yml @@ -0,0 +1,25 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- + +test_name: test-workstation-ansible +deployment_name: "ansible-vm-{{ build }}" +zone: us-central1-a +workspace: /workspace +blueprint_yaml: "{{ workspace }}/tools/cloud-build/daily-tests/blueprints/ansible-vm.yaml" +network: "default" +remote_node: "{{ deployment_name }}-centos-0" +post_deploy_tests: +- test-validation/test-ansible-vm.yml From 2479b7f7bb42879b12ef42be51278ed170eb6e0d Mon Sep 17 00:00:00 2001 From: ighosh98 Date: Wed, 18 Dec 2024 06:18:04 +0000 Subject: [PATCH 039/140] add reservations for kueue integration tests --- .../daily-tests/blueprints/gke-a2-highgpu.yaml | 3 ++- .../daily-tests/tests/gke-a2-highgpu-kueue.yml | 10 ++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/cloud-build/daily-tests/blueprints/gke-a2-highgpu.yaml b/tools/cloud-build/daily-tests/blueprints/gke-a2-highgpu.yaml index a09e180732..4c77cf0bcb 100644 --- a/tools/cloud-build/daily-tests/blueprints/gke-a2-highgpu.yaml +++ b/tools/cloud-build/daily-tests/blueprints/gke-a2-highgpu.yaml @@ -20,7 +20,7 @@ vars: project_id: hpc-toolkit-dev ## Set GCP Project ID Here ## deployment_name: gke-a2-highgpu region: us-central1 - zone: us-central1-c + zone: us-central1-f # Cidr block containing the IP of the machine calling terraform. # The following line must be updated for this example to work. 
@@ -84,6 +84,7 @@ deployment_groups: zones: [$(vars.zone)] image_type: UBUNTU_CONTAINERD placement_policy: + name: a2-highgpu-compact type: "COMPACT" outputs: [instructions] diff --git a/tools/cloud-build/daily-tests/tests/gke-a2-highgpu-kueue.yml b/tools/cloud-build/daily-tests/tests/gke-a2-highgpu-kueue.yml index 549fbac367..0735f4f970 100644 --- a/tools/cloud-build/daily-tests/tests/gke-a2-highgpu-kueue.yml +++ b/tools/cloud-build/daily-tests/tests/gke-a2-highgpu-kueue.yml @@ -22,13 +22,19 @@ workspace: /workspace blueprint_yaml: "{{ workspace }}/tools/cloud-build/daily-tests/blueprints/gke-a2-highgpu.yaml" network: "gke-a2high-net-{{ build }}" region: us-central1 -zone: us-central1-c +zone: us-central1-f remote_node: "{{ deployment_name }}-remote-node-0" +reservation_affinity: + consume_reservation_type: SPECIFIC_RESERVATION + specific_reservations: + - name: a2-reservation-0 + project: "{{ project }}" cli_deployment_vars: region: "{{ region }}" zone: "{{ zone }}" network_name: "{{ network }}" - local_ssd_count_nvme_block: 8 + reservation_affinity: "{{ reservation_affinity }}" + local_ssd_count_nvme_block: 2 custom_vars: project: "{{ project }}" post_deploy_tests: From 6534ee37ec652325a3335437166288cb99cb2ecd Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Wed, 18 Dec 2024 17:06:05 +0000 Subject: [PATCH 040/140] Updated gke cluster module source path check --- pkg/config/expand.go | 2 +- pkg/config/expand_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/config/expand.go b/pkg/config/expand.go index 1005eb780f..87a8fac9fa 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -189,7 +189,7 @@ func (bp Blueprint) expandBackend(grp *Group) { func kubectlProviderRequiredModule(grp *Group) (bool, Module) { for _, mod := range grp.Modules { - if strings.Contains(mod.Source, "gke-cluster") || strings.Contains(mod.Source, "pre-existing-gke-cluster") { + if strings.Contains(mod.Source, "modules/scheduler/gke-cluster") || strings.Contains(mod.Source, "modules/scheduler/pre-existing-gke-cluster") { return true, mod } } diff --git a/pkg/config/expand_test.go b/pkg/config/expand_test.go index f9f273efd8..bafb967eeb 100644 --- a/pkg/config/expand_test.go +++ b/pkg/config/expand_test.go @@ -106,11 +106,11 @@ func (s *zeroSuite) TestExpandProviders(c *C) { Configuration: testKubectlConf} testGKEClusterModule := Module{ - Source: "module/test/gke-cluster/dummy", + Source: "modules/scheduler/gke-cluster", ID: testGKEClusterModuleID} testPreExistingGKEClusterModule := Module{ - Source: "module/test/pre-existing-gke-cluster/dummy", + Source: "modules/scheduler/pre-existing-gke-cluster", ID: testGKEClusterModuleID} defaultProvider := map[string]PR{ From 86ec68ff3fd7b94c456fca84823b753411263e58 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Fri, 13 Dec 2024 06:00:29 +0000 Subject: [PATCH 041/140] SlurmGCP. 
Move TPU code into separate file + refactoring --- .../modules/slurm_files/scripts/conf.py | 3 +- .../slurm_files/scripts/get_tpu_vmcount.py | 6 +- .../modules/slurm_files/scripts/resume.py | 43 +-- .../modules/slurm_files/scripts/slurmsync.py | 27 +- .../modules/slurm_files/scripts/suspend.py | 41 +-- .../slurm_files/scripts/tests/test_resume.py | 6 +- .../scripts/tests/test_topology.py | 10 +- .../modules/slurm_files/scripts/tpu.py | 329 ++++++++++++++++++ .../modules/slurm_files/scripts/util.py | 254 -------------- 9 files changed, 367 insertions(+), 352 deletions(-) create mode 100644 community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tpu.py diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py index 4af58a7831..a4ff1e488a 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py @@ -21,6 +21,7 @@ from pathlib import Path import util from util import dirs, slurmdirs +import tpu FILE_PREAMBLE = """ # Warning: @@ -519,7 +520,7 @@ def _walk( def add_tpu_nodeset_topology(nodeset: object, bldr: TopologyBuilder, lkp: util.Lookup): - tpuobj = util.TPU(nodeset) + tpuobj = tpu.TPU.make(nodeset.nodeset_name, lkp) static, dynamic = lkp.nodenames(nodeset) pref = ["tpu-root", f"ns_{nodeset.nodeset_name}"] diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/get_tpu_vmcount.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/get_tpu_vmcount.py index 1557d6020b..1e194426fd 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/get_tpu_vmcount.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/get_tpu_vmcount.py @@ -16,12 +16,14 @@ import argparse import util +import tpu def get_vmcount_of_tpu_part(part): res = 0 - for ns in util.lookup().cfg.partitions[part].partition_nodeset_tpu: - tpu_obj = util.TPU(util.lookup().cfg.nodeset_tpu[ns]) + lkp = util.lookup() + for ns in lkp.cfg.partitions[part].partition_nodeset_tpu: + tpu_obj = tpu.TPU.make(ns, lkp) if res == 0: res = tpu_obj.vmcount else: diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py index 5d88751a41..3a4fa74dc9 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py @@ -42,6 +42,7 @@ wait_for_operation, ) from util import lookup, NSDict +import tpu import slurm_gcp_plugins @@ -269,7 +270,8 @@ def group_nodes_bulk(nodes: List[str], resume_data: Optional[ResumeData], lkp: u def chunk_nodes(nodes: List[str]): chunk_size = BULK_INSERT_LIMIT if nodes and lkp.node_is_tpu(nodes[0]): - chunk_size = util.TPU(lkp.node_nodeset(nodes[0])).vmcount + ns = lkp.node_nodeset_name(nodes[0]) + chunk_size = tpu.TPU.make(ns, lkp).vmcount return chunked(nodes, n=chunk_size) chunks = [ @@ -287,34 +289,6 @@ def chunk_nodes(nodes: List[str]): return {chunk.name: chunk for chunk in chunks} -def start_tpu(data): - tpu = data["tpu"] - node = 
data["node"] - if len(node) == 1: - node = node[0] - log.debug( - f"Will create a TPU of type {tpu.node_type} tf_version {tpu.tf_version} in zone {tpu.zone} with name {node}" - ) - tpunode = tpu.get_node(node) - if tpunode is None: - if not tpu.create_node(nodename=node): - log.error("Error creating tpu node {node}") - else: - if tpu.preserve_tpu: - if not tpu.start_node(nodename=node): - log.error("Error starting tpu node {node}") - else: - log.info( - f"Tpu node {node} is already created, but will not start it because nodeset does not have preserve_tpu option active." - ) - else: - log.debug( - f"Will create a multi-vm TPU of type {tpu.node_type} tf_version {tpu.tf_version} in zone {tpu.zone} with name {node[0]}" - ) - if not tpu.create_node(nodename=node): - log.error("Error creating tpu node {node}") - - def resume_nodes(nodes: List[str], resume_data: Optional[ResumeData]): """resume nodes in nodelist""" # Prevent dormant nodes associated with a future reservation from being resumed @@ -339,17 +313,13 @@ def resume_nodes(nodes: List[str], resume_data: Optional[ResumeData]): "node bulk groups: \n{}".format(yaml.safe_dump(grouped_nodelists).rstrip()) ) - tpu_start_data = [] - tpu_objs = {} + tpu_chunks = [] bi_inserts = {} for group, chunk in grouped_nodes.items(): model = chunk.nodes[0] if lookup().node_is_tpu(model): - # do not create multiple tpu_objs if nodes with the same prefix are used - if chunk.prefix not in tpu_objs.keys(): - tpu_objs[chunk.prefix] = util.TPU(lookup().node_nodeset(model)) - tpu_start_data.append({"tpu": tpu_objs[chunk.prefix], "node": chunk.nodes}) + tpu_chunks.append(chunk.nodes) else: bi_inserts[group] = create_instances_request( chunk.nodes, chunk.placement_group, chunk.excl_job_id @@ -384,8 +354,7 @@ def resume_nodes(nodes: List[str], resume_data: Optional[ResumeData]): bulk_operations = {group: wait_for_operation(op) for group, op in started.items()} # Start TPU after regular nodes so that regular nodes are not affected by the slower TPU nodes - log.debug(f"tpu_start_data={yaml.safe_dump(tpu_start_data)}") - execute_with_futures(start_tpu, tpu_start_data) + execute_with_futures(tpu.start_tpu, tpu_chunks) all_successful_inserts = [] diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py index b06d093e78..65bf15ede5 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py @@ -22,7 +22,6 @@ import sys import shlex from datetime import datetime, timedelta -from enum import Enum from itertools import chain from pathlib import Path from dataclasses import dataclass @@ -40,13 +39,12 @@ separate, to_hostlist, NodeState, - TPU, chunked, dirs, ) from util import lookup from suspend import delete_instances -from resume import start_tpu +import tpu import conf log = logging.getLogger() @@ -130,18 +128,19 @@ def start_instance_op(inst): def start_instances(node_list): log.info("{} instances to start ({})".format(len(node_list), ",".join(node_list))) - - normal, tpu_nodes = separate(lookup().node_is_tpu, node_list) + lkp = lookup() + # TODO: use code from resume.py to assign proper placement + normal, tpu_nodes = separate(lkp.node_is_tpu, node_list) ops = {inst: start_instance_op(inst) for inst in normal} done, failed = batch_execute(ops) 
tpu_start_data = [] - for ns, nodes in util.groupby_unsorted(tpu_nodes, lookup().node_nodeset_name): - tpuobj = TPU(lookup().cfg.nodeset_tpu[ns]) + for ns, nodes in util.groupby_unsorted(tpu_nodes, lkp.node_nodeset_name): + tpuobj = tpu.TPU.make(ns, lkp) for snodes in chunked(nodes, n=tpuobj.vmcount): tpu_start_data.append({"tpu": tpuobj, "node": snodes}) - execute_with_futures(start_tpu, tpu_start_data) + execute_with_futures(tpu.start_tpu, tpu_start_data) def _find_dynamic_node_status() -> NodeAction: @@ -163,14 +162,14 @@ def get_fr_action(fr: FutureReservation, nodename:str, state:NodeState) -> Optio return NodeActionDown(reason=msg) def _find_tpu_node_action(nodename, state) -> NodeAction: - ns = lookup().node_nodeset(nodename) - tpuobj = TPU(ns) + lkp = lookup() + tpuobj = tpu.TPU.make(lkp.node_nodeset_name(nodename), lkp) inst = tpuobj.get_node(nodename) # If we do not find the node but it is from a Tpu that has multiple vms look for the master node if inst is None and tpuobj.vmcount > 1: # Get the tpu slurm nodelist of the nodes in the same tpu group as nodename nodelist = run( - f"{lookup().scontrol} show topo {nodename}" + f"{lkp.scontrol} show topo {nodename}" + " | awk -F'=' '/Level=0/ { print $NF }'", shell=True, ).stdout @@ -200,13 +199,13 @@ def _find_tpu_node_action(nodename, state) -> NodeAction: & state.flags ): return NodeActionDown(reason="Unbacked instance") - if lookup().is_static_node(nodename): + if lkp.is_static_node(nodename): return NodeActionPowerUp() elif ( state is not None and "POWERED_DOWN" not in state.flags and "POWERING_DOWN" not in state.flags - and inst.state == TPU.State.STOPPED + and inst.state == tpu.TPU.State.STOPPED ): if tpuobj.preemptible: return NodeActionPrempt() @@ -214,7 +213,7 @@ def _find_tpu_node_action(nodename, state) -> NodeAction: return NodeActionDown(reason="Instance terminated") elif ( state is None or "POWERED_DOWN" in state.flags - ) and inst.state == TPU.State.READY: + ) and inst.state == tpu.TPU.State.READY: return NodeActionDelete() elif state is None: # if state is None here, the instance exists but it's not in Slurm diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py index f01013e1a2..7d6ae28f9f 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/suspend.py @@ -27,9 +27,9 @@ to_hostlist, wait_for_operations, separate, - execute_with_futures, ) -from util import lookup, TPU +from util import lookup +import tpu import slurm_gcp_plugins @@ -58,33 +58,6 @@ def delete_instance_request(instance): return request -def stop_tpu(data): - tpu_nodeset = data["nodeset"] - node = data["node"] - tpu = data["tpu"] - if tpu_nodeset.preserve_tpu and tpu.vmcount == 1: - log.info(f"stopping node {node}") - if tpu.stop_node(node): - return - log.error("Error stopping node {node} will delete instead") - log.info(f"deleting node {node}") - if not tpu.delete_node(node): - log.error("Error deleting node {node}") - - -def delete_tpu_instances(instances): - stop_data = [] - for prefix, nodes in util.groupby_unsorted(instances, lookup().node_prefix): - log.info(f"Deleting TPU nodes from prefix {prefix}") - lnodes = list(nodes) - tpu_nodeset = lookup().node_nodeset(lnodes[0]) - tpu = TPU(tpu_nodeset) - stop_data.extend( - [{"tpu": tpu, "node": 
node, "nodeset": tpu_nodeset} for node in lnodes] - ) - execute_with_futures(stop_tpu, stop_data) - - def delete_instances(instances): """delete instances individually""" invalid, valid = separate(lambda inst: bool(lookup().instance(inst)), instances) @@ -106,15 +79,11 @@ def delete_instances(instances): def suspend_nodes(nodes: List[str]) -> None: - tpu_nodes, other_nodes = [], [] - for node in nodes[:]: - if lookup().node_is_tpu(node): - tpu_nodes.append(node) - else: - other_nodes.append(node) + lkp = lookup() + other_nodes, tpu_nodes = util.separate(lkp.node_is_tpu, nodes) delete_instances(other_nodes) - delete_tpu_instances(tpu_nodes) + tpu.delete_tpu_instances(tpu_nodes) def main(nodelist): diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_resume.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_resume.py index 147ba00658..3c637bbe10 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_resume.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_resume.py @@ -63,7 +63,7 @@ def test_get_resume_file_data(): mock_to_hostnames.assert_called_once_with("green-[0-2]") -@unittest.mock.patch("util.TPU") +@unittest.mock.patch("tpu.TPU.make") @unittest.mock.patch("resume.create_placements") def test_group_nodes_bulk(mock_create_placements, mock_tpu): cfg = TstCfg( @@ -106,8 +106,8 @@ def mock_create_placements_se(nodes, excl_job_id, lkp): raise AssertionError(f"unexpected invocation: '{args}'") mock_create_placements.side_effect = mock_create_placements_se - def mock_tpu_se(ns: TstNodeset) -> TstTPU: - if ns.nodeset_name == "t": + def mock_tpu_se(ns: str, lkp) -> TstTPU: + if ns == "t": return TstTPU(vmcount=2) raise AssertionError(f"unexpected invocation: '{ns}'") mock_tpu.side_effect = mock_tpu_se diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_topology.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_topology.py index 78715bc5f6..a3680f15d7 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_topology.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_topology.py @@ -35,7 +35,7 @@ def test_gen_topology_conf_empty(): assert open(cfg.output_dir + "/cloud_topology.conf").read() == PRELUDE + "\n" -@mock.patch("util.TPU") +@mock.patch("tpu.TPU.make") def test_gen_topology_conf(tpu_mock): cfg = TstCfg( nodeset_tpu={ @@ -50,12 +50,12 @@ def test_gen_topology_conf(tpu_mock): output_dir=tempfile.mkdtemp(), ) - def tpu_se(ns: TstNodeset) -> TstTPU: - if ns.nodeset_name == "bold": + def tpu_se(ns: str, lkp) -> TstTPU: + if ns == "bold": return TstTPU(vmcount=3) - if ns.nodeset_name == "slim": + if ns == "slim": return TstTPU(vmcount=1) - raise AssertionError(f"unexpected TPU name: '{ns.nodeset_name}'") + raise AssertionError(f"unexpected TPU name: '{ns}'") tpu_mock.side_effect = tpu_se diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tpu.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tpu.py new file mode 100644 index 0000000000..d8632652f8 --- /dev/null +++ 
b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tpu.py @@ -0,0 +1,329 @@ +# Copyright 2024 "Google LLC" +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List + +import socket +import logging +from dataclasses import dataclass +from pathlib import Path +import yaml + +import util +from util import create_client_options, ApiEndpoint + +from google.cloud import tpu_v2 as tpu # noqa: E402 +import google.api_core.exceptions as gExceptions # noqa: E402 + +log = logging.getLogger() + +_tpu_cache = {} + +class TPU: + """Class for handling the TPU-vm nodes""" + + State = tpu.types.cloud_tpu.Node.State + TPUS_PER_VM = 4 + __expected_states = { + "create": State.READY, + "start": State.READY, + "stop": State.STOPPED, + } + + __tpu_version_mapping = { + "V2": tpu.AcceleratorConfig().Type.V2, + "V3": tpu.AcceleratorConfig().Type.V3, + "V4": tpu.AcceleratorConfig().Type.V4, + } + + @classmethod + def make(cls, nodeset_name: str, lkp: util.Lookup) -> "TPU": + key = (id(lkp), nodeset_name) + if key not in _tpu_cache: + nodeset = lkp.cfg.nodeset_tpu[nodeset_name] + _tpu_cache[key] = cls(nodeset, lkp) + return _tpu_cache[key] + + + def __init__(self, nodeset: object, lkp: util.Lookup): + self._nodeset = nodeset + self.lkp = lkp + self._parent = f"projects/{lkp.project}/locations/{nodeset.zone}" + co = create_client_options(ApiEndpoint.TPU) + self._client = tpu.TpuClient(client_options=co) + self.data_disks = [] + for data_disk in nodeset.data_disks: + ad = tpu.AttachedDisk() + ad.source_disk = data_disk + ad.mode = tpu.AttachedDisk.DiskMode.DISK_MODE_UNSPECIFIED + self.data_disks.append(ad) + ns_ac = nodeset.accelerator_config + if ns_ac.topology != "" and ns_ac.version != "": + ac = tpu.AcceleratorConfig() + ac.topology = ns_ac.topology + ac.type_ = self.__tpu_version_mapping[ns_ac.version] + self.ac = ac + else: + req = tpu.GetAcceleratorTypeRequest( + name=f"{self._parent}/acceleratorTypes/{nodeset.node_type}" + ) + self.ac = self._client.get_accelerator_type(req).accelerator_configs[0] + self.vmcount = self.__calc_vm_from_topology(self.ac.topology) + + @property + def nodeset(self): + return self._nodeset + + @property + def preserve_tpu(self): + return self._nodeset.preserve_tpu + + @property + def node_type(self): + return self._nodeset.node_type + + @property + def tf_version(self): + return self._nodeset.tf_version + + @property + def enable_public_ip(self): + return self._nodeset.enable_public_ip + + @property + def preemptible(self): + return self._nodeset.preemptible + + @property + def reserved(self): + return self._nodeset.reserved + + @property + def service_account(self): + return self._nodeset.service_account + + @property + def zone(self): + return self._nodeset.zone + + def check_node_type(self): + if self.node_type is None: + return False + try: + request = tpu.GetAcceleratorTypeRequest( + name=f"{self._parent}/acceleratorTypes/{self.node_type}" + ) + return self._client.get_accelerator_type(request=request) is not None + except 
Exception: + return False + + def check_tf_version(self): + try: + request = tpu.GetRuntimeVersionRequest( + name=f"{self._parent}/runtimeVersions/{self.tf_version}" + ) + return self._client.get_runtime_version(request=request) is not None + except Exception: + return False + + def __calc_vm_from_topology(self, topology): + topo = topology.split("x") + tot = 1 + for num in topo: + tot = tot * int(num) + return tot // self.TPUS_PER_VM + + def __check_resp(self, response, op_name): + des_state = self.__expected_states.get(op_name) + # If the state is not in the table just print the response + if des_state is None: + return False + if response.__class__.__name__ != "Node": # If the response is not a node fail + return False + if response.state == des_state: + return True + return False + + def list_nodes(self): + try: + request = tpu.ListNodesRequest(parent=self._parent) + res = self._client.list_nodes(request=request) + except gExceptions.NotFound: + res = None + return res + + def list_node_names(self): + return [node.name.split("/")[-1] for node in self.list_nodes()] + + def start_node(self, nodename): + request = tpu.StartNodeRequest(name=f"{self._parent}/nodes/{nodename}") + resp = self._client.start_node(request=request).result() + return self.__check_resp(resp, "start") + + def stop_node(self, nodename): + request = tpu.StopNodeRequest(name=f"{self._parent}/nodes/{nodename}") + resp = self._client.stop_node(request=request).result() + return self.__check_resp(resp, "stop") + + def get_node(self, nodename): + try: + request = tpu.GetNodeRequest(name=f"{self._parent}/nodes/{nodename}") + res = self._client.get_node(request=request) + except gExceptions.NotFound: + res = None + return res + + def _register_node(self, nodename, ip_addr): + dns_name = socket.getnameinfo((ip_addr, 0), 0)[0] + util.run( + f"{self.lkp.scontrol} update nodename={nodename} nodeaddr={ip_addr} nodehostname={dns_name}" + ) + + def create_node(self, nodename): + if self.vmcount > 1 and not isinstance(nodename, list): + log.error( + f"Tried to create a {self.vmcount} node TPU on nodeset {self._nodeset.nodeset_name} but only received one nodename {nodename}" + ) + return False + if self.vmcount > 1 and ( + isinstance(nodename, list) and len(nodename) != self.vmcount + ): + log.error( + f"Expected to receive a list of {self.vmcount} nodenames for TPU node creation in nodeset {self._nodeset.nodeset_name}, but received this list {nodename}" + ) + return False + + node = tpu.Node() + node.accelerator_config = self.ac + node.runtime_version = f"tpu-vm-tf-{self.tf_version}" + startup_script = """ + #!/bin/bash + echo "startup script not found > /var/log/startup_error.log" + """ + with open( + Path(self.lkp.cfg.slurm_scripts_dir or util.dirs.scripts) / "startup.sh", "r" + ) as script: + startup_script = script.read() + if isinstance(nodename, list): + node_id = nodename[0] + slurm_names = [] + wid = 0 + for node_wid in nodename: + slurm_names.append(f"WORKER_{wid}:{node_wid}") + wid += 1 + else: + node_id = nodename + slurm_names = [f"WORKER_0:{nodename}"] + node.metadata = { + "slurm_docker_image": self.nodeset.docker_image, + "startup-script": startup_script, + "slurm_instance_role": "compute", + "slurm_cluster_name": self.lkp.cfg.slurm_cluster_name, + "slurm_bucket_path": self.lkp.cfg.bucket_path, + "slurm_names": ";".join(slurm_names), + "universe_domain": util.universe_domain(), + } + node.tags = [self.lkp.cfg.slurm_cluster_name] + if self.nodeset.service_account: + node.service_account.email = 
self.nodeset.service_account.email + node.service_account.scope = self.nodeset.service_account.scopes + node.scheduling_config.preemptible = self.preemptible + node.scheduling_config.reserved = self.reserved + node.network_config.subnetwork = self.nodeset.subnetwork + node.network_config.enable_external_ips = self.enable_public_ip + if self.data_disks: + node.data_disks = self.data_disks + + request = tpu.CreateNodeRequest(parent=self._parent, node=node, node_id=node_id) + resp = self._client.create_node(request=request).result() + if not self.__check_resp(resp, "create"): + return False + if isinstance(nodename, list): + for node_id, net_endpoint in zip(nodename, resp.network_endpoints): + self._register_node(node_id, net_endpoint.ip_address) + else: + ip_add = resp.network_endpoints[0].ip_address + self._register_node(nodename, ip_add) + return True + + def delete_node(self, nodename): + request = tpu.DeleteNodeRequest(name=f"{self._parent}/nodes/{nodename}") + try: + resp = self._client.delete_node(request=request).result() + if resp: + return self.get_node(nodename=nodename) is None + return False + except gExceptions.NotFound: + # log only error if vmcount is 1 as for other tpu vm count, this could be "phantom" nodes + if self.vmcount == 1: + log.error(f"Tpu single node {nodename} not found") + else: + # for the TPU nodes that consist in more than one vm, only the first node of the TPU a.k.a. the master node will + # exist as real TPU nodes, so the other ones are expected to not be found, check the hostname of the node that has + # not been found, and if it ends in 0, it means that is the master node and it should have been found, and in consequence + # log an error + nodehostname = yaml.safe_load( + util.run(f"{self.lkp.scontrol} --yaml show node {nodename}").stdout.rstrip() + )["nodes"][0]["hostname"] + if nodehostname.split("-")[-1] == "0": + log.error(f"TPU master node {nodename} not found") + else: + log.info(f"Deleted TPU 'phantom' node {nodename}") + # If the node is not found it is tecnichally deleted, so return success. + return True + +def _stop_tpu(node: str) -> None: + lkp = util.lookup() + tpuobj = TPU.make(lkp.node_nodeset_name(node), lkp) + if tpuobj.nodeset.preserve_tpu and tpuobj.vmcount == 1: + log.info(f"stopping node {node}") + if tpuobj.stop_node(node): + return + log.error("Error stopping node {node} will delete instead") + log.info(f"deleting node {node}") + if not tpuobj.delete_node(node): + log.error("Error deleting node {node}") + + +def delete_tpu_instances(instances: List[str]) -> None: + util.execute_with_futures(_stop_tpu, instances) + + +def start_tpu(node: List[str]): + lkp = util.lookup() + tpuobj = TPU.make(lkp.node_nodeset_name(node[0]), lkp) + + if len(node) == 1: + node = node[0] + log.debug( + f"Will create a TPU of type {tpuobj.node_type} tf_version {tpuobj.tf_version} in zone {tpuobj.zone} with name {node}" + ) + tpunode = tpuobj.get_node(node) + if tpunode is None: + if not tpuobj.create_node(nodename=node): + log.error("Error creating tpu node {node}") + else: + if tpuobj.preserve_tpu: + if not tpuobj.start_node(nodename=node): + log.error("Error starting tpu node {node}") + else: + log.info( + f"Tpu node {node} is already created, but will not start it because nodeset does not have preserve_tpu option active." 
+ ) + else: + log.debug( + f"Will create a multi-vm TPU of type {tpuobj.node_type} tf_version {tpuobj.tf_version} in zone {tpuobj.zone} with name {node[0]}" + ) + if not tpuobj.create_node(nodename=node): + log.error("Error creating tpu node {node}") diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py index 017443002f..62fafde6e1 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py @@ -56,13 +56,6 @@ from google.api_core.client_options import ClientOptions # noqa: E402 import httplib2 # noqa: E402 -try: - from google.cloud import tpu_v2 as tpu # noqa: E402 - can_tpu = True -except ImportError: # TODO: remove once CentOS 7 is deprecated or dependency is added - f"WARNING: Missing Python module 'google.cloud.tpu_v2 (pip:google-cloud-tpu)', TPU support will not work." - can_tpu = False - import google.api_core.exceptions as gExceptions # noqa: E402 from requests import get as get_url # noqa: E402 @@ -1189,253 +1182,6 @@ def represent_path(dumper, path): return dumper.represent_scalar("tag:yaml.org,2002:str", str(path)) -class TPU: - """Class for handling the TPU-vm nodes""" - - if can_tpu: - State = tpu.types.cloud_tpu.Node.State - TPUS_PER_VM = 4 - __expected_states = { - "create": State.READY, - "start": State.READY, - "stop": State.STOPPED, - } - - __tpu_version_mapping = { - "V2": tpu.AcceleratorConfig().Type.V2, - "V3": tpu.AcceleratorConfig().Type.V3, - "V4": tpu.AcceleratorConfig().Type.V4, - } - - def __init__(self, nodeset): - if not can_tpu: - raise Exception("TPU pip package not installed") - self._nodeset = nodeset - self._parent = f"projects/{lookup().project}/locations/{nodeset.zone}" - co = create_client_options(ApiEndpoint.TPU) - self._client = tpu.TpuClient(client_options=co) - self.data_disks = [] - for data_disk in nodeset.data_disks: - ad = tpu.AttachedDisk() - ad.source_disk = data_disk - ad.mode = tpu.AttachedDisk.DiskMode.DISK_MODE_UNSPECIFIED - self.data_disks.append(ad) - ns_ac = nodeset.accelerator_config - if ns_ac.topology != "" and ns_ac.version != "": - ac = tpu.AcceleratorConfig() - ac.topology = ns_ac.topology - ac.type_ = self.__tpu_version_mapping[ns_ac.version] - self.ac = ac - else: - req = tpu.GetAcceleratorTypeRequest( - name=f"{self._parent}/acceleratorTypes/{nodeset.node_type}" - ) - self.ac = self._client.get_accelerator_type(req).accelerator_configs[0] - self.vmcount = self.__calc_vm_from_topology(self.ac.topology) - - @property - def nodeset(self): - return self._nodeset - - @property - def preserve_tpu(self): - return self._nodeset.preserve_tpu - - @property - def node_type(self): - return self._nodeset.node_type - - @property - def tf_version(self): - return self._nodeset.tf_version - - @property - def enable_public_ip(self): - return self._nodeset.enable_public_ip - - @property - def preemptible(self): - return self._nodeset.preemptible - - @property - def reserved(self): - return self._nodeset.reserved - - @property - def service_account(self): - return self._nodeset.service_account - - @property - def zone(self): - return self._nodeset.zone - - def check_node_type(self): - if self.node_type is None: - return False - try: - request = tpu.GetAcceleratorTypeRequest( - name=f"{self._parent}/acceleratorTypes/{self.node_type}" - ) - return 
self._client.get_accelerator_type(request=request) is not None - except Exception: - return False - - def check_tf_version(self): - try: - request = tpu.GetRuntimeVersionRequest( - name=f"{self._parent}/runtimeVersions/{self.tf_version}" - ) - return self._client.get_runtime_version(request=request) is not None - except Exception: - return False - - def __calc_vm_from_topology(self, topology): - topo = topology.split("x") - tot = 1 - for num in topo: - tot = tot * int(num) - return tot // self.TPUS_PER_VM - - def __check_resp(self, response, op_name): - des_state = self.__expected_states.get(op_name) - # If the state is not in the table just print the response - if des_state is None: - return False - if response.__class__.__name__ != "Node": # If the response is not a node fail - return False - if response.state == des_state: - return True - return False - - def list_nodes(self): - try: - request = tpu.ListNodesRequest(parent=self._parent) - res = self._client.list_nodes(request=request) - except gExceptions.NotFound: - res = None - return res - - def list_node_names(self): - return [node.name.split("/")[-1] for node in self.list_nodes()] - - def start_node(self, nodename): - request = tpu.StartNodeRequest(name=f"{self._parent}/nodes/{nodename}") - resp = self._client.start_node(request=request).result() - return self.__check_resp(resp, "start") - - def stop_node(self, nodename): - request = tpu.StopNodeRequest(name=f"{self._parent}/nodes/{nodename}") - resp = self._client.stop_node(request=request).result() - return self.__check_resp(resp, "stop") - - def get_node(self, nodename): - try: - request = tpu.GetNodeRequest(name=f"{self._parent}/nodes/{nodename}") - res = self._client.get_node(request=request) - except gExceptions.NotFound: - res = None - return res - - def _register_node(self, nodename, ip_addr): - dns_name = socket.getnameinfo((ip_addr, 0), 0)[0] - run( - f"{lookup().scontrol} update nodename={nodename} nodeaddr={ip_addr} nodehostname={dns_name}" - ) - - def create_node(self, nodename): - if self.vmcount > 1 and not isinstance(nodename, list): - log.error( - f"Tried to create a {self.vmcount} node TPU on nodeset {self._nodeset.nodeset_name} but only received one nodename {nodename}" - ) - return False - if self.vmcount > 1 and ( - isinstance(nodename, list) and len(nodename) != self.vmcount - ): - log.error( - f"Expected to receive a list of {self.vmcount} nodenames for TPU node creation in nodeset {self._nodeset.nodeset_name}, but received this list {nodename}" - ) - return False - - node = tpu.Node() - node.accelerator_config = self.ac - node.runtime_version = f"tpu-vm-tf-{self.tf_version}" - startup_script = """ - #!/bin/bash - echo "startup script not found > /var/log/startup_error.log" - """ - with open( - Path(lookup().cfg.slurm_scripts_dir or dirs.scripts) / "startup.sh", "r" - ) as script: - startup_script = script.read() - if isinstance(nodename, list): - node_id = nodename[0] - slurm_names = [] - wid = 0 - for node_wid in nodename: - slurm_names.append(f"WORKER_{wid}:{node_wid}") - wid += 1 - else: - node_id = nodename - slurm_names = [f"WORKER_0:{nodename}"] - node.metadata = { - "slurm_docker_image": self.nodeset.docker_image, - "startup-script": startup_script, - "slurm_instance_role": "compute", - "slurm_cluster_name": lookup().cfg.slurm_cluster_name, - "slurm_bucket_path": lookup().cfg.bucket_path, - "slurm_names": ";".join(slurm_names), - "universe_domain": universe_domain(), - } - node.tags = [lookup().cfg.slurm_cluster_name] - if 
self.nodeset.service_account: - node.service_account.email = self.nodeset.service_account.email - node.service_account.scope = self.nodeset.service_account.scopes - node.scheduling_config.preemptible = self.preemptible - node.scheduling_config.reserved = self.reserved - node.network_config.subnetwork = self.nodeset.subnetwork - node.network_config.enable_external_ips = self.enable_public_ip - if self.data_disks: - node.data_disks = self.data_disks - - request = tpu.CreateNodeRequest(parent=self._parent, node=node, node_id=node_id) - resp = self._client.create_node(request=request).result() - if not self.__check_resp(resp, "create"): - return False - if isinstance(nodename, list): - for node_id, net_endpoint in zip(nodename, resp.network_endpoints): - self._register_node(node_id, net_endpoint.ip_address) - else: - ip_add = resp.network_endpoints[0].ip_address - self._register_node(nodename, ip_add) - return True - - def delete_node(self, nodename): - request = tpu.DeleteNodeRequest(name=f"{self._parent}/nodes/{nodename}") - try: - resp = self._client.delete_node(request=request).result() - if resp: - return self.get_node(nodename=nodename) is None - return False - except gExceptions.NotFound: - # log only error if vmcount is 1 as for other tpu vm count, this could be "phantom" nodes - if self.vmcount == 1: - log.error(f"Tpu single node {nodename} not found") - else: - # for the TPU nodes that consist in more than one vm, only the first node of the TPU a.k.a. the master node will - # exist as real TPU nodes, so the other ones are expected to not be found, check the hostname of the node that has - # not been found, and if it ends in 0, it means that is the master node and it should have been found, and in consequence - # log an error - nodehostname = yaml.safe_load( - run(f"{lookup().scontrol} --yaml show node {nodename}").stdout.rstrip() - )["nodes"][0]["hostname"] - if nodehostname.split("-")[-1] == "0": - log.error(f"TPU master node {nodename} not found") - else: - log.info(f"Deleted TPU 'phantom' node {nodename}") - # If the node is not found it is tecnichally deleted, so return success. - return True - - @dataclass(frozen=True) class ReservationDetails: project: str From f8c81a3e49969c4bff5d7379d4a4a69a677214f8 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Fri, 13 Dec 2024 18:17:42 +0000 Subject: [PATCH 042/140] SlurmGCP. 
Don't query insert-ops if builkInsert-op is totally successful --- .../modules/slurm_files/scripts/resume.py | 96 +++++++++++-------- 1 file changed, 57 insertions(+), 39 deletions(-) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py index 5d88751a41..a26762ced6 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py @@ -387,48 +387,66 @@ def resume_nodes(nodes: List[str], resume_data: Optional[ResumeData]): log.debug(f"tpu_start_data={yaml.safe_dump(tpu_start_data)}") execute_with_futures(start_tpu, tpu_start_data) - all_successful_inserts = [] - - for group, bulk_op in bulk_operations.items(): - group_id = bulk_op["operationGroupId"] - bulk_op_name = bulk_op["name"] - if "error" in bulk_op: - error = bulk_op["error"]["errors"][0] - group_nodes = to_hostlist(grouped_nodes[group].nodes) - log.warning( - f"bulkInsert operation errors: {error['code']} name={bulk_op_name} operationGroupId={group_id} nodes={group_nodes}" - ) - successful_inserts, failed_inserts = separate( - lambda op: "error" in op, get_insert_operations(group_id) + for group, op in bulk_operations.items(): + _handle_bulk_insert_op(op, grouped_nodes[group].nodes, resume_data) + + +def _handle_bulk_insert_op(op: object, nodes: List[str], resume_data: Optional[ResumeData]) -> None: + """ + Handles **DONE** BulkInsert operations + """ + assert op["operationType"] == "bulkInsert" and op["status"] == "DONE", f"unexpected op: {op}" + + group_id = op["operationGroupId"] + if "error" in op: + error = op["error"]["errors"][0] + log.warning( + f"bulkInsert operation error: {error['code']} name={op['name']} operationGroupId={group_id} nodes={to_hostlist(nodes)}" ) - # Apparently multiple errors are possible... so join with +. - by_error_inserts = util.groupby_unsorted( - failed_inserts, - lambda op: "+".join(err["code"] for err in op["error"]["errors"]), + # TODO: does it make sense to query for insert-ops in case of bulkInsert-op error? + + created = 0 + for status in op["instancesBulkInsertOperationMetadata"]["perLocationStatus"].values(): + created += status.get("createdVmCount", 0) + if created == len(nodes): + log.info(f"created {len(nodes)} instances: nodes={to_hostlist(nodes)}") + return # no need to gather status of insert-operations. + + # TODO: + # * don't perform globalOperations aggregateList request to gather insert-operations, + # instead use specific locations from `instancesBulkInsertOperationMetadata`, + # most of the time single zone should be sufficient. + # * don't gather insert-operations per bulkInsert request, instead aggregate it across + # all bulkInserts (goes one level above this function) + successful_inserts, failed_inserts = separate( + lambda op: "error" in op, get_insert_operations(group_id) + ) + # Apparently multiple errors are possible... so join with +. 
+ by_error_inserts = util.groupby_unsorted( + failed_inserts, + lambda op: "+".join(err["code"] for err in op["error"]["errors"]), + ) + for code, failed_ops in by_error_inserts: + failed_nodes = {trim_self_link(op["targetLink"]): op for op in failed_ops} + hostlist = util.to_hostlist(failed_nodes) + count = len(failed_nodes) + log.error( + f"{count} instances failed to start: {code} ({hostlist}) operationGroupId={group_id}" + ) + failed_node, failed_op = next(iter(failed_nodes.items())) + msg = "; ".join( + f"{err['code']}: {err['message'] if 'message' in err else 'no message'}" + for err in failed_op["error"]["errors"] + ) + if code != "RESOURCE_ALREADY_EXISTS": + down_nodes_notify_jobs(failed_nodes, f"GCP Error: {msg}", resume_data) + log.error( + f"errors from insert for node '{failed_node}' ({failed_op['name']}): {msg}" ) - for code, failed_ops in by_error_inserts: - failed_nodes = {trim_self_link(op["targetLink"]): op for op in failed_ops} - hostlist = util.to_hostlist(failed_nodes) - count = len(failed_nodes) - log.error( - f"{count} instances failed to start: {code} ({hostlist}) operationGroupId={group_id}" - ) - failed_node, failed_op = next(iter(failed_nodes.items())) - msg = "; ".join( - f"{err['code']}: {err['message'] if 'message' in err else 'no message'}" - for err in failed_op["error"]["errors"] - ) - if code != "RESOURCE_ALREADY_EXISTS": - down_nodes_notify_jobs(failed_nodes, f"GCP Error: {msg}", resume_data) - log.error( - f"errors from insert for node '{failed_node}' ({failed_op['name']}): {msg}" - ) - ready_nodes = {trim_self_link(op["targetLink"]) for op in successful_inserts} - if len(ready_nodes) > 0: - ready_nodelist = to_hostlist(ready_nodes) - log.info(f"created {len(ready_nodes)} instances: nodes={ready_nodelist}") - all_successful_inserts.extend(successful_inserts) + ready_nodes = {trim_self_link(op["targetLink"]) for op in successful_inserts} + if len(ready_nodes) > 0: + log.info(f"created {len(ready_nodes)} instances: nodes={to_hostlist(ready_nodes)}") def down_nodes_notify_jobs(nodes: List[str], reason: str, resume_data: Optional[ResumeData]) -> None: From a39d1c215c4d678974d05df5768449e0fa1598d1 Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Wed, 18 Dec 2024 19:19:26 +0000 Subject: [PATCH 043/140] Update ops to operation --- examples/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/README.md b/examples/README.md index 73272df3cb..46ab3d11c0 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1525,11 +1525,11 @@ This blueprint shows how to use managed parallelstore storage options with GKE i The blueprint contains the following: * A K8s Job that uses a managed parallelstore storage volume option. -* A K8s Job that demonstrates ML training workload with managed parallelstore storage disk ops. +* A K8s Job that demonstrates ML training workload with managed parallelstore storage disk operation. > **Warning**: In this example blueprint, when storage type `Parallelstore` is specified in `gke-storage` module. > The lifecycle of the parallelstore is managed by the blueprint. -> On glcuster destroy ops, the Parallelstore storage created will also be destroyed. +> On glcuster destroy operation, the Parallelstore storage created will also be destroyed. > > [!Note] > The Kubernetes API server will only allow requests from authorized networks. 
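
The bulkInsert early-exit introduced in the resume.py change above works by summing `createdVmCount` across every `perLocationStatus` entry of the DONE operation and only falling back to per-instance insert-operation queries when some requested VMs are missing. A minimal sketch of that check, assuming a trimmed-down, hypothetical operation payload that contains only the fields used here (not a real API response):

```python
def all_vms_created(bulk_op: dict, requested: int) -> bool:
    """True when a DONE bulkInsert operation reports every requested VM as created."""
    per_location = bulk_op["instancesBulkInsertOperationMetadata"]["perLocationStatus"]
    created = sum(status.get("createdVmCount", 0) for status in per_location.values())
    return created == requested

# Hypothetical sample payload for illustration only.
sample_op = {
    "operationType": "bulkInsert",
    "status": "DONE",
    "instancesBulkInsertOperationMetadata": {
        "perLocationStatus": {
            "zones/us-central1-a": {"createdVmCount": 3},
            "zones/us-central1-b": {"createdVmCount": 1},
        }
    },
}

assert all_vms_created(sample_op, 4)      # every node came up: skip insert-op queries
assert not all_vms_created(sample_op, 6)  # shortfall: gather insert operations for error details
```
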
From 8c26d4a1aac024b0ce848e5e1ab7b962b8e1812b Mon Sep 17 00:00:00 2001 From: ighosh98 Date: Wed, 18 Dec 2024 18:18:15 +0000 Subject: [PATCH 044/140] update tas job definitions and add required toleration to kueue v0.10.0 --- .../management/kubectl-apply/manifests/kueue-v0.10.0.yaml | 1 + .../blueprints/kueue-config-files/tas-queues.yaml | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/management/kubectl-apply/manifests/kueue-v0.10.0.yaml b/modules/management/kubectl-apply/manifests/kueue-v0.10.0.yaml index 696e9b1ffb..8fb5db3638 100644 --- a/modules/management/kubectl-apply/manifests/kueue-v0.10.0.yaml +++ b/modules/management/kubectl-apply/manifests/kueue-v0.10.0.yaml @@ -12465,6 +12465,7 @@ spec: - configMap: name: kueue-manager-config name: manager-config + tolerations: - effect: NoSchedule key: components.gke.io/gke-managed-components operator: Equal diff --git a/tools/cloud-build/daily-tests/blueprints/kueue-config-files/tas-queues.yaml b/tools/cloud-build/daily-tests/blueprints/kueue-config-files/tas-queues.yaml index adaae65769..139bd3cbf5 100644 --- a/tools/cloud-build/daily-tests/blueprints/kueue-config-files/tas-queues.yaml +++ b/tools/cloud-build/daily-tests/blueprints/kueue-config-files/tas-queues.yaml @@ -31,6 +31,10 @@ spec: nodeLabels: cloud.google.com/gke-nodepool: "a2-highgpu-2g-a2highgpupool" topologyName: "gke-default" + tolerations: + - key: "nvidia.com/gpu" + operator: "Exists" + effect: NoSchedule --- apiVersion: kueue.x-k8s.io/v1beta1 kind: ClusterQueue @@ -44,7 +48,7 @@ spec: - name: "tas-flavor" resources: - name: "nvidia.com/gpu" - nominalQuota: 12 # 6 nodes, 2 GPU each + nominalQuota: 10000000 # infinite quota --- apiVersion: kueue.x-k8s.io/v1beta1 kind: LocalQueue From af1575358170a98bb0286fdada078b4e0b2d6fc2 Mon Sep 17 00:00:00 2001 From: Ankit Kinra <1037624+ankitkinra@users.noreply.github.com> Date: Wed, 18 Dec 2024 20:00:56 +0000 Subject: [PATCH 045/140] fix linter errors and deploy and test the blueprint --- examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml | 2 +- examples/gke-a3-ultragpu/nccl-installer.yaml | 70 +++++++++---------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml index 7069b90797..72bf5e2bf2 100644 --- a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml +++ b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml @@ -110,7 +110,7 @@ deployment_groups: - name: no-minor-or-node-upgrades-indefinite start_time: "2024-12-01T00:00:00Z" end_time: "2025-12-22T00:00:00Z" - exclusion_scope: NO_MINOR_OR_NODE_UPGRADES + exclusion_scope: NO_MINOR_OR_NODE_UPGRADES additional_networks: $(concat( [{ diff --git a/examples/gke-a3-ultragpu/nccl-installer.yaml b/examples/gke-a3-ultragpu/nccl-installer.yaml index f2239b2584..0227658184 100644 --- a/examples/gke-a3-ultragpu/nccl-installer.yaml +++ b/examples/gke-a3-ultragpu/nccl-installer.yaml @@ -36,45 +36,45 @@ spec: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - - matchExpressions: - - key: cloud.google.com/gke-accelerator - operator: In - values: - - nvidia-h200-141gb + - matchExpressions: + - key: cloud.google.com/gke-accelerator + operator: In + values: + - nvidia-h200-141gb tolerations: - - operator: "Exists" + - operator: "Exists" hostNetwork: true hostPID: true volumes: + - name: library-dir-host + hostPath: + path: /home/kubernetes/bin/nvidia/lib64 + type: DirectoryOrCreate + - name: gib + hostPath: + path: /home/kubernetes/bin/gib + 
initContainers: + - image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 + name: nccl-rdma-installer + resources: + requests: + cpu: 150m + securityContext: + privileged: true + volumeMounts: - name: library-dir-host - hostPath: - path: /home/kubernetes/bin/nvidia/lib64 - type: DirectoryOrCreate + mountPath: /usr/local/home/kubernetes/bin/nvidia/lib64 - name: gib - hostPath: - path: /home/kubernetes/bin/gib - initContainers: - - image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 - name: nccl-rdma-installer - resources: - requests: - cpu: 150m - securityContext: - privileged: true - volumeMounts: - - name: library-dir-host - mountPath: /usr/local/home/kubernetes/bin/nvidia/lib64 - - name: gib - mountPath: /usr/local/home/kubernetes/bin/gib - command: ["/bin/sh", "-c"] - args: - - | - set -ex - /scripts/container_entry.sh install --install-nccl - cp -r /var/lib/gib/lib64/. /usr/local/home/kubernetes/bin/nvidia/lib64 - cp -r /var/lib/gib/. /usr/local/home/kubernetes/bin/gib - ibv_devinfo || exit 1 - echo "installation finishes" + mountPath: /usr/local/home/kubernetes/bin/gib + command: ["/bin/sh", "-c"] + args: + - | + set -ex + /scripts/container_entry.sh install --install-nccl + cp -r /var/lib/gib/lib64/. /usr/local/home/kubernetes/bin/nvidia/lib64 + cp -r /var/lib/gib/. /usr/local/home/kubernetes/bin/gib + ibv_devinfo || exit 1 + echo "installation finishes" containers: - - image: "gke.gcr.io/pause:3.8@sha256:880e63f94b145e46f1b1082bb71b85e21f16b99b180b9996407d61240ceb9830" - name: pause + - image: "gke.gcr.io/pause:3.8@sha256:880e63f94b145e46f1b1082bb71b85e21f16b99b180b9996407d61240ceb9830" + name: pause From 53571b3ac70caf362f05d0427db18b397b2858ae Mon Sep 17 00:00:00 2001 From: Ankit Kinra <1037624+ankitkinra@users.noreply.github.com> Date: Wed, 18 Dec 2024 21:08:52 +0000 Subject: [PATCH 046/140] fix linter errors --- .../gke-a3-ultragpu-deployment.yaml | 14 +- .../gke-a3-ultragpu/nccl-jobset-example.yaml | 385 +++++++++--------- .../gke-a3-ultragpu/nccl-test-32-node.yaml | 385 +++++++++--------- 3 files changed, 405 insertions(+), 379 deletions(-) diff --git a/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml b/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml index 0e475ec2d6..ae897e23d7 100644 --- a/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml +++ b/examples/gke-a3-ultragpu/gke-a3-ultragpu-deployment.yaml @@ -16,15 +16,15 @@ terraform_backend_defaults: type: gcs configuration: - bucket: BUCKET_NAME + bucket: gke-a3u-manual-test vars: deployment_name: gke-a3-ultra - project_id: PROJECT_ID - region: COMPUTE_REGION - zone: COMPUTE_ZONE - authorized_cidr: / + project_id: hpc-toolkit-dev + region: europe-west1 + zone: europe-west1-b + authorized_cidr: 0.0.0.0/0 # In order to not target a BLOCK_NAME, extended_reservation can be inputted as # extended_reservation: RESERVATION_NAME - extended_reservation: RESERVATION_NAME/reservationBlocks/BLOCK_NAME - static_node_count: NODE_COUNT + extended_reservation: slurm-dev-gcp-a3u-gsc + static_node_count: 0 diff --git a/examples/gke-a3-ultragpu/nccl-jobset-example.yaml b/examples/gke-a3-ultragpu/nccl-jobset-example.yaml index da49668d0a..4e3a437604 100644 --- a/examples/gke-a3-ultragpu/nccl-jobset-example.yaml +++ b/examples/gke-a3-ultragpu/nccl-jobset-example.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: jobset.x-k8s.io/v1alpha2 kind: JobSet metadata: @@ -9,200 +23,199 @@ spec: network: enableDNSHostnames: true replicatedJobs: - - name: w - template: - spec: - parallelism: 4 - completions: 4 - - template: - metadata: - annotations: - networking.gke.io/default-interface: 'eth0' - networking.gke.io/interfaces: | - [ - {"interfaceName":"eth0","network":"default"}, - {"interfaceName":"eth1","network":"gke-a3-ultra-sub-1"}, - {"interfaceName":"eth2","network":"gke-a3-ultra-rdma-sub-0"}, - {"interfaceName":"eth3","network":"gke-a3-ultra-rdma-sub-1"}, - {"interfaceName":"eth4","network":"gke-a3-ultra-rdma-sub-2"}, - {"interfaceName":"eth5","network":"gke-a3-ultra-rdma-sub-3"}, - {"interfaceName":"eth6","network":"gke-a3-ultra-rdma-sub-4"}, - {"interfaceName":"eth7","network":"gke-a3-ultra-rdma-sub-5"}, - {"interfaceName":"eth8","network":"gke-a3-ultra-rdma-sub-6"}, - {"interfaceName":"eth9","network":"gke-a3-ultra-rdma-sub-7"} - ] - spec: - # Limit benchmark run duration - activeDeadlineSeconds: 3600 - restartPolicy: Never - nodeSelector: - cloud.google.com/gke-nodepool: a3-ultragpu-8g-a3-ultragpu-pool - tolerations: - - key: cloud.google.com/gke-queued - effect: NoSchedule - value: "true" - - - key: "nvidia.com/gpu" - operator: "Exists" - effect: "NoSchedule" - - setHostnameAsFQDN: true - volumes: - - name: gib - hostPath: - path: /home/kubernetes/bin/gib + - name: w + template: + spec: + parallelism: 4 + completions: 4 + + template: + metadata: + annotations: + networking.gke.io/default-interface: 'eth0' + networking.gke.io/interfaces: | + [ + {"interfaceName":"eth0","network":"default"}, + {"interfaceName":"eth1","network":"gke-a3-ultra-sub-1"}, + {"interfaceName":"eth2","network":"gke-a3-ultra-rdma-sub-0"}, + {"interfaceName":"eth3","network":"gke-a3-ultra-rdma-sub-1"}, + {"interfaceName":"eth4","network":"gke-a3-ultra-rdma-sub-2"}, + {"interfaceName":"eth5","network":"gke-a3-ultra-rdma-sub-3"}, + {"interfaceName":"eth6","network":"gke-a3-ultra-rdma-sub-4"}, + {"interfaceName":"eth7","network":"gke-a3-ultra-rdma-sub-5"}, + {"interfaceName":"eth8","network":"gke-a3-ultra-rdma-sub-6"}, + {"interfaceName":"eth9","network":"gke-a3-ultra-rdma-sub-7"} + ] + spec: + # Limit benchmark run duration + activeDeadlineSeconds: 3600 + restartPolicy: Never + nodeSelector: + cloud.google.com/gke-nodepool: a3-ultragpu-8g-a3-ultragpu-pool + tolerations: + - key: cloud.google.com/gke-queued + effect: NoSchedule + value: "true" + + - key: "nvidia.com/gpu" + operator: "Exists" + effect: "NoSchedule" + + setHostnameAsFQDN: true + volumes: + - name: gib + hostPath: + path: /home/kubernetes/bin/gib + - name: nvidia + hostPath: + path: /home/kubernetes/bin/nvidia + - name: lib64 + hostPath: + path: /lib64 + - name: shared-memory + emptyDir: + medium: "Memory" + sizeLimit: 250Gi + - name: sys + hostPath: + path: /sys + - name: proc-sys + hostPath: + path: /proc/sys + schedulingGates: + # Set this to a unique name per job. 
+ - name: "gke.io/topology-aware-auto-ag-4" + + initContainers: + - name: gpu-healthcheck + image: alpine:latest + command: ["/bin/sh", "-c"] + args: + - | + apk add --no-cache bash # Install bash + /bin/bash -c "set -ex + NUM_GPUS=$(/usr/local/nvidia/bin/nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits | wc -l) + if [ \${NUM_GPUS} -lt 8 ]; then + echo \"Error: Only \${NUM_GPUS} GPUs and expected 8\" + exit 1 + fi + gpu_errors=(\$(/usr/local/nvidia/bin/nvidia-smi --query-gpu=ecc.errors.uncorrected.volatile.total --format=csv,noheader,nounits)) + for gpu_index in \${!gpu_errors[@]}; do + if [ \${gpu_errors[\$gpu_index]} == '[N/A]' ]; then + echo 'Error: ERR detected in GPU index '\$gpu_index + exit 1 + elif [ \${gpu_errors[\$gpu_index]} -gt 0 ]; then + echo 'Error: Unrecoverable ECC errors detected in GPU index '\$gpu_index + exit 1 + fi + done + echo \${NUM_GPUS} GPUs found with no ERR or Unrecoverable ECC errors" + + volumeMounts: - name: nvidia - hostPath: - path: /home/kubernetes/bin/nvidia + mountPath: /usr/local/nvidia - name: lib64 - hostPath: - path: /lib64 - - name: shared-memory - emptyDir: - medium: "Memory" - sizeLimit: 250Gi - - name: sys - hostPath: - path: /sys - - name: proc-sys - hostPath: - path: /proc/sys - schedulingGates: - # Set this to a unique name per job. - - name: "gke.io/topology-aware-auto-ag-4" - - initContainers: - - name: gpu-healthcheck - image: alpine:latest - command: ["/bin/sh", "-c"] - args: - - | - apk add --no-cache bash # Install bash - /bin/bash -c "set -ex - NUM_GPUS=$(/usr/local/nvidia/bin/nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits | wc -l) - if [ \${NUM_GPUS} -lt 8 ]; then - echo \"Error: Only \${NUM_GPUS} GPUs and expected 8\" - exit 1 - fi - gpu_errors=(\$(/usr/local/nvidia/bin/nvidia-smi --query-gpu=ecc.errors.uncorrected.volatile.total --format=csv,noheader,nounits)) - for gpu_index in \${!gpu_errors[@]}; do - if [ \${gpu_errors[\$gpu_index]} == '[N/A]' ]; then - echo 'Error: ERR detected in GPU index '\$gpu_index - exit 1 - elif [ \${gpu_errors[\$gpu_index]} -gt 0 ]; then - echo 'Error: Unrecoverable ECC errors detected in GPU index '\$gpu_index - exit 1 - fi - done - echo \${NUM_GPUS} GPUs found with no ERR or Unrecoverable ECC errors" - - volumeMounts: - - name: nvidia - mountPath: /usr/local/nvidia - - name: lib64 - mountPath: /lib64 - securityContext: - privileged: true - env: - - name: LD_LIBRARY_PATH - value: /usr/local/nvidia/lib64 - - containers: - - name: nccl - stdin: true - tty: true - image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 - securityContext: - privileged: true - env: - - name: MY_NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: OMPI_ALLOW_RUN_AS_ROOT - value: "1" - - name: OMPI_ALLOW_RUN_AS_ROOT_CONFIRM - value: "1" - command: - - bash - - -c - - | - set -x - export N_NODES=4 - echo "Starting workload container on ${MY_NODE_NAME} for $N_NODES benchmark" - - # Load all the cuda libs - /sbin/ldconfig - - # Install ping - apt update -y - apt install -y iputils-ping - - # Start sshd - /scripts/container_entry.sh daemon & - - # Get helper variables to form all hostnames - export POSTFIX=$(hostname | cut -d . -f 2-) - export WORKERS_BASENAME=$(hostname | cut -d . 
-f 1 | rev | cut -d - -f 2- | rev ) - export NODE_RANK=$JOB_COMPLETION_INDEX - - - # For every worker, wait till online and add to hostfile - for i in `seq 0 $(($N_NODES-1))`; do - OTHER=${WORKERS_BASENAME}-${i}.${POSTFIX} - until ssh -p 222 -o StrictHostKeyChecking=no $OTHER hostname; do - echo Waiting for ${OTHER}... - sleep 10 - done - echo ${OTHER} port=222 slots=8 | tee -a /tmp/hostfile; + mountPath: /lib64 + securityContext: + privileged: true + env: + - name: LD_LIBRARY_PATH + value: /usr/local/nvidia/lib64 + + containers: + - name: nccl + stdin: true + tty: true + image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 + securityContext: + privileged: true + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OMPI_ALLOW_RUN_AS_ROOT + value: "1" + - name: OMPI_ALLOW_RUN_AS_ROOT_CONFIRM + value: "1" + command: + - bash + - -c + - | + set -x + export N_NODES=4 + echo "Starting workload container on ${MY_NODE_NAME} for $N_NODES benchmark" + + # Load all the cuda libs + /sbin/ldconfig + + # Install ping + apt update -y + apt install -y iputils-ping + + # Start sshd + /scripts/container_entry.sh daemon & + + # Get helper variables to form all hostnames + export POSTFIX=$(hostname | cut -d . -f 2-) + export WORKERS_BASENAME=$(hostname | cut -d . -f 1 | rev | cut -d - -f 2- | rev ) + export NODE_RANK=$JOB_COMPLETION_INDEX + + + # For every worker, wait till online and add to hostfile + for i in `seq 0 $(($N_NODES-1))`; do + OTHER=${WORKERS_BASENAME}-${i}.${POSTFIX} + until ssh -p 222 -o StrictHostKeyChecking=no $OTHER hostname; do + echo Waiting for ${OTHER}... + sleep 10 done + echo ${OTHER} port=222 slots=8 | tee -a /tmp/hostfile; + done - cat /tmp/hostfile + cat /tmp/hostfile - # Launch from head node - if [[ "${NODE_RANK}" -eq "0" ]]; then + # Launch from head node + if [[ "${NODE_RANK}" -eq "0" ]]; then - # World Level = 0x0, Rail Aligned = 0x7 - export NCCL_TESTS_SPLIT_MASK="0x0"; + # World Level = 0x0, Rail Aligned = 0x7 + export NCCL_TESTS_SPLIT_MASK="0x0"; - # Force use of libnccl-gib - export NCCL_NET=gIB + # Force use of libnccl-gib + export NCCL_NET=gIB - # Set all the correct libnccl-gib environment variables - source /usr/local/gib/scripts/set_nccl_env.sh + # Set all the correct libnccl-gib environment variables + source /usr/local/gib/scripts/set_nccl_env.sh - # Get all relevant NCCL / env vars to pass to all workers - ENV_VARS=$(echo ${!NCCL*} ${!OMPI*} LD_LIBRARY_PATH PATH | sed 's/ / -x /g') + # Get all relevant NCCL / env vars to pass to all workers + ENV_VARS=$(echo ${!NCCL*} ${!OMPI*} LD_LIBRARY_PATH PATH | sed 's/ / -x /g') - mpirun --hostfile /tmp/hostfile \ - -x $ENV_VARS \ - -mca plm_rsh_no_tree_spawn 1 \ - --mca orte_keep_fqdn_hostnames 1 \ - --mca btl self,tcp \ - --mca btl_tcp_if_include eth0 \ - --bind-to none \ - --mca plm_rsh_agent "ssh -q -o LogLevel=ERROR -o StrictHostKeyChecking=no -p 222" \ - /third_party/nccl-tests/build/all_gather_perf -b 1K -e 8G -f 2 -g 1 -w 5 --iters 100 -c 1 + mpirun --hostfile /tmp/hostfile \ + -x $ENV_VARS \ + -mca plm_rsh_no_tree_spawn 1 \ + --mca orte_keep_fqdn_hostnames 1 \ + --mca btl self,tcp \ + --mca btl_tcp_if_include eth0 \ + --bind-to none \ + --mca plm_rsh_agent "ssh -q -o LogLevel=ERROR -o StrictHostKeyChecking=no -p 222" \ + /third_party/nccl-tests/build/all_gather_perf -b 1K -e 8G -f 2 -g 1 -w 5 --iters 100 -c 1 - else - while ping -c 1 ${WORKERS_BASENAME}-0.${POSTFIX}; do - sleep 5 - done - fi - - exit 0 - - volumeMounts: - - name: nvidia - mountPath: 
/usr/local/nvidia - - name: gib - mountPath: /usr/local/gib - - name: shared-memory - mountPath: /dev/shm - resources: - limits: - nvidia.com/gpu: 8 - requests: - nvidia.com/gpu: 8 - restartPolicy: Never + else + while ping -c 1 ${WORKERS_BASENAME}-0.${POSTFIX}; do + sleep 5 + done + fi + + exit 0 + + volumeMounts: + - name: nvidia + mountPath: /usr/local/nvidia + - name: gib + mountPath: /usr/local/gib + - name: shared-memory + mountPath: /dev/shm + resources: + limits: + nvidia.com/gpu: 8 + requests: + nvidia.com/gpu: 8 diff --git a/examples/gke-a3-ultragpu/nccl-test-32-node.yaml b/examples/gke-a3-ultragpu/nccl-test-32-node.yaml index 3ce2b490d6..3f51ecd239 100644 --- a/examples/gke-a3-ultragpu/nccl-test-32-node.yaml +++ b/examples/gke-a3-ultragpu/nccl-test-32-node.yaml @@ -1,3 +1,17 @@ +# Copyright 2024 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: jobset.x-k8s.io/v1alpha2 kind: JobSet metadata: @@ -9,200 +23,199 @@ spec: network: enableDNSHostnames: true replicatedJobs: - - name: w - template: - spec: - parallelism: 32 - completions: 32 - - template: - metadata: - annotations: - networking.gke.io/default-interface: 'eth0' - networking.gke.io/interfaces: | - [ - {"interfaceName":"eth0","network":"default"}, - {"interfaceName":"eth1","network":"gke-a3-ultra-sub-1"}, - {"interfaceName":"eth2","network":"gke-a3-ultra-rdma-sub-0"}, - {"interfaceName":"eth3","network":"gke-a3-ultra-rdma-sub-1"}, - {"interfaceName":"eth4","network":"gke-a3-ultra-rdma-sub-2"}, - {"interfaceName":"eth5","network":"gke-a3-ultra-rdma-sub-3"}, - {"interfaceName":"eth6","network":"gke-a3-ultra-rdma-sub-4"}, - {"interfaceName":"eth7","network":"gke-a3-ultra-rdma-sub-5"}, - {"interfaceName":"eth8","network":"gke-a3-ultra-rdma-sub-6"}, - {"interfaceName":"eth9","network":"gke-a3-ultra-rdma-sub-7"} - ] - spec: - # Limit benchmark run duration - activeDeadlineSeconds: 3600 - restartPolicy: Never - nodeSelector: - cloud.google.com/gke-nodepool: a3-ultragpu-8g-a3-ultragpu-pool - tolerations: - - key: cloud.google.com/gke-queued - effect: NoSchedule - value: "true" - - - key: "nvidia.com/gpu" - operator: "Exists" - effect: "NoSchedule" - - setHostnameAsFQDN: true - volumes: - - name: gib - hostPath: - path: /home/kubernetes/bin/gib + - name: w + template: + spec: + parallelism: 32 + completions: 32 + + template: + metadata: + annotations: + networking.gke.io/default-interface: 'eth0' + networking.gke.io/interfaces: | + [ + {"interfaceName":"eth0","network":"default"}, + {"interfaceName":"eth1","network":"gke-a3-ultra-sub-1"}, + {"interfaceName":"eth2","network":"gke-a3-ultra-rdma-sub-0"}, + {"interfaceName":"eth3","network":"gke-a3-ultra-rdma-sub-1"}, + {"interfaceName":"eth4","network":"gke-a3-ultra-rdma-sub-2"}, + {"interfaceName":"eth5","network":"gke-a3-ultra-rdma-sub-3"}, + {"interfaceName":"eth6","network":"gke-a3-ultra-rdma-sub-4"}, + {"interfaceName":"eth7","network":"gke-a3-ultra-rdma-sub-5"}, + {"interfaceName":"eth8","network":"gke-a3-ultra-rdma-sub-6"}, + 
{"interfaceName":"eth9","network":"gke-a3-ultra-rdma-sub-7"} + ] + spec: + # Limit benchmark run duration + activeDeadlineSeconds: 3600 + restartPolicy: Never + nodeSelector: + cloud.google.com/gke-nodepool: a3-ultragpu-8g-a3-ultragpu-pool + tolerations: + - key: cloud.google.com/gke-queued + effect: NoSchedule + value: "true" + + - key: "nvidia.com/gpu" + operator: "Exists" + effect: "NoSchedule" + + setHostnameAsFQDN: true + volumes: + - name: gib + hostPath: + path: /home/kubernetes/bin/gib + - name: nvidia + hostPath: + path: /home/kubernetes/bin/nvidia + - name: lib64 + hostPath: + path: /lib64 + - name: shared-memory + emptyDir: + medium: "Memory" + sizeLimit: 250Gi + - name: sys + hostPath: + path: /sys + - name: proc-sys + hostPath: + path: /proc/sys + schedulingGates: + # Set this to a unique name per job. + - name: "gke.io/topology-aware-auto-ag-32" + + initContainers: + - name: gpu-healthcheck + image: alpine:latest + command: ["/bin/sh", "-c"] + args: + - | + apk add --no-cache bash # Install bash + /bin/bash -c "set -ex + NUM_GPUS=$(/usr/local/nvidia/bin/nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits | wc -l) + if [ \${NUM_GPUS} -lt 8 ]; then + echo \"Error: Only \${NUM_GPUS} GPUs and expected 8\" + exit 1 + fi + gpu_errors=(\$(/usr/local/nvidia/bin/nvidia-smi --query-gpu=ecc.errors.uncorrected.volatile.total --format=csv,noheader,nounits)) + for gpu_index in \${!gpu_errors[@]}; do + if [ \${gpu_errors[\$gpu_index]} == '[N/A]' ]; then + echo 'Error: ERR detected in GPU index '\$gpu_index + exit 1 + elif [ \${gpu_errors[\$gpu_index]} -gt 0 ]; then + echo 'Error: Unrecoverable ECC errors detected in GPU index '\$gpu_index + exit 1 + fi + done + echo \${NUM_GPUS} GPUs found with no ERR or Unrecoverable ECC errors" + + volumeMounts: - name: nvidia - hostPath: - path: /home/kubernetes/bin/nvidia + mountPath: /usr/local/nvidia - name: lib64 - hostPath: - path: /lib64 - - name: shared-memory - emptyDir: - medium: "Memory" - sizeLimit: 250Gi - - name: sys - hostPath: - path: /sys - - name: proc-sys - hostPath: - path: /proc/sys - schedulingGates: - # Set this to a unique name per job. 
- - name: "gke.io/topology-aware-auto-ag-32" - - initContainers: - - name: gpu-healthcheck - image: alpine:latest - command: ["/bin/sh", "-c"] - args: - - | - apk add --no-cache bash # Install bash - /bin/bash -c "set -ex - NUM_GPUS=$(/usr/local/nvidia/bin/nvidia-smi --query-gpu=driver_version --format=csv,noheader,nounits | wc -l) - if [ \${NUM_GPUS} -lt 8 ]; then - echo \"Error: Only \${NUM_GPUS} GPUs and expected 8\" - exit 1 - fi - gpu_errors=(\$(/usr/local/nvidia/bin/nvidia-smi --query-gpu=ecc.errors.uncorrected.volatile.total --format=csv,noheader,nounits)) - for gpu_index in \${!gpu_errors[@]}; do - if [ \${gpu_errors[\$gpu_index]} == '[N/A]' ]; then - echo 'Error: ERR detected in GPU index '\$gpu_index - exit 1 - elif [ \${gpu_errors[\$gpu_index]} -gt 0 ]; then - echo 'Error: Unrecoverable ECC errors detected in GPU index '\$gpu_index - exit 1 - fi - done - echo \${NUM_GPUS} GPUs found with no ERR or Unrecoverable ECC errors" - - volumeMounts: - - name: nvidia - mountPath: /usr/local/nvidia - - name: lib64 - mountPath: /lib64 - securityContext: - privileged: true - env: - - name: LD_LIBRARY_PATH - value: /usr/local/nvidia/lib64 - - containers: - - name: nccl - stdin: true - tty: true - image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 - securityContext: - privileged: true - env: - - name: MY_NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: OMPI_ALLOW_RUN_AS_ROOT - value: "1" - - name: OMPI_ALLOW_RUN_AS_ROOT_CONFIRM - value: "1" - command: - - bash - - -c - - | - set -x - export N_NODES=32 - echo "Starting workload container on ${MY_NODE_NAME} for $N_NODES benchmark" - - # Load all the cuda libs - /sbin/ldconfig - - # Install ping - apt update -y - apt install -y iputils-ping - - # Start sshd - /scripts/container_entry.sh daemon & - - # Get helper variables to form all hostnames - export POSTFIX=$(hostname | cut -d . -f 2-) - export WORKERS_BASENAME=$(hostname | cut -d . -f 1 | rev | cut -d - -f 2- | rev ) - export NODE_RANK=$JOB_COMPLETION_INDEX - - - # For every worker, wait till online and add to hostfile - for i in `seq 0 $(($N_NODES-1))`; do - OTHER=${WORKERS_BASENAME}-${i}.${POSTFIX} - until ssh -p 222 -o StrictHostKeyChecking=no $OTHER hostname; do - echo Waiting for ${OTHER}... - sleep 10 - done - echo ${OTHER} port=222 slots=8 | tee -a /tmp/hostfile; + mountPath: /lib64 + securityContext: + privileged: true + env: + - name: LD_LIBRARY_PATH + value: /usr/local/nvidia/lib64 + + containers: + - name: nccl + stdin: true + tty: true + image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 + securityContext: + privileged: true + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OMPI_ALLOW_RUN_AS_ROOT + value: "1" + - name: OMPI_ALLOW_RUN_AS_ROOT_CONFIRM + value: "1" + command: + - bash + - -c + - | + set -x + export N_NODES=32 + echo "Starting workload container on ${MY_NODE_NAME} for $N_NODES benchmark" + + # Load all the cuda libs + /sbin/ldconfig + + # Install ping + apt update -y + apt install -y iputils-ping + + # Start sshd + /scripts/container_entry.sh daemon & + + # Get helper variables to form all hostnames + export POSTFIX=$(hostname | cut -d . -f 2-) + export WORKERS_BASENAME=$(hostname | cut -d . 
-f 1 | rev | cut -d - -f 2- | rev ) + export NODE_RANK=$JOB_COMPLETION_INDEX + + + # For every worker, wait till online and add to hostfile + for i in `seq 0 $(($N_NODES-1))`; do + OTHER=${WORKERS_BASENAME}-${i}.${POSTFIX} + until ssh -p 222 -o StrictHostKeyChecking=no $OTHER hostname; do + echo Waiting for ${OTHER}... + sleep 10 done + echo ${OTHER} port=222 slots=8 | tee -a /tmp/hostfile; + done - cat /tmp/hostfile + cat /tmp/hostfile - # Launch from head node - if [[ "${NODE_RANK}" -eq "0" ]]; then + # Launch from head node + if [[ "${NODE_RANK}" -eq "0" ]]; then - # World Level = 0x0, Rail Aligned = 0x7 - export NCCL_TESTS_SPLIT_MASK="0x0"; + # World Level = 0x0, Rail Aligned = 0x7 + export NCCL_TESTS_SPLIT_MASK="0x0"; - # Force use of libnccl-gib - export NCCL_NET=gIB + # Force use of libnccl-gib + export NCCL_NET=gIB - # Set all the correct libnccl-gib environment variables - source /usr/local/gib/scripts/set_nccl_env.sh + # Set all the correct libnccl-gib environment variables + source /usr/local/gib/scripts/set_nccl_env.sh - # Get all relevant NCCL / env vars to pass to all workers - ENV_VARS=$(echo ${!NCCL*} ${!OMPI*} LD_LIBRARY_PATH PATH | sed 's/ / -x /g') + # Get all relevant NCCL / env vars to pass to all workers + ENV_VARS=$(echo ${!NCCL*} ${!OMPI*} LD_LIBRARY_PATH PATH | sed 's/ / -x /g') - mpirun --hostfile /tmp/hostfile \ - -x $ENV_VARS \ - -mca plm_rsh_no_tree_spawn 1 \ - --mca orte_keep_fqdn_hostnames 1 \ - --mca btl self,tcp \ - --mca btl_tcp_if_include eth0 \ - --bind-to none \ - --mca plm_rsh_agent "ssh -q -o LogLevel=ERROR -o StrictHostKeyChecking=no -p 222" \ - /third_party/nccl-tests/build/all_gather_perf -b 1K -e 8G -f 2 -g 1 -w 5 --iters 100 -c 1 + mpirun --hostfile /tmp/hostfile \ + -x $ENV_VARS \ + -mca plm_rsh_no_tree_spawn 1 \ + --mca orte_keep_fqdn_hostnames 1 \ + --mca btl self,tcp \ + --mca btl_tcp_if_include eth0 \ + --bind-to none \ + --mca plm_rsh_agent "ssh -q -o LogLevel=ERROR -o StrictHostKeyChecking=no -p 222" \ + /third_party/nccl-tests/build/all_gather_perf -b 1K -e 8G -f 2 -g 1 -w 5 --iters 100 -c 1 - else - while ping -c 1 ${WORKERS_BASENAME}-0.${POSTFIX}; do - sleep 5 - done - fi - - exit 0 - - volumeMounts: - - name: nvidia - mountPath: /usr/local/nvidia - - name: gib - mountPath: /usr/local/gib - - name: shared-memory - mountPath: /dev/shm - resources: - limits: - nvidia.com/gpu: 8 - requests: - nvidia.com/gpu: 8 - restartPolicy: Never + else + while ping -c 1 ${WORKERS_BASENAME}-0.${POSTFIX}; do + sleep 5 + done + fi + + exit 0 + + volumeMounts: + - name: nvidia + mountPath: /usr/local/nvidia + - name: gib + mountPath: /usr/local/gib + - name: shared-memory + mountPath: /dev/shm + resources: + limits: + nvidia.com/gpu: 8 + requests: + nvidia.com/gpu: 8 From e3a374aef529d17659a1e00e826c389fcb4f9be4 Mon Sep 17 00:00:00 2001 From: Ankit Kinra <1037624+ankitkinra@users.noreply.github.com> Date: Wed, 18 Dec 2024 21:23:28 +0000 Subject: [PATCH 047/140] fix more linter errors --- examples/gke-a3-ultragpu/nccl-test.yaml | 122 +++++++++++------------- 1 file changed, 58 insertions(+), 64 deletions(-) diff --git a/examples/gke-a3-ultragpu/nccl-test.yaml b/examples/gke-a3-ultragpu/nccl-test.yaml index 994601472f..9b4fd881b7 100644 --- a/examples/gke-a3-ultragpu/nccl-test.yaml +++ b/examples/gke-a3-ultragpu/nccl-test.yaml @@ -53,41 +53,38 @@ metadata: ] spec: volumes: + - name: library-dir-host + hostPath: + path: /home/kubernetes/bin/nvidia + - name: gib + hostPath: + path: /home/kubernetes/bin/gib + - name: shared-memory + emptyDir: + medium: "Memory" 
+ sizeLimit: 250Gi + containers: + - image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 + name: test + volumeMounts: - name: library-dir-host - hostPath: - path: /home/kubernetes/bin/nvidia + mountPath: /usr/local/nvidia - name: gib - hostPath: - path: /home/kubernetes/bin/gib + mountPath: /usr/local/gib - name: shared-memory - emptyDir: - medium: "Memory" - sizeLimit: 250Gi - containers: - - image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 - name: test - resources: - requests: - cpu: 150m - volumeMounts: - - name: library-dir-host - mountPath: /usr/local/nvidia - - name: gib - mountPath: /usr/local/gib - - name: shared-memory - mountPath: /dev/shm - env: - - name: LD_LIBRARY_PATH - value: /usr/local/nvidia/lib64 - resources: - limits: - nvidia.com/gpu: 8 - command: ["/bin/bash", "-c"] - args: - - | - /scripts/container_entry.sh shell - source /usr/local/gib/scripts/set_nccl_env.sh - sleep infinity + mountPath: /dev/shm + env: + - name: LD_LIBRARY_PATH + value: /usr/local/nvidia/lib64 + resources: + limits: + nvidia.com/gpu: 8 + command: ["/bin/bash", "-c"] + args: + - | + /scripts/container_entry.sh shell + source /usr/local/gib/scripts/set_nccl_env.sh + sleep infinity --- apiVersion: v1 kind: Pod @@ -112,38 +109,35 @@ metadata: ] spec: volumes: + - name: library-dir-host + hostPath: + path: /home/kubernetes/bin/nvidia + - name: gib + hostPath: + path: /home/kubernetes/bin/gib + - name: shared-memory + emptyDir: + medium: "Memory" + sizeLimit: 250Gi + containers: + - image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 + name: test + volumeMounts: - name: library-dir-host - hostPath: - path: /home/kubernetes/bin/nvidia + mountPath: /usr/local/nvidia - name: gib - hostPath: - path: /home/kubernetes/bin/gib + mountPath: /usr/local/gib - name: shared-memory - emptyDir: - medium: "Memory" - sizeLimit: 250Gi - containers: - - image: us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:v1.0.2 - name: test - resources: - requests: - cpu: 150m - volumeMounts: - - name: library-dir-host - mountPath: /usr/local/nvidia - - name: gib - mountPath: /usr/local/gib - - name: shared-memory - mountPath: /dev/shm - env: - - name: LD_LIBRARY_PATH - value: /usr/local/nvidia/lib64 - resources: - limits: - nvidia.com/gpu: 8 - command: ["/bin/bash", "-c"] - args: - - | - /scripts/container_entry.sh shell - source /usr/local/gib/scripts/set_nccl_env.sh - sleep infinity + mountPath: /dev/shm + env: + - name: LD_LIBRARY_PATH + value: /usr/local/nvidia/lib64 + resources: + limits: + nvidia.com/gpu: 8 + command: ["/bin/bash", "-c"] + args: + - | + /scripts/container_entry.sh shell + source /usr/local/gib/scripts/set_nccl_env.sh + sleep infinity From cb8d7d0a85dab53e264a8fcb5b66738b86ff1647 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Mon, 16 Dec 2024 05:10:51 +0000 Subject: [PATCH 048/140] Use dataclass to represent machine type --- .../modules/slurm_files/scripts/conf.py | 13 +- .../slurm_files/scripts/tests/common.py | 3 +- .../slurm_files/scripts/tests/test_conf.py | 4 +- .../slurm_files/scripts/tests/test_util.py | 54 +++++- .../modules/slurm_files/scripts/util.py | 164 ++++++++++-------- 5 files changed, 154 insertions(+), 84 deletions(-) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py index a4ff1e488a..dd3d628cbb 100755 --- 
a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py @@ -72,7 +72,7 @@ def get(key, default): no_comma_params = get("no_comma_params", False) any_gpus = any( - lkp.template_info(nodeset.instance_template).gpu_count > 0 + lkp.template_info(nodeset.instance_template).gpu for nodeset in lkp.cfg.nodeset.values() ) @@ -136,7 +136,7 @@ def nodeset_lines(nodeset, lkp: util.Lookup) -> str: # follow https://slurm.schedmd.com/slurm.conf.html#OPT_Boards # by setting Boards, SocketsPerBoard, CoresPerSocket, and ThreadsPerCore - gres = f"gpu:{template_info.gpu_count}" if template_info.gpu_count else None + gres = f"gpu:{template_info.gpu.count}" if template_info.gpu else None node_conf = { "RealMemory": machine_conf.memory, "Boards": machine_conf.boards, @@ -360,11 +360,10 @@ def gen_cloud_gres_conf(lkp: util.Lookup) -> None: gpu_nodes = defaultdict(list) for nodeset in lkp.cfg.nodeset.values(): - template_info = lkp.template_info(nodeset.instance_template) - gpu_count = template_info.gpu_count - if gpu_count == 0: - continue - gpu_nodes[gpu_count].append(lkp.nodelist(nodeset)) + ti = lkp.template_info(nodeset.instance_template) + gpu_count = ti.gpu.count if ti.gpu else 0 + if gpu_count: + gpu_nodes[gpu_count].append(lkp.nodelist(nodeset)) lines = [ dict_to_conf( diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/common.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/common.py index 54d7f45d43..643712efa7 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/common.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/common.py @@ -20,6 +20,7 @@ if SCRIPTS_DIR not in sys.path: sys.path.append(SCRIPTS_DIR) # TODO: make this more robust +import util # TODO: use "real" classes once they are defined (instead of NSDict) @@ -79,7 +80,7 @@ class TstMachineConf: @dataclass class TstTemplateInfo: - gpu_count: int = 0 + gpu: Optional[util.AcceleratorInfo] @dataclass class TstInstance: diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_conf.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_conf.py index 6585b2fcd1..a8ea8c1c13 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_conf.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_conf.py @@ -44,7 +44,9 @@ def test_nodeset_lines(): node_conf={"red": "velvet", "CPUs": 55}, ) lkp = util.Lookup(TstCfg()) - lkp.template_info = Mock(return_value=TstTemplateInfo(gpu_count=33)) + lkp.template_info = Mock(return_value=TstTemplateInfo( + gpu=util.AcceleratorInfo(type="Popov", count=33) + )) mc = TstMachineConf( cpus=5, memory=6, diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py index b6e73526f9..40e2cd947d 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py +++ 
b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_util.py @@ -18,7 +18,7 @@ from mock import Mock from common import TstNodeset, TstCfg # needed to import util import util -from util import NodeState +from util import NodeState, MachineType, AcceleratorInfo from datetime import timedelta from google.api_core.client_options import ClientOptions # noqa: E402 @@ -348,3 +348,55 @@ def test_node_state(node: str, state: Optional[NodeState], want: NodeState | Non else: assert lkp.node_state(node) == want + + +@pytest.mark.parametrize( + "jo,want", + [ + ({ + "accelerators": [ { "guestAcceleratorCount": 1, "guestAcceleratorType": "nvidia-tesla-a100" } ], + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "description": "Accelerator Optimized: 1 NVIDIA Tesla A100 GPU, 12 vCPUs, 85GB RAM", + "guestCpus": 12, + "id": "1000012", + "imageSpaceGb": 0, + "isSharedCpu": False, + "kind": "compute#machineType", + "maximumPersistentDisks": 128, + "maximumPersistentDisksSizeGb": "263168", + "memoryMb": 87040, + "name": "a2-highgpu-1g", + "selfLink": "https://www.googleapis.com/compute/v1/projects/io-playground/zones/us-central1-a/machineTypes/a2-highgpu-1g", + "zone": "us-central1-a" + }, MachineType( + name="a2-highgpu-1g", + guest_cpus=12, + memory_mb=87040, + accelerators=[ + AcceleratorInfo(type="nvidia-tesla-a100", count=1) + ] + )), + ({ + "architecture": "X86_64", + "creationTimestamp": "1969-12-31T16:00:00.000-08:00", + "description": "8 vCPUs, 32 GB RAM", + "guestCpus": 8, + "id": "1210008", + "imageSpaceGb": 0, + "isSharedCpu": False, + "kind": "compute#machineType", + "maximumPersistentDisks": 128, + "maximumPersistentDisksSizeGb": "263168", + "memoryMb": 32768, + "name": "t2d-standard-8", + "selfLink": "https://www.googleapis.com/compute/v1/projects/io-playground/zones/europe-north2-b/machineTypes/t2d-standard-8", + "zone": "europe-north2-b" + }, MachineType( + name="t2d-standard-8", + guest_cpus=8, + memory_mb=32768, + accelerators=[] + )), + ]) +def test_MachineType_from_json(jo: dict, want: MachineType): + assert MachineType.from_json(jo) == want diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py index 62fafde6e1..96955309d3 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py @@ -129,6 +129,65 @@ class ApiEndpoint(Enum): SECRET = "secret_manager" +@dataclass(frozen=True) +class AcceleratorInfo: + type: str + count: int + + @classmethod + def from_json(cls, jo: dict) -> "AcceleratorInfo": + return cls( + type=jo["guestAcceleratorType"], + count=jo["guestAcceleratorCount"]) + +@dataclass(frozen=True) +class MachineType: + name: str + guest_cpus: int + memory_mb: int + accelerators: List[AcceleratorInfo] + + @classmethod + def from_json(cls, jo: dict) -> "MachineType": + return cls( + name=jo["name"], + guest_cpus=jo["guestCpus"], + memory_mb=jo["memoryMb"], + accelerators=[ + AcceleratorInfo.from_json(a) for a in jo.get("accelerators", [])], + ) + + @property + def family(self) -> str: + # TODO: doesn't work with N1 custom machine types + # See https://cloud.google.com/compute/docs/instances/creating-instance-with-custom-machine-type#create + return self.name.split("-")[0] + + @property + def supports_smt(self) -> bool: + # 
https://cloud.google.com/compute/docs/cpu-platforms + if self.family in ("t2a", "t2d", "h3", "c4a",): + return False + if self.guest_cpus == 1: + return False + return True + + @property + def sockets(self) -> int: + return { + "h3": 2, + "c2d": 2 if self.guest_cpus > 56 else 1, + "a3": 2, + "c2": 2 if self.guest_cpus > 30 else 1, + "c3": 2 if self.guest_cpus > 88 else 1, + "c3d": 2 if self.guest_cpus > 180 else 1, + "c4": 2 if self.guest_cpus > 96 else 1, + }.get( + self.family, 1, # assume 1 socket for all other families + ) + + + @lru_cache(maxsize=1) def default_credentials(): return google.auth.default()[0] @@ -1111,46 +1170,8 @@ def get_insert_operations(group_ids): return get_filtered_operations(" AND ".join(f"({f})" for f in filters if f)) -def machine_type_family(mt: str) -> str: - """get machine type family from machine type""" - # TODO: doesn't work with N1 custom machine types - # See https://cloud.google.com/compute/docs/instances/creating-instance-with-custom-machine-type#create - return mt.split("-")[0] - - -def machine_type_sockets(template) -> int: - guestCpus: int = int(template.machine_info.guestCpus) - return { - "h3": 2, - "c2d": 2 if guestCpus > 56 else 1, - "a3": 2, - "c2": 2 if guestCpus > 30 else 1, - "c3": 2 if guestCpus > 88 else 1, - "c3d": 2 if guestCpus > 180 else 1, - "c4": 2 if guestCpus > 96 else 1, - }.get( - machine_type_family(template.machineType), - 1, # assume 1 socket for all other families - ) - - -def isSmt(template) -> bool: - # https://cloud.google.com/compute/docs/cpu-platforms - noSmtFamily = ( - "t2a", - "t2d", - "h3", - "c4a", - ) - if machine_type_family(template.machineType) in noSmtFamily: - return False - if template.machine_info.guestCpus == 1: - return False - return True - - def getThreadsPerCore(template) -> int: - if not isSmt(template): + if not template.machine_type.supports_smt: return 1 return template.advancedMachineFeatures.threadsPerCore or 2 @@ -1650,53 +1671,48 @@ def machine_types(self): op = act.aggregatedList_next(op, result) return machines - def machine_type(self, machine_type: str): - """ """ + def machine_type(self, name: str) -> MachineType: custom_patt = re.compile( r"((?P\w+)-)?custom-(?P\d+)-(?P\d+)" ) - custom_match = custom_patt.match(machine_type) - if custom_match is not None: - groups = custom_match.groupdict() - cpus, mem = (groups[k] for k in ["cpus", "mem"]) - machine_info = { - "guestCpus": int(cpus), - "memoryMb": int(mem), - } - else: - machines = self.machine_types() - if machine_type not in machines: - raise Exception(f"machine type {machine_type} not found") - per_zone = machines[machine_type] - assert per_zone - machine_info = next(iter(per_zone.values())) # pick the first/any zone - return NSDict(machine_info) + if match := custom_patt.match(name): + return MachineType( + name=name, + guest_cpus=int(match.group("cpus")), + memory_mb=int(match.group("mem")), + accelerators=[], + ) + + machines = self.machine_types() + if name not in machines: + raise Exception(f"machine type {name} not found") + per_zone = machines[name] + assert per_zone + return MachineType.from_json( + next(iter(per_zone.values())) # pick the first/any zone + ) def template_machine_conf(self, template_link): template = self.template_info(template_link) - if not template.machineType: - temp_name = trim_self_link(template_link) - raise Exception(f"instance template {temp_name} has no machine type") - template.machine_info = self.machine_type(template.machineType) - machine = template.machine_info + machine = template.machine_type 
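For quick reference, the behaviour encoded by the new dataclasses can be summarised in a standalone sketch (mirroring the `a2-highgpu-1g` and `t2d-standard-8` fixtures from `test_util.py` above; the authoritative definitions are the ones in `util.py`, so treat this only as an illustration):

```python
from dataclasses import dataclass
from typing import List


@dataclass(frozen=True)
class AcceleratorInfo:
    type: str
    count: int


@dataclass(frozen=True)
class MachineType:
    name: str
    guest_cpus: int
    memory_mb: int
    accelerators: List[AcceleratorInfo]

    @classmethod
    def from_json(cls, jo: dict) -> "MachineType":
        # Field names follow the compute#machineType JSON shape used in the tests above.
        return cls(
            name=jo["name"],
            guest_cpus=jo["guestCpus"],
            memory_mb=jo["memoryMb"],
            accelerators=[
                AcceleratorInfo(a["guestAcceleratorType"], a["guestAcceleratorCount"])
                for a in jo.get("accelerators", [])
            ],
        )

    @property
    def family(self) -> str:
        # e.g. "a2-highgpu-1g" -> "a2"
        return self.name.split("-")[0]


a2 = MachineType.from_json({
    "name": "a2-highgpu-1g",
    "guestCpus": 12,
    "memoryMb": 87040,
    "accelerators": [
        {"guestAcceleratorType": "nvidia-tesla-a100", "guestAcceleratorCount": 1}
    ],
})
t2d = MachineType.from_json(
    {"name": "t2d-standard-8", "guestCpus": 8, "memoryMb": 32768}
)

assert a2.family == "a2" and a2.accelerators == [AcceleratorInfo("nvidia-tesla-a100", 1)]
assert t2d.family == "t2d" and t2d.accelerators == []
```

The `family` prefix is what `supports_smt` and `sockets` key off, which is why custom N1 machine types remain a known gap (see the TODO comment in the hunk above).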
machine_conf = NSDict() machine_conf.boards = 1 # No information, assume 1 - machine_conf.sockets = machine_type_sockets(template) + machine_conf.sockets = machine.sockets # the value below for SocketsPerBoard must be type int machine_conf.sockets_per_board = machine_conf.sockets // machine_conf.boards machine_conf.threads_per_core = 1 _div = 2 if getThreadsPerCore(template) == 1 else 1 machine_conf.cpus = ( - int(machine.guestCpus / _div) if isSmt(template) else machine.guestCpus + int(machine.guest_cpus / _div) if machine.supports_smt else machine.guest_cpus ) machine_conf.cores_per_socket = int(machine_conf.cpus / machine_conf.sockets) # Because the actual memory on the host will be different than # what is configured (e.g. kernel will take it). From # experiments, about 16 MB per GB are used (plus about 400 MB # buffer for the first couple of GB's. Using 30 MB to be safe. - gb = machine.memoryMb // 1024 - machine_conf.memory = machine.memoryMb - (400 + (30 * gb)) + gb = machine.memory_mb // 1024 + machine_conf.memory = machine.memory_mb - (400 + (30 * gb)) return machine_conf @contextmanager @@ -1741,20 +1757,20 @@ def template_info(self, template_link): # name and link are not in properties, so stick them in template.name = template_name template.link = template_link + template.machine_type = self.machine_type(template.machineType) # TODO delete metadata to reduce memory footprint? # del template.metadata # translate gpus into an easier-to-read format - machine_info = self.machine_type(template.machineType) - if machine_info.accelerators: - template.gpu_type = machine_info.accelerators[0].guestAcceleratorType - template.gpu_count = machine_info.accelerators[0].guestAcceleratorCount + if template.machine_type.accelerators: + template.gpu = template.machine_type.accelerators[0] elif template.guestAccelerators: - template.gpu_type = template.guestAccelerators[0].acceleratorType - template.gpu_count = template.guestAccelerators[0].acceleratorCount + tga = template.guestAccelerators[0] + template.gpu = AcceleratorInfo( + type=tga.acceleratorType, + count=tga.acceleratorCount) else: - template.gpu_type = None - template.gpu_count = 0 + template.gpu = None # keep write access open for minimum time with self.template_cache(writeback=True) as cache: From 842f48380510c3b0c2b00484448d963579e10c18 Mon Sep 17 00:00:00 2001 From: abbas1902 Date: Thu, 19 Dec 2024 00:58:05 +0000 Subject: [PATCH 049/140] Add terraform setup to github workflow config --- .github/workflows/pr-precommit.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pr-precommit.yml b/.github/workflows/pr-precommit.yml index 5b1b5091cf..272fb524f4 100644 --- a/.github/workflows/pr-precommit.yml +++ b/.github/workflows/pr-precommit.yml @@ -41,6 +41,10 @@ jobs: with: go-version: '1.22' check-latest: true + - uses: hashicorp/setup-terraform@v3 + with: + terraform_version: "1.5.7" + terraform_wrapper: false - run: make install-dev-deps - uses: terraform-linters/setup-tflint@v4 with: From 8e54adc3cf1946dc4e2e89cac009e4883fb023d1 Mon Sep 17 00:00:00 2001 From: abbas1902 Date: Wed, 18 Dec 2024 21:54:22 +0000 Subject: [PATCH 050/140] Add validation to prevent creation of empty nodesets --- .../compute/schedmd-slurm-gcp-v6-nodeset/outputs.tf | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/outputs.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/outputs.tf index ad78840a38..5781d2415c 100644 --- 
a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/outputs.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/outputs.tf @@ -75,7 +75,14 @@ output "nodeset" { precondition { condition = var.future_reservation == "" || local.fr_zone == var.zone error_message = <<-EOD - The zone of the deployment must match that of the future reservation" + The zone of the deployment must match that of the future reservation + EOD + } + + precondition { + condition = var.node_count_dynamic_max > 0 || var.node_count_static > 0 + error_message = <<-EOD + This nodeset contains zero nodes, there should be at least one static or dynamic node EOD } } From b4ac5130739300f8a8a78d50dcfe537d06af5307 Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Thu, 19 Dec 2024 06:30:13 +0000 Subject: [PATCH 051/140] Fix gke parallelstore blueprint name going beyond network char limit --- examples/README.md | 4 ++-- ...-parallelstore.yaml => gke-managed-parallelstore.yaml} | 2 +- modules/file-system/gke-storage/README.md | 2 +- ...-parallelstore.yaml => gke-managed-parallelstore.yaml} | 6 +++--- ...ed-parallelstore.yml => gke-managed-parallelstore.yml} | 8 ++++---- 5 files changed, 11 insertions(+), 11 deletions(-) rename examples/{gke-storage-managed-parallelstore.yaml => gke-managed-parallelstore.yaml} (98%) rename tools/cloud-build/daily-tests/builds/{gke-storage-managed-parallelstore.yaml => gke-managed-parallelstore.yaml} (90%) rename tools/cloud-build/daily-tests/tests/{gke-storage-managed-parallelstore.yml => gke-managed-parallelstore.yml} (77%) diff --git a/examples/README.md b/examples/README.md index 46ab3d11c0..29db27df94 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1518,7 +1518,7 @@ cleaned up when the job is deleted. [storage-gke.yaml]: ../examples/storage-gke.yaml -### [gke-storage-managed-parallelstore.yaml] ![core-badge] ![experimental-badge] +### [gke-managed-parallelstore.yaml] ![core-badge] ![experimental-badge] This blueprint shows how to use managed parallelstore storage options with GKE in the toolkit. @@ -1540,7 +1540,7 @@ The blueprint contains the following: > `--vars authorized_cidr=/32`.** You can use a service like > [whatismyip.com](https://whatismyip.com) to determine your IP address. -[gke-storage-managed-parallelstore.yaml]: ../examples/gke-storage-managed-parallelstore.yaml +[gke-managed-parallelstore.yaml]: ../examples/gke-managed-parallelstore.yaml ### [gke-a3-megagpu.yaml] ![core-badge] ![experimental-badge] diff --git a/examples/gke-storage-managed-parallelstore.yaml b/examples/gke-managed-parallelstore.yaml similarity index 98% rename from examples/gke-storage-managed-parallelstore.yaml rename to examples/gke-managed-parallelstore.yaml index 414a2b180d..4425f13181 100644 --- a/examples/gke-storage-managed-parallelstore.yaml +++ b/examples/gke-managed-parallelstore.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. --- -blueprint_name: gke-storage-managed-parallelstore +blueprint_name: gke-managed-parallelstore vars: project_id: ## Set GCP Project ID Here ## deployment_name: gke-storage-managed-ps diff --git a/modules/file-system/gke-storage/README.md b/modules/file-system/gke-storage/README.md index f4ebd8add0..fc65e76d4d 100644 --- a/modules/file-system/gke-storage/README.md +++ b/modules/file-system/gke-storage/README.md @@ -39,7 +39,7 @@ then use them in a `gke-job-template` to dynamically provision the resource. 
``` See example -[gke-storage-managed-parallelstore.yaml](../../../examples/README.md#gke-storage-managed-parallelstoreyaml--) blueprint +[gke-managed-parallelstore.yaml](../../../examples/README.md#gke-managed-parallelstoreyaml--) blueprint for a complete example. ### Authorized Network diff --git a/tools/cloud-build/daily-tests/builds/gke-storage-managed-parallelstore.yaml b/tools/cloud-build/daily-tests/builds/gke-managed-parallelstore.yaml similarity index 90% rename from tools/cloud-build/daily-tests/builds/gke-storage-managed-parallelstore.yaml rename to tools/cloud-build/daily-tests/builds/gke-managed-parallelstore.yaml index 8fbc9c1794..01010a0435 100644 --- a/tools/cloud-build/daily-tests/builds/gke-storage-managed-parallelstore.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-managed-parallelstore.yaml @@ -27,7 +27,7 @@ timeout: 14400s # 4hr steps: ## Test GKE -- id: gke-storage-managed-parallelstore +- id: gke-managed-parallelstore name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: @@ -40,7 +40,7 @@ steps: cd /workspace && make BUILD_ID_FULL=$BUILD_ID BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - SG_EXAMPLE=examples/gke-storage-managed-parallelstore.yaml + SG_EXAMPLE=examples/gke-managed-parallelstore.yaml # adding vm to act as remote node echo ' - id: remote-node' >> $${SG_EXAMPLE} @@ -58,4 +58,4 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-storage-managed-parallelstore.yml" + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-managed-parallelstore.yml" diff --git a/tools/cloud-build/daily-tests/tests/gke-storage-managed-parallelstore.yml b/tools/cloud-build/daily-tests/tests/gke-managed-parallelstore.yml similarity index 77% rename from tools/cloud-build/daily-tests/tests/gke-storage-managed-parallelstore.yml rename to tools/cloud-build/daily-tests/tests/gke-managed-parallelstore.yml index bfb8bc32d7..cd9e7f712b 100644 --- a/tools/cloud-build/daily-tests/tests/gke-storage-managed-parallelstore.yml +++ b/tools/cloud-build/daily-tests/tests/gke-managed-parallelstore.yml @@ -12,16 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
--- -test_name: gke-storage-managed-parallelstore -deployment_name: gke-storage-managed-parallelstore-{{ build }} +test_name: gke-managed-parallelstore +deployment_name: gke-managed-parallelstore-{{ build }} zone: us-central1-a # for remote node region: us-central1 workspace: /workspace -blueprint_yaml: "{{ workspace }}/examples/gke-storage-managed-parallelstore.yaml" +blueprint_yaml: "{{ workspace }}/examples/gke-managed-parallelstore.yaml" network: "{{ deployment_name }}-net" remote_node: "{{ deployment_name }}-0" post_deploy_tests: -- test-validation/test-gke-storage-managed-parallelstore.yml +- test-validation/test-gke-managed-parallelstore.yml custom_vars: project: "{{ project }}" cli_deployment_vars: From c5cf2159d808d3948c200d2b48d98a5a4561138a Mon Sep 17 00:00:00 2001 From: Mohit Chaurasia Date: Thu, 19 Dec 2024 07:15:43 +0000 Subject: [PATCH 052/140] Updated ansible playbook test file name --- ...orage-parallelstore.yml => test-gke-managed-parallelstore.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tools/cloud-build/daily-tests/ansible_playbooks/test-validation/{test-gke-storage-parallelstore.yml => test-gke-managed-parallelstore.yml} (100%) diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-gke-storage-parallelstore.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-gke-managed-parallelstore.yml similarity index 100% rename from tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-gke-storage-parallelstore.yml rename to tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-gke-managed-parallelstore.yml From deea05eb38a58e255720ce6d7f12c5e81cde76b3 Mon Sep 17 00:00:00 2001 From: ighosh98 Date: Thu, 19 Dec 2024 09:10:06 +0000 Subject: [PATCH 053/140] upgrade a3-ultra to use kueue v0.10.0 --- examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml index 72bf5e2bf2..2eb10b679c 100644 --- a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml +++ b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml @@ -176,7 +176,7 @@ deployment_groups: settings: kueue: install: true - version: v0.9.1 + version: v0.10.0 jobset: install: true version: v0.7.1 From 862e19b85bfa3c495e7db7f35d6d60f6245faa06 Mon Sep 17 00:00:00 2001 From: Parul Bajaj Date: Thu, 19 Dec 2024 12:49:12 +0000 Subject: [PATCH 054/140] Add compact placement validations --- modules/compute/gke-node-pool/main.tf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index f1999cbd0b..9a09712097 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -307,6 +307,14 @@ resource "google_container_node_pool" "node_pool" { condition = local.upgrade_settings.max_unavailable > 0 || local.upgrade_settings.max_surge > 0 error_message = "At least one of max_unavailable or max_surge must greater than 0" } + precondition { + condition = var.placement_policy.type != "COMPACT" || length(var.zones) == 1 + error_message = "Compact placement is only available for node pools operating in a single zone." + } + precondition { + condition = var.placement_policy.type != "COMPACT" || local.upgrade_settings.strategy != "BLUE_GREEN" + error_message = "Compact placement is not supported with blue-green upgrades." 
+ } } } From 9658bbc735a7dd87afa02935049b1eaae6a9de46 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Thu, 19 Dec 2024 20:00:56 +0000 Subject: [PATCH 055/140] Rename `/community/module/internal/slurm-gcp-v6` to `slurm-gcp` No other changes --- .../schedmd-slurm-gcp-v6-nodeset-dynamic/README.md | 2 +- .../schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf | 2 +- .../{slurm-gcp-v6 => slurm-gcp}/instance/README.md | 0 .../{slurm-gcp-v6 => slurm-gcp}/instance/main.tf | 0 .../{slurm-gcp-v6 => slurm-gcp}/instance/outputs.tf | 0 .../{slurm-gcp-v6 => slurm-gcp}/instance/variables.tf | 0 .../{slurm-gcp-v6 => slurm-gcp}/instance/versions.tf | 0 .../instance_template/README.md | 0 .../instance_template/files/startup_sh_unlinted | 0 .../instance_template/main.tf | 0 .../instance_template/outputs.tf | 0 .../instance_template/variables.tf | 0 .../instance_template/versions.tf | 0 .../internal_instance_template/README.md | 0 .../internal_instance_template/main.tf | 0 .../internal_instance_template/outputs.tf | 0 .../internal_instance_template/variables.tf | 0 .../internal_instance_template/versions.tf | 0 .../{slurm-gcp-v6 => slurm-gcp}/nodeset_tpu/README.md | 0 .../{slurm-gcp-v6 => slurm-gcp}/nodeset_tpu/main.tf | 0 .../{slurm-gcp-v6 => slurm-gcp}/nodeset_tpu/outputs.tf | 0 .../nodeset_tpu/variables.tf | 0 .../nodeset_tpu/versions.tf | 0 .../schedmd-slurm-gcp-v6-controller/README.md | 10 +++++----- .../schedmd-slurm-gcp-v6-controller/controller.tf | 2 +- .../scheduler/schedmd-slurm-gcp-v6-controller/login.tf | 4 ++-- .../modules/slurm_files/scripts/slurmsync.py | 1 + .../schedmd-slurm-gcp-v6-controller/partition.tf | 4 ++-- 28 files changed, 13 insertions(+), 12 deletions(-) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/instance/README.md (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/instance/main.tf (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/instance/outputs.tf (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/instance/variables.tf (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/instance/versions.tf (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/instance_template/README.md (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/instance_template/files/startup_sh_unlinted (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/instance_template/main.tf (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/instance_template/outputs.tf (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/instance_template/variables.tf (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/instance_template/versions.tf (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/internal_instance_template/README.md (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/internal_instance_template/main.tf (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/internal_instance_template/outputs.tf (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/internal_instance_template/variables.tf (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/internal_instance_template/versions.tf (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/nodeset_tpu/README.md (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/nodeset_tpu/main.tf (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/nodeset_tpu/outputs.tf (100%) rename 
community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/nodeset_tpu/variables.tf (100%) rename community/modules/internal/{slurm-gcp-v6 => slurm-gcp}/nodeset_tpu/versions.tf (100%) diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md index 643ef9ad84..50f0cbc6e0 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md @@ -74,7 +74,7 @@ modules. For support with the underlying modules, see the instructions in the | Name | Source | Version | |------|--------|---------| -| [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | ../../internal/slurm-gcp-v6/instance_template | n/a | +| [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | ../../internal/slurm-gcp/instance_template | n/a | ## Resources diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf index 6dcc872cab..a528978760 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf @@ -56,7 +56,7 @@ locals { } module "slurm_nodeset_template" { - source = "../../internal/slurm-gcp-v6/instance_template" + source = "../../internal/slurm-gcp/instance_template" project_id = var.project_id region = var.region diff --git a/community/modules/internal/slurm-gcp-v6/instance/README.md b/community/modules/internal/slurm-gcp/instance/README.md similarity index 100% rename from community/modules/internal/slurm-gcp-v6/instance/README.md rename to community/modules/internal/slurm-gcp/instance/README.md diff --git a/community/modules/internal/slurm-gcp-v6/instance/main.tf b/community/modules/internal/slurm-gcp/instance/main.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/instance/main.tf rename to community/modules/internal/slurm-gcp/instance/main.tf diff --git a/community/modules/internal/slurm-gcp-v6/instance/outputs.tf b/community/modules/internal/slurm-gcp/instance/outputs.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/instance/outputs.tf rename to community/modules/internal/slurm-gcp/instance/outputs.tf diff --git a/community/modules/internal/slurm-gcp-v6/instance/variables.tf b/community/modules/internal/slurm-gcp/instance/variables.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/instance/variables.tf rename to community/modules/internal/slurm-gcp/instance/variables.tf diff --git a/community/modules/internal/slurm-gcp-v6/instance/versions.tf b/community/modules/internal/slurm-gcp/instance/versions.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/instance/versions.tf rename to community/modules/internal/slurm-gcp/instance/versions.tf diff --git a/community/modules/internal/slurm-gcp-v6/instance_template/README.md b/community/modules/internal/slurm-gcp/instance_template/README.md similarity index 100% rename from community/modules/internal/slurm-gcp-v6/instance_template/README.md rename to community/modules/internal/slurm-gcp/instance_template/README.md diff --git a/community/modules/internal/slurm-gcp-v6/instance_template/files/startup_sh_unlinted b/community/modules/internal/slurm-gcp/instance_template/files/startup_sh_unlinted similarity index 100% rename from 
community/modules/internal/slurm-gcp-v6/instance_template/files/startup_sh_unlinted rename to community/modules/internal/slurm-gcp/instance_template/files/startup_sh_unlinted diff --git a/community/modules/internal/slurm-gcp-v6/instance_template/main.tf b/community/modules/internal/slurm-gcp/instance_template/main.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/instance_template/main.tf rename to community/modules/internal/slurm-gcp/instance_template/main.tf diff --git a/community/modules/internal/slurm-gcp-v6/instance_template/outputs.tf b/community/modules/internal/slurm-gcp/instance_template/outputs.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/instance_template/outputs.tf rename to community/modules/internal/slurm-gcp/instance_template/outputs.tf diff --git a/community/modules/internal/slurm-gcp-v6/instance_template/variables.tf b/community/modules/internal/slurm-gcp/instance_template/variables.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/instance_template/variables.tf rename to community/modules/internal/slurm-gcp/instance_template/variables.tf diff --git a/community/modules/internal/slurm-gcp-v6/instance_template/versions.tf b/community/modules/internal/slurm-gcp/instance_template/versions.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/instance_template/versions.tf rename to community/modules/internal/slurm-gcp/instance_template/versions.tf diff --git a/community/modules/internal/slurm-gcp-v6/internal_instance_template/README.md b/community/modules/internal/slurm-gcp/internal_instance_template/README.md similarity index 100% rename from community/modules/internal/slurm-gcp-v6/internal_instance_template/README.md rename to community/modules/internal/slurm-gcp/internal_instance_template/README.md diff --git a/community/modules/internal/slurm-gcp-v6/internal_instance_template/main.tf b/community/modules/internal/slurm-gcp/internal_instance_template/main.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/internal_instance_template/main.tf rename to community/modules/internal/slurm-gcp/internal_instance_template/main.tf diff --git a/community/modules/internal/slurm-gcp-v6/internal_instance_template/outputs.tf b/community/modules/internal/slurm-gcp/internal_instance_template/outputs.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/internal_instance_template/outputs.tf rename to community/modules/internal/slurm-gcp/internal_instance_template/outputs.tf diff --git a/community/modules/internal/slurm-gcp-v6/internal_instance_template/variables.tf b/community/modules/internal/slurm-gcp/internal_instance_template/variables.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/internal_instance_template/variables.tf rename to community/modules/internal/slurm-gcp/internal_instance_template/variables.tf diff --git a/community/modules/internal/slurm-gcp-v6/internal_instance_template/versions.tf b/community/modules/internal/slurm-gcp/internal_instance_template/versions.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/internal_instance_template/versions.tf rename to community/modules/internal/slurm-gcp/internal_instance_template/versions.tf diff --git a/community/modules/internal/slurm-gcp-v6/nodeset_tpu/README.md b/community/modules/internal/slurm-gcp/nodeset_tpu/README.md similarity index 100% rename from community/modules/internal/slurm-gcp-v6/nodeset_tpu/README.md rename to 
community/modules/internal/slurm-gcp/nodeset_tpu/README.md diff --git a/community/modules/internal/slurm-gcp-v6/nodeset_tpu/main.tf b/community/modules/internal/slurm-gcp/nodeset_tpu/main.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/nodeset_tpu/main.tf rename to community/modules/internal/slurm-gcp/nodeset_tpu/main.tf diff --git a/community/modules/internal/slurm-gcp-v6/nodeset_tpu/outputs.tf b/community/modules/internal/slurm-gcp/nodeset_tpu/outputs.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/nodeset_tpu/outputs.tf rename to community/modules/internal/slurm-gcp/nodeset_tpu/outputs.tf diff --git a/community/modules/internal/slurm-gcp-v6/nodeset_tpu/variables.tf b/community/modules/internal/slurm-gcp/nodeset_tpu/variables.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/nodeset_tpu/variables.tf rename to community/modules/internal/slurm-gcp/nodeset_tpu/variables.tf diff --git a/community/modules/internal/slurm-gcp-v6/nodeset_tpu/versions.tf b/community/modules/internal/slurm-gcp/nodeset_tpu/versions.tf similarity index 100% rename from community/modules/internal/slurm-gcp-v6/nodeset_tpu/versions.tf rename to community/modules/internal/slurm-gcp/nodeset_tpu/versions.tf diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md index 583d41825e..b03fbf0973 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md @@ -261,12 +261,12 @@ limitations under the License. | [daos\_network\_storage\_scripts](#module\_daos\_network\_storage\_scripts) | ../../../../modules/scripts/startup-script | n/a | | [nodeset\_cleanup](#module\_nodeset\_cleanup) | ./modules/cleanup_compute | n/a | | [nodeset\_cleanup\_tpu](#module\_nodeset\_cleanup\_tpu) | ./modules/cleanup_tpu | n/a | -| [slurm\_controller\_template](#module\_slurm\_controller\_template) | ../../internal/slurm-gcp-v6/instance_template | n/a | +| [slurm\_controller\_template](#module\_slurm\_controller\_template) | ../../internal/slurm-gcp/instance_template | n/a | | [slurm\_files](#module\_slurm\_files) | ./modules/slurm_files | n/a | -| [slurm\_login\_instance](#module\_slurm\_login\_instance) | ../../internal/slurm-gcp-v6/instance | n/a | -| [slurm\_login\_template](#module\_slurm\_login\_template) | ../../internal/slurm-gcp-v6/instance_template | n/a | -| [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | ../../internal/slurm-gcp-v6/instance_template | n/a | -| [slurm\_nodeset\_tpu](#module\_slurm\_nodeset\_tpu) | ../../internal/slurm-gcp-v6/nodeset_tpu | n/a | +| [slurm\_login\_instance](#module\_slurm\_login\_instance) | ../../internal/slurm-gcp/instance | n/a | +| [slurm\_login\_template](#module\_slurm\_login\_template) | ../../internal/slurm-gcp/instance_template | n/a | +| [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | ../../internal/slurm-gcp/instance_template | n/a | +| [slurm\_nodeset\_tpu](#module\_slurm\_nodeset\_tpu) | ../../internal/slurm-gcp/nodeset_tpu | n/a | ## Resources diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf index fa28b8728f..c98813a722 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf +++ 
b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf @@ -43,7 +43,7 @@ locals { # INSTANCE TEMPLATE module "slurm_controller_template" { - source = "../../internal/slurm-gcp-v6/instance_template" + source = "../../internal/slurm-gcp/instance_template" project_id = var.project_id region = var.region diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf index cfb61787cb..874d1aff67 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf @@ -14,7 +14,7 @@ # TEMPLATE module "slurm_login_template" { - source = "../../internal/slurm-gcp-v6/instance_template" + source = "../../internal/slurm-gcp/instance_template" for_each = { for x in var.login_nodes : x.name_prefix => x } @@ -56,7 +56,7 @@ module "slurm_login_template" { # INSTANCE module "slurm_login_instance" { - source = "../../internal/slurm-gcp-v6/instance" + source = "../../internal/slurm-gcp/instance" for_each = { for x in var.login_nodes : x.name_prefix => x } access_config = each.value.access_config diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py index 65bf15ede5..05fb2d5805 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurmsync.py @@ -441,6 +441,7 @@ def delete_reservation(lkp: util.Lookup, reservation_name: str) -> None: def create_reservation(lkp: util.Lookup, reservation_name: str, node: str, start_time: datetime) -> None: # Format time to be compatible with slurm reservation. 
formatted_start_time = start_time.strftime('%Y-%m-%dT%H:%M:%S') + util.run(f"{lkp.scontrol} create reservation user=slurm starttime={formatted_start_time} duration=180 nodes={node} reservationname={reservation_name} flags=maint,ignore_jobs") diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf index e8626bd1bd..308b60d19d 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf @@ -26,7 +26,7 @@ locals { # NODESET # TODO: remove dependency on slurm-gcp repo, move to local template module module "slurm_nodeset_template" { - source = "../../internal/slurm-gcp-v6/instance_template" + source = "../../internal/slurm-gcp/instance_template" for_each = local.nodeset_map project_id = var.project_id @@ -104,7 +104,7 @@ locals { # NODESET TPU module "slurm_nodeset_tpu" { - source = "../../internal/slurm-gcp-v6/nodeset_tpu" + source = "../../internal/slurm-gcp/nodeset_tpu" for_each = local.nodeset_tpu_map project_id = var.project_id From 697b70a4739bdc2c047990dd3410c7139b3a9e73 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Thu, 19 Dec 2024 21:22:41 +0000 Subject: [PATCH 056/140] Fix wrong API field name --- .../modules/slurm_files/scripts/resume.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py index 4f3fce7213..fa5413e53c 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py @@ -129,7 +129,7 @@ def update_reservation_props(reservation:object, props:object, placement_group:O } if reservation.dense or reservation_from_fr: - props.scheduling.provisioning_model = "RESERVATION_BOUND" + props.scheduling.provisioningModel = "RESERVATION_BOUND" # Figure out `resourcePolicies` if reservation.policies: # use ones already attached to reservations From ba673d54668429e44878a51aec68ed1c892bc8cc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:54:36 +0000 Subject: [PATCH 057/140] Bump golang.org/x/crypto from 0.25.0 to 0.31.0 Bumps [golang.org/x/crypto](https://github.com/golang/crypto) from 0.25.0 to 0.31.0. - [Commits](https://github.com/golang/crypto/compare/v0.25.0...v0.31.0) --- updated-dependencies: - dependency-name: golang.org/x/crypto dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- go.mod | 8 ++++---- go.sum | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/go.mod b/go.mod index 61a5f523e6..8e9d4e4d7c 100644 --- a/go.mod +++ b/go.mod @@ -51,7 +51,7 @@ require ( go.opentelemetry.io/otel/metric v1.24.0 // indirect go.opentelemetry.io/otel/trace v1.24.0 // indirect golang.org/x/mod v0.19.0 // indirect - golang.org/x/sync v0.7.0 // indirect + golang.org/x/sync v0.10.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.23.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240610135401-a8a62080eff3 // indirect @@ -95,11 +95,11 @@ require ( github.com/ulikunitz/xz v0.5.10 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect go.opencensus.io v0.24.0 // indirect - golang.org/x/crypto v0.25.0 // indirect + golang.org/x/crypto v0.31.0 // indirect golang.org/x/net v0.27.0 // indirect golang.org/x/oauth2 v0.21.0 // indirect - golang.org/x/sys v0.27.0 - golang.org/x/text v0.16.0 // indirect + golang.org/x/sys v0.28.0 + golang.org/x/text v0.21.0 // indirect google.golang.org/grpc v1.64.1 // indirect google.golang.org/protobuf v1.34.2 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect diff --git a/go.sum b/go.sum index 1e4a67b6ba..bdf95899c5 100644 --- a/go.sum +++ b/go.sum @@ -529,8 +529,8 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.25.0 h1:ypSNr+bnYL2YhwoMt2zPxHFmbAN1KZs/njMG3hxUp30= -golang.org/x/crypto v0.25.0/go.mod h1:T+wALwcMOSE0kXgUAnPAHqTLW+XHgcELELW8VaDgm/M= +golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -662,8 +662,8 @@ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220929204114-8fcdb60fdcc0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -732,13 +732,13 @@ golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod 
h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= -golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= -golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= +golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -749,8 +749,8 @@ golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= From f0272b792d0f2a8171bcd74287992173533feb6a Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 19 Dec 2024 15:13:44 -0800 Subject: [PATCH 058/140] Update google tf provider version ceiling --- pkg/config/expand.go | 4 ++-- pkg/config/expand_test.go | 4 ++-- .../igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml | 4 ++-- .../golden_copies/expectations/igc_pkr/zero/versions.tf | 4 ++-- .../igc_tf/.ghpc/artifacts/expanded_blueprint.yaml | 8 ++++---- .../golden_copies/expectations/igc_tf/one/versions.tf | 4 ++-- .../golden_copies/expectations/igc_tf/zero/versions.tf | 4 ++-- .../merge_flatten/.ghpc/artifacts/expanded_blueprint.yaml | 4 ++-- .../expectations/merge_flatten/zero/versions.tf | 4 ++-- .../.ghpc/artifacts/expanded_blueprint.yaml | 4 ++-- .../expectations/versioned_blueprint/primary/versions.tf | 4 ++-- 11 files changed, 24 insertions(+), 24 deletions(-) diff --git a/pkg/config/expand.go b/pkg/config/expand.go index ae5c30a328..95ad1de52e 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -199,11 +199,11 @@ func 
getDefaultGoogleProviders(bp Blueprint) map[string]TerraformProvider { return map[string]TerraformProvider{ "google": { Source: "hashicorp/google", - Version: "~> 6.13.0", + Version: "~> 6.14.0", Configuration: gglConf}, "google-beta": { Source: "hashicorp/google-beta", - Version: "~> 6.13.0", + Version: "~> 6.14.0", Configuration: gglConf}} } diff --git a/pkg/config/expand_test.go b/pkg/config/expand_test.go index e1ad008407..73f321cef0 100644 --- a/pkg/config/expand_test.go +++ b/pkg/config/expand_test.go @@ -93,10 +93,10 @@ func (s *zeroSuite) TestExpandProviders(c *C) { c.Check(g.TerraformProviders, DeepEquals, map[string]PR{ "google": TerraformProvider{ Source: "hashicorp/google", - Version: "~> 6.13.0"}, + Version: "~> 6.14.0"}, "google-beta": TerraformProvider{ Source: "hashicorp/google-beta", - Version: "~> 6.13.0"}}) + Version: "~> 6.14.0"}}) } { // no def PR, group PR diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml index dd66cf7aa1..b633bbafb0 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/.ghpc/artifacts/expanded_blueprint.yaml @@ -38,14 +38,14 @@ deployment_groups: terraform_providers: google: source: hashicorp/google - version: ~> 6.13.0 + version: ~> 6.14.0 configuration: project: ((var.project_id)) region: ((var.region)) zone: ((var.zone)) google-beta: source: hashicorp/google-beta - version: ~> 6.13.0 + version: ~> 6.14.0 configuration: project: ((var.project_id)) region: ((var.region)) diff --git a/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/versions.tf b/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/versions.tf index fab3c44cd0..df921aaeb4 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/versions.tf +++ b/tools/validate_configs/golden_copies/expectations/igc_pkr/zero/versions.tf @@ -20,11 +20,11 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 6.13.0" + version = "~> 6.14.0" } google-beta = { source = "hashicorp/google-beta" - version = "~> 6.13.0" + version = "~> 6.14.0" } } } diff --git a/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml index 1906e9a832..8fcb8e46d7 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/igc_tf/.ghpc/artifacts/expanded_blueprint.yaml @@ -44,14 +44,14 @@ deployment_groups: terraform_providers: google: source: hashicorp/google - version: ~> 6.13.0 + version: ~> 6.14.0 configuration: project: ((var.project_id)) region: ((var.region)) zone: ((var.zone)) google-beta: source: hashicorp/google-beta - version: ~> 6.13.0 + version: ~> 6.14.0 configuration: project: ((var.project_id)) region: ((var.region)) @@ -80,14 +80,14 @@ deployment_groups: terraform_providers: google: source: hashicorp/google - version: ~> 6.13.0 + version: ~> 6.14.0 configuration: project: ((var.project_id)) region: ((var.region)) zone: ((var.zone)) google-beta: source: hashicorp/google-beta - version: ~> 6.13.0 + version: ~> 6.14.0 configuration: project: ((var.project_id)) region: ((var.region)) diff --git 
a/tools/validate_configs/golden_copies/expectations/igc_tf/one/versions.tf b/tools/validate_configs/golden_copies/expectations/igc_tf/one/versions.tf index fab3c44cd0..df921aaeb4 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_tf/one/versions.tf +++ b/tools/validate_configs/golden_copies/expectations/igc_tf/one/versions.tf @@ -20,11 +20,11 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 6.13.0" + version = "~> 6.14.0" } google-beta = { source = "hashicorp/google-beta" - version = "~> 6.13.0" + version = "~> 6.14.0" } } } diff --git a/tools/validate_configs/golden_copies/expectations/igc_tf/zero/versions.tf b/tools/validate_configs/golden_copies/expectations/igc_tf/zero/versions.tf index fab3c44cd0..df921aaeb4 100644 --- a/tools/validate_configs/golden_copies/expectations/igc_tf/zero/versions.tf +++ b/tools/validate_configs/golden_copies/expectations/igc_tf/zero/versions.tf @@ -20,11 +20,11 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 6.13.0" + version = "~> 6.14.0" } google-beta = { source = "hashicorp/google-beta" - version = "~> 6.13.0" + version = "~> 6.14.0" } } } diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/expanded_blueprint.yaml index 15a203a4b5..c81fa3e335 100644 --- a/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/.ghpc/artifacts/expanded_blueprint.yaml @@ -39,14 +39,14 @@ deployment_groups: terraform_providers: google: source: hashicorp/google - version: ~> 6.13.0 + version: ~> 6.14.0 configuration: project: ((var.project_id)) region: ((var.region)) zone: ((var.zone)) google-beta: source: hashicorp/google-beta - version: ~> 6.13.0 + version: ~> 6.14.0 configuration: project: ((var.project_id)) region: ((var.region)) diff --git a/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/versions.tf b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/versions.tf index fab3c44cd0..df921aaeb4 100644 --- a/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/versions.tf +++ b/tools/validate_configs/golden_copies/expectations/merge_flatten/zero/versions.tf @@ -20,11 +20,11 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 6.13.0" + version = "~> 6.14.0" } google-beta = { source = "hashicorp/google-beta" - version = "~> 6.13.0" + version = "~> 6.14.0" } } } diff --git a/tools/validate_configs/golden_copies/expectations/versioned_blueprint/.ghpc/artifacts/expanded_blueprint.yaml b/tools/validate_configs/golden_copies/expectations/versioned_blueprint/.ghpc/artifacts/expanded_blueprint.yaml index 0a51078be5..cfb228725d 100644 --- a/tools/validate_configs/golden_copies/expectations/versioned_blueprint/.ghpc/artifacts/expanded_blueprint.yaml +++ b/tools/validate_configs/golden_copies/expectations/versioned_blueprint/.ghpc/artifacts/expanded_blueprint.yaml @@ -47,14 +47,14 @@ deployment_groups: terraform_providers: google: source: hashicorp/google - version: ~> 6.13.0 + version: ~> 6.14.0 configuration: project: ((var.project_id)) region: ((var.region)) zone: ((var.zone)) google-beta: source: hashicorp/google-beta - version: ~> 6.13.0 + version: ~> 6.14.0 configuration: project: ((var.project_id)) region: ((var.region)) diff 
--git a/tools/validate_configs/golden_copies/expectations/versioned_blueprint/primary/versions.tf b/tools/validate_configs/golden_copies/expectations/versioned_blueprint/primary/versions.tf index fab3c44cd0..df921aaeb4 100644 --- a/tools/validate_configs/golden_copies/expectations/versioned_blueprint/primary/versions.tf +++ b/tools/validate_configs/golden_copies/expectations/versioned_blueprint/primary/versions.tf @@ -20,11 +20,11 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "~> 6.13.0" + version = "~> 6.14.0" } google-beta = { source = "hashicorp/google-beta" - version = "~> 6.13.0" + version = "~> 6.14.0" } } } From e98ad33ddd1d154a989036765a3992924ead2a1e Mon Sep 17 00:00:00 2001 From: ighosh98 Date: Fri, 20 Dec 2024 11:40:12 +0000 Subject: [PATCH 059/140] add tas plugin fix --- .../manifests/topology-scheduler-scripts.yaml | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/community/modules/compute/gke-topology-scheduler/manifests/topology-scheduler-scripts.yaml b/community/modules/compute/gke-topology-scheduler/manifests/topology-scheduler-scripts.yaml index cf6cf2fb00..20438bc638 100644 --- a/community/modules/compute/gke-topology-scheduler/manifests/topology-scheduler-scripts.yaml +++ b/community/modules/compute/gke-topology-scheduler/manifests/topology-scheduler-scripts.yaml @@ -6,7 +6,6 @@ metadata: data: schedule-daemon.py: | #!/usr/bin/env python - """schedule-daemon.py is a Topology-aware Kubernetes pod scheduler.""" # Copyright 2024 Google Inc. All Rights Reserved. # @@ -21,6 +20,7 @@ data: # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + """schedule-daemon.py is a Topology-aware Kubernetes pod scheduler.""" import argparse import collections @@ -293,6 +293,16 @@ data: ) continue + # skip nodes that is not in Ready state + if any( + condition.type == "Ready" and condition.status != "True" for condition in node.status.conditions + ): + logging.info( + 'Skipping node %s because it is NotReady', + node_name + ) + continue + allocatable = node.status.allocatable used_cpu, used_memory, used_gpu = 0, 0, 0 @@ -445,7 +455,7 @@ data: v1: kubernetes.client.CoreV1Api, pod_name: str, pod_namespace: str, - node_name: str, + node: dict[str, Any], gate_name: str, ) -> bool: """Schedules a pod on a given node using affinity for direct assignment. @@ -454,7 +464,7 @@ data: v1: The kubernetes client. pod_name: The name of the pod to schedule. pod_namespace: The namespace of the pod to schedule. - node_name: The name of the node to schedule the pod on. + node: The node to schedule the pod on. gate_name: The name of the gate to remove from the pod. 
Returns: @@ -473,7 +483,7 @@ data: 'matchExpressions': [{ 'key': 'kubernetes.io/hostname', 'operator': 'In', - 'values': [node_name], + 'values': [node['name']], }] }] } @@ -484,7 +494,7 @@ data: v1.replace_namespaced_pod(pod_name, pod_namespace, pod) logging.info( - 'Pod %s/%s scheduled on %s', pod_namespace, pod_name, node_name + 'Pod %s/%s scheduled on %s with topology %s', pod_namespace, pod_name, node['name'], node_topology_key(node) ) except kubernetes.client.rest.ApiException as e: logging.exception( @@ -727,7 +737,7 @@ data: for i, pod in enumerate(sorted_pods): node = sorted_nodes[best_assignment[i]] if not schedule_pod_on_node( - v1, pod['name'], pod['namespace'], node['name'], gate_name + v1, pod['name'], pod['namespace'], node, gate_name ): logging.error( 'Failed to schedule pod %s on node %s. Skipping job %s', From a9f4617dc5b793d5c10f0606bc674a7dd9f9f053 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wiktor=20Niesiob=C4=99dzki?= Date: Thu, 21 Nov 2024 06:16:52 +0000 Subject: [PATCH 060/140] Include MemSpecLimit when calculating defmem --- .../modules/slurm_files/scripts/conf.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py index dd3d628cbb..ef0f747f64 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/conf.py @@ -184,10 +184,12 @@ def partitionlines(partition, lkp: util.Lookup) -> str: """Make a partition line for the slurm.conf""" MIN_MEM_PER_CPU = 100 - def defmempercpu(nodeset: str) -> int: - template = lkp.cfg.nodeset.get(nodeset).instance_template + def defmempercpu(nodeset_name: str) -> int: + nodeset = lkp.cfg.nodeset.get(nodeset_name) + template = nodeset.instance_template machine = lkp.template_machine_conf(template) - return max(MIN_MEM_PER_CPU, machine.memory // machine.cpus) + mem_spec_limit = int(nodeset.node_conf.get("MemSpecLimit", 0)) + return max(MIN_MEM_PER_CPU, (machine.memory - mem_spec_limit) // machine.cpus) defmem = min( map(defmempercpu, partition.partition_nodeset), default=MIN_MEM_PER_CPU From 5c400c79272b13eca12e66e2748708a0b3e36ffc Mon Sep 17 00:00:00 2001 From: In-Ho Yi Date: Fri, 20 Dec 2024 17:27:46 +0000 Subject: [PATCH 061/140] Add lifecycle rule to ignore local SSDs These settings are known to be tied to machine types and sometimes API changes prompt change not best tracked by TF states --- modules/compute/gke-node-pool/main.tf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index e971af24dc..48064aff98 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -226,6 +226,8 @@ resource "google_container_node_pool" "node_pool" { ignore_changes = [ node_config[0].labels, initial_node_count, + node_config[0].ephemeral_storage_local_ssd_config[0].local_ssd_count, + node_config[0].local_nvme_ssd_block_config[0].local_ssd_count, ] precondition { condition = (var.max_pods_per_node == null) || (data.google_container_cluster.gke_cluster.networking_mode == "VPC_NATIVE") From 6c108882ffa787cf72633f9fb8cdfcf287326ff0 Mon Sep 17 00:00:00 2001 From: In-Ho Yi Date: Fri, 20 Dec 2024 17:35:26 +0000 Subject: [PATCH 062/140] Fix non-constant format string errors --- cmd/create.go 
| 16 ++++++++-------- cmd/root.go | 2 +- pkg/modulereader/hcl_utils.go | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cmd/create.go b/cmd/create.go index 17ec0eb442..3ea151cdcd 100644 --- a/cmd/create.go +++ b/cmd/create.go @@ -91,7 +91,7 @@ func printAdvancedInstructionsMessage(deplDir string) { logging.Info("Find instructions for cleanly destroying infrastructure and advanced manual") logging.Info("deployment instructions at:") logging.Info("") - logging.Info(modulewriter.InstructionsPath(deplDir)) + logging.Info("%s", modulewriter.InstructionsPath(deplDir)) } // TODO: move to expand.go @@ -135,10 +135,10 @@ func v5DeprecationWarning(bp config.Blueprint) { alreadyContainsV5 := false bp.WalkModulesSafe(func(mp config.ModulePath, m *config.Module) { if strings.Contains(m.Source, "schedmd-slurm-gcp-v5-controller") && !alreadyContainsV5 { - logging.Info(boldYellow( - "We have been supporting slurm-gcp v5 since July 2022 and are now deprecating it, as we've launched slurm-gcp v6 in June 2024. \n" + - "Toolkit blueprints using Slurm-gcp v5 will be marked “deprecated” starting October 2024 and slurm-gcp v6 will be the default deployment. \n" + - "However we won't begin removing slurm-gcp v5 blueprints until January 6, 2025. Beginning on January 6, 2025, the Cluster Toolkit team will cease their support for Slurm-gcp v5. \n" + + logging.Info("%s", boldYellow( + "We have been supporting slurm-gcp v5 since July 2022 and are now deprecating it, as we've launched slurm-gcp v6 in June 2024. \n"+ + "Toolkit blueprints using Slurm-gcp v5 will be marked “deprecated” starting October 2024 and slurm-gcp v6 will be the default deployment. \n"+ + "However we won't begin removing slurm-gcp v5 blueprints until January 6, 2025. Beginning on January 6, 2025, the Cluster Toolkit team will cease their support for Slurm-gcp v5. \n"+ "While this will not directly or immediately impact running clusters, we recommend replacing any v5 clusters with Slurm-gcp v6.", )) alreadyContainsV5 = true // This is to avoid the logging message showing repeatedly for multiple v5 controllers @@ -152,7 +152,7 @@ func validateMaybeDie(bp config.Blueprint, ctx config.YamlCtx) { if err == nil { return } - logging.Error(renderError(err, ctx)) + logging.Error("%s", renderError(err, ctx)) logging.Error("One or more blueprint validators has failed. See messages above for suggested") logging.Error("actions. 
General troubleshooting guidance and instructions for configuring") @@ -169,12 +169,12 @@ func validateMaybeDie(bp config.Blueprint, ctx config.YamlCtx) { switch bp.ValidationLevel { case config.ValidationWarning: { - logging.Error(boldYellow("Validation failures were treated as a warning, continuing to create blueprint.")) + logging.Error("%s", boldYellow("Validation failures were treated as a warning, continuing to create blueprint.")) logging.Error("") } case config.ValidationError: { - logging.Fatal(boldRed("validation failed due to the issues listed above")) + logging.Fatal("%s", boldRed("validation failed due to the issues listed above")) } } diff --git a/cmd/root.go b/cmd/root.go index d19219fb97..a5ccddfc8f 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -262,6 +262,6 @@ func checkErr(err error, ctx *config.YamlCtx) { ctx = &config.YamlCtx{} } if err != nil { - logging.Fatal(renderError(err, *ctx)) + logging.Fatal("%s", renderError(err, *ctx)) } } diff --git a/pkg/modulereader/hcl_utils.go b/pkg/modulereader/hcl_utils.go index 5119aba662..4e2cb7c11e 100644 --- a/pkg/modulereader/hcl_utils.go +++ b/pkg/modulereader/hcl_utils.go @@ -136,7 +136,7 @@ func ReadHclAttributes(file string) (map[string]cty.Value, error) { // work around ugly in error message missing d.Subject // https://github.com/hashicorp/hcl2/blob/fb75b3253c80b3bc7ca99c4bfa2ad6743841b1af/hcl/diagnostic.go#L76-L78 if len(diags) == 1 { - return nil, fmt.Errorf(diags[0].Detail) + return nil, fmt.Errorf("%s", diags[0].Detail) } return nil, diags } From ecd4d9ce26221829f4713e8ceac56a7caaaa6510 Mon Sep 17 00:00:00 2001 From: In-Ho Yi Date: Fri, 20 Dec 2024 18:14:32 +0000 Subject: [PATCH 063/140] Fix lifecycle rule to properly ignore local ssd --- modules/compute/gke-node-pool/main.tf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index 0001d204e0..b0bb2c8e30 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -235,8 +235,9 @@ resource "google_container_node_pool" "node_pool" { ignore_changes = [ node_config[0].labels, initial_node_count, - node_config[0].ephemeral_storage_local_ssd_config[0].local_ssd_count, - node_config[0].local_nvme_ssd_block_config[0].local_ssd_count, + # Ignore local/ephemeral ssd configs as they are tied to machine types. 
+ node_config[0].ephemeral_storage_local_ssd_config, + node_config[0].local_nvme_ssd_block_config, ] precondition { condition = (var.max_pods_per_node == null) || (data.google_container_cluster.gke_cluster.networking_mode == "VPC_NATIVE") From 3339f2bf3cdb8fa8e43cf85943e0fb0559b9ef22 Mon Sep 17 00:00:00 2001 From: In-Ho Yi Date: Fri, 20 Dec 2024 21:10:34 +0000 Subject: [PATCH 064/140] Use errors.New to report hcl parse error --- pkg/modulereader/hcl_utils.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/modulereader/hcl_utils.go b/pkg/modulereader/hcl_utils.go index 4e2cb7c11e..9f4ded6a34 100644 --- a/pkg/modulereader/hcl_utils.go +++ b/pkg/modulereader/hcl_utils.go @@ -15,6 +15,7 @@ package modulereader import ( + "errors" "fmt" "hpc-toolkit/pkg/logging" "hpc-toolkit/pkg/sourcereader" @@ -136,7 +137,7 @@ func ReadHclAttributes(file string) (map[string]cty.Value, error) { // work around ugly in error message missing d.Subject // https://github.com/hashicorp/hcl2/blob/fb75b3253c80b3bc7ca99c4bfa2ad6743841b1af/hcl/diagnostic.go#L76-L78 if len(diags) == 1 { - return nil, fmt.Errorf("%s", diags[0].Detail) + return nil, errors.New(diags[0].Detail) } return nil, diags } From 5a11dd4670bbd9fe4abc28f4c36ac5a72c1fa7d9 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Fri, 20 Dec 2024 04:41:25 +0000 Subject: [PATCH 065/140] Replace duplicated `gpu_definition.tf` by shared internal module NOTE: keep ones in `schedmd-slurm-gcp-v5*` due to soon removal. --- .../compute/htcondor-execute-point/README.md | 1 + .../compute/htcondor-execute-point/main.tf | 9 +++ .../README.md | 1 + .../gpu_definition.tf | 58 ------------------- .../main.tf | 9 +++ .../schedmd-slurm-gcp-v6-nodeset/README.md | 4 +- .../gpu_definition.tf | 58 ------------------- .../schedmd-slurm-gcp-v6-nodeset/main.tf | 9 +++ .../schedmd-slurm-gcp-v6-controller/README.md | 1 + .../controller.tf | 9 ++- .../gpu_definition.tf | 58 ------------------- .../schedmd-slurm-gcp-v6-login/README.md | 4 +- .../gpu_definition.tf | 58 ------------------- .../schedmd-slurm-gcp-v6-login/main.tf | 9 +++ modules/compute/gke-node-pool/README.md | 1 + .../compute/gke-node-pool/gpu_definition.tf | 58 ------------------- modules/compute/gke-node-pool/main.tf | 9 +++ .../gke-node-pool/reservation_definitions.tf | 1 + modules/compute/vm-instance/README.md | 1 + modules/compute/vm-instance/gpu_definition.tf | 58 ------------------- modules/compute/vm-instance/main.tf | 9 +++ modules/internal/gpu-definition/README.md | 47 +++++++++++++++ .../internal/gpu-definition/main.tf | 33 +++++++++-- tools/duplicate-diff.py | 7 --- 24 files changed, 150 insertions(+), 362 deletions(-) delete mode 100644 community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/gpu_definition.tf delete mode 100644 community/modules/compute/schedmd-slurm-gcp-v6-nodeset/gpu_definition.tf delete mode 100644 community/modules/scheduler/schedmd-slurm-gcp-v6-controller/gpu_definition.tf delete mode 100644 community/modules/scheduler/schedmd-slurm-gcp-v6-login/gpu_definition.tf delete mode 100644 modules/compute/gke-node-pool/gpu_definition.tf delete mode 100644 modules/compute/vm-instance/gpu_definition.tf create mode 100644 modules/internal/gpu-definition/README.md rename community/modules/compute/htcondor-execute-point/gpu_definition.tf => modules/internal/gpu-definition/main.tf (75%) diff --git a/community/modules/compute/htcondor-execute-point/README.md b/community/modules/compute/htcondor-execute-point/README.md index c7068a4522..fe1d49c4d8 100644 --- 
a/community/modules/compute/htcondor-execute-point/README.md +++ b/community/modules/compute/htcondor-execute-point/README.md @@ -211,6 +211,7 @@ limitations under the License. | Name | Source | Version | |------|--------|---------| | [execute\_point\_instance\_template](#module\_execute\_point\_instance\_template) | terraform-google-modules/vm/google//modules/instance_template | ~> 12.1 | +| [gpu](#module\_gpu) | ../../../../modules/internal/gpu-definition | n/a | | [mig](#module\_mig) | terraform-google-modules/vm/google//modules/mig | ~> 12.1 | | [startup\_script](#module\_startup\_script) | ../../../../modules/scripts/startup-script | n/a | diff --git a/community/modules/compute/htcondor-execute-point/main.tf b/community/modules/compute/htcondor-execute-point/main.tf index 0d8171092a..fb875f01e4 100644 --- a/community/modules/compute/htcondor-execute-point/main.tf +++ b/community/modules/compute/htcondor-execute-point/main.tf @@ -19,7 +19,16 @@ locals { labels = merge(var.labels, { ghpc_module = "htcondor-execute-point", ghpc_role = "compute" }) } +module "gpu" { + source = "../../../../modules/internal/gpu-definition" + + machine_type = var.machine_type + guest_accelerator = var.guest_accelerator +} + locals { + guest_accelerator = module.gpu.guest_accelerator + zones = coalescelist(var.zones, data.google_compute_zones.available.names) network_storage_metadata = var.network_storage == null ? {} : { network_storage = jsonencode(var.network_storage) } diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md index 50f0cbc6e0..755ded9f61 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md @@ -74,6 +74,7 @@ modules. For support with the underlying modules, see the instructions in the | Name | Source | Version | |------|--------|---------| +| [gpu](#module\_gpu) | ../../../../modules/internal/gpu-definition | n/a | | [slurm\_nodeset\_template](#module\_slurm\_nodeset\_template) | ../../internal/slurm-gcp/instance_template | n/a | ## Resources diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/gpu_definition.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/gpu_definition.tf deleted file mode 100644 index 1c84a92721..0000000000 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/gpu_definition.tf +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
-*/ - -## Required variables: -# guest_accelerator -# machine_type - -locals { - # example state; terraform will ignore diffs if last element of URL matches - # guest_accelerator = [ - # { - # count = 1 - # type = "https://www.googleapis.com/compute/beta/projects/PROJECT/zones/ZONE/acceleratorTypes/nvidia-tesla-a100" - # }, - # ] - accelerator_machines = { - "a2-highgpu-1g" = { type = "nvidia-tesla-a100", count = 1 }, - "a2-highgpu-2g" = { type = "nvidia-tesla-a100", count = 2 }, - "a2-highgpu-4g" = { type = "nvidia-tesla-a100", count = 4 }, - "a2-highgpu-8g" = { type = "nvidia-tesla-a100", count = 8 }, - "a2-megagpu-16g" = { type = "nvidia-tesla-a100", count = 16 }, - "a2-ultragpu-1g" = { type = "nvidia-a100-80gb", count = 1 }, - "a2-ultragpu-2g" = { type = "nvidia-a100-80gb", count = 2 }, - "a2-ultragpu-4g" = { type = "nvidia-a100-80gb", count = 4 }, - "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, - "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, - "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, - "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, - "g2-standard-4" = { type = "nvidia-l4", count = 1 }, - "g2-standard-8" = { type = "nvidia-l4", count = 1 }, - "g2-standard-12" = { type = "nvidia-l4", count = 1 }, - "g2-standard-16" = { type = "nvidia-l4", count = 1 }, - "g2-standard-24" = { type = "nvidia-l4", count = 2 }, - "g2-standard-32" = { type = "nvidia-l4", count = 1 }, - "g2-standard-48" = { type = "nvidia-l4", count = 4 }, - "g2-standard-96" = { type = "nvidia-l4", count = 8 }, - } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) - - # Select in priority order: - # (1) var.guest_accelerator if not empty - # (2) local.generated_guest_accelerator if not empty - # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) -} diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf index a528978760..c3235c0229 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf @@ -17,7 +17,16 @@ locals { labels = merge(var.labels, { ghpc_module = "schedmd-slurm-gcp-v6-nodeset-dynamic", ghpc_role = "compute" }) } +module "gpu" { + source = "../../../../modules/internal/gpu-definition" + + machine_type = var.machine_type + guest_accelerator = var.guest_accelerator +} + locals { + guest_accelerator = module.gpu.guest_accelerator + nodeset_name = substr(replace(var.name, "/[^a-z0-9]/", ""), 0, 14) feature = coalesce(var.feature, local.nodeset_name) diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md index 297c40bb7a..ce82c34172 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md @@ -142,7 +142,9 @@ modules. For support with the underlying modules, see the instructions in the ## Modules -No modules. 
+| Name | Source | Version | +|------|--------|---------| +| [gpu](#module\_gpu) | ../../../../modules/internal/gpu-definition | n/a | ## Resources diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/gpu_definition.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/gpu_definition.tf deleted file mode 100644 index 1c84a92721..0000000000 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/gpu_definition.tf +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -## Required variables: -# guest_accelerator -# machine_type - -locals { - # example state; terraform will ignore diffs if last element of URL matches - # guest_accelerator = [ - # { - # count = 1 - # type = "https://www.googleapis.com/compute/beta/projects/PROJECT/zones/ZONE/acceleratorTypes/nvidia-tesla-a100" - # }, - # ] - accelerator_machines = { - "a2-highgpu-1g" = { type = "nvidia-tesla-a100", count = 1 }, - "a2-highgpu-2g" = { type = "nvidia-tesla-a100", count = 2 }, - "a2-highgpu-4g" = { type = "nvidia-tesla-a100", count = 4 }, - "a2-highgpu-8g" = { type = "nvidia-tesla-a100", count = 8 }, - "a2-megagpu-16g" = { type = "nvidia-tesla-a100", count = 16 }, - "a2-ultragpu-1g" = { type = "nvidia-a100-80gb", count = 1 }, - "a2-ultragpu-2g" = { type = "nvidia-a100-80gb", count = 2 }, - "a2-ultragpu-4g" = { type = "nvidia-a100-80gb", count = 4 }, - "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, - "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, - "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, - "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, - "g2-standard-4" = { type = "nvidia-l4", count = 1 }, - "g2-standard-8" = { type = "nvidia-l4", count = 1 }, - "g2-standard-12" = { type = "nvidia-l4", count = 1 }, - "g2-standard-16" = { type = "nvidia-l4", count = 1 }, - "g2-standard-24" = { type = "nvidia-l4", count = 2 }, - "g2-standard-32" = { type = "nvidia-l4", count = 1 }, - "g2-standard-48" = { type = "nvidia-l4", count = 4 }, - "g2-standard-96" = { type = "nvidia-l4", count = 8 }, - } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) - - # Select in priority order: - # (1) var.guest_accelerator if not empty - # (2) local.generated_guest_accelerator if not empty - # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) -} diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/main.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/main.tf index 84cb60457a..c0a99f99bb 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/main.tf @@ -17,7 +17,16 @@ locals { labels = merge(var.labels, { ghpc_module = "schedmd-slurm-gcp-v6-nodeset", ghpc_role = "compute" }) } +module "gpu" { + source = "../../../../modules/internal/gpu-definition" + + machine_type = 
var.machine_type + guest_accelerator = var.guest_accelerator +} + locals { + guest_accelerator = module.gpu.guest_accelerator + disable_automatic_updates_metadata = var.allow_automatic_updates ? {} : { google_disable_automatic_updates = "TRUE" } metadata = merge( diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md index b03fbf0973..99078dbcce 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md @@ -259,6 +259,7 @@ limitations under the License. |------|--------|---------| | [bucket](#module\_bucket) | terraform-google-modules/cloud-storage/google | ~> 6.1 | | [daos\_network\_storage\_scripts](#module\_daos\_network\_storage\_scripts) | ../../../../modules/scripts/startup-script | n/a | +| [gpu](#module\_gpu) | ../../../../modules/internal/gpu-definition | n/a | | [nodeset\_cleanup](#module\_nodeset\_cleanup) | ./modules/cleanup_compute | n/a | | [nodeset\_cleanup\_tpu](#module\_nodeset\_cleanup\_tpu) | ./modules/cleanup_tpu | n/a | | [slurm\_controller\_template](#module\_slurm\_controller\_template) | ../../internal/slurm-gcp/instance_template | n/a | diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf index c98813a722..879509f693 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf @@ -12,6 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +module "gpu" { + source = "../../../../modules/internal/gpu-definition" + + machine_type = var.machine_type + guest_accelerator = var.guest_accelerator +} + locals { additional_disks = [ for ad in var.additional_disks : { @@ -67,7 +74,7 @@ module "slurm_controller_template" { enable_shielded_vm = var.enable_shielded_vm shielded_instance_config = var.shielded_instance_config - gpu = one(local.guest_accelerator) + gpu = one(module.gpu.guest_accelerator) machine_type = var.machine_type metadata = local.metadata diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/gpu_definition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/gpu_definition.tf deleted file mode 100644 index 1c84a92721..0000000000 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/gpu_definition.tf +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
-*/ - -## Required variables: -# guest_accelerator -# machine_type - -locals { - # example state; terraform will ignore diffs if last element of URL matches - # guest_accelerator = [ - # { - # count = 1 - # type = "https://www.googleapis.com/compute/beta/projects/PROJECT/zones/ZONE/acceleratorTypes/nvidia-tesla-a100" - # }, - # ] - accelerator_machines = { - "a2-highgpu-1g" = { type = "nvidia-tesla-a100", count = 1 }, - "a2-highgpu-2g" = { type = "nvidia-tesla-a100", count = 2 }, - "a2-highgpu-4g" = { type = "nvidia-tesla-a100", count = 4 }, - "a2-highgpu-8g" = { type = "nvidia-tesla-a100", count = 8 }, - "a2-megagpu-16g" = { type = "nvidia-tesla-a100", count = 16 }, - "a2-ultragpu-1g" = { type = "nvidia-a100-80gb", count = 1 }, - "a2-ultragpu-2g" = { type = "nvidia-a100-80gb", count = 2 }, - "a2-ultragpu-4g" = { type = "nvidia-a100-80gb", count = 4 }, - "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, - "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, - "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, - "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, - "g2-standard-4" = { type = "nvidia-l4", count = 1 }, - "g2-standard-8" = { type = "nvidia-l4", count = 1 }, - "g2-standard-12" = { type = "nvidia-l4", count = 1 }, - "g2-standard-16" = { type = "nvidia-l4", count = 1 }, - "g2-standard-24" = { type = "nvidia-l4", count = 2 }, - "g2-standard-32" = { type = "nvidia-l4", count = 1 }, - "g2-standard-48" = { type = "nvidia-l4", count = 4 }, - "g2-standard-96" = { type = "nvidia-l4", count = 8 }, - } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) - - # Select in priority order: - # (1) var.guest_accelerator if not empty - # (2) local.generated_guest_accelerator if not empty - # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) -} diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/README.md index 7160fbdd02..023f4d161b 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/README.md @@ -71,7 +71,9 @@ modules. For support with the underlying modules, see the instructions in the ## Modules -No modules. +| Name | Source | Version | +|------|--------|---------| +| [gpu](#module\_gpu) | ../../../../modules/internal/gpu-definition | n/a | ## Resources diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/gpu_definition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/gpu_definition.tf deleted file mode 100644 index 1c84a92721..0000000000 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/gpu_definition.tf +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
-*/ - -## Required variables: -# guest_accelerator -# machine_type - -locals { - # example state; terraform will ignore diffs if last element of URL matches - # guest_accelerator = [ - # { - # count = 1 - # type = "https://www.googleapis.com/compute/beta/projects/PROJECT/zones/ZONE/acceleratorTypes/nvidia-tesla-a100" - # }, - # ] - accelerator_machines = { - "a2-highgpu-1g" = { type = "nvidia-tesla-a100", count = 1 }, - "a2-highgpu-2g" = { type = "nvidia-tesla-a100", count = 2 }, - "a2-highgpu-4g" = { type = "nvidia-tesla-a100", count = 4 }, - "a2-highgpu-8g" = { type = "nvidia-tesla-a100", count = 8 }, - "a2-megagpu-16g" = { type = "nvidia-tesla-a100", count = 16 }, - "a2-ultragpu-1g" = { type = "nvidia-a100-80gb", count = 1 }, - "a2-ultragpu-2g" = { type = "nvidia-a100-80gb", count = 2 }, - "a2-ultragpu-4g" = { type = "nvidia-a100-80gb", count = 4 }, - "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, - "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, - "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, - "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, - "g2-standard-4" = { type = "nvidia-l4", count = 1 }, - "g2-standard-8" = { type = "nvidia-l4", count = 1 }, - "g2-standard-12" = { type = "nvidia-l4", count = 1 }, - "g2-standard-16" = { type = "nvidia-l4", count = 1 }, - "g2-standard-24" = { type = "nvidia-l4", count = 2 }, - "g2-standard-32" = { type = "nvidia-l4", count = 1 }, - "g2-standard-48" = { type = "nvidia-l4", count = 4 }, - "g2-standard-96" = { type = "nvidia-l4", count = 8 }, - } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) - - # Select in priority order: - # (1) var.guest_accelerator if not empty - # (2) local.generated_guest_accelerator if not empty - # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) -} diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/main.tf index 1632116209..6568996e75 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/main.tf @@ -17,7 +17,16 @@ locals { labels = merge(var.labels, { ghpc_module = "schedmd-slurm-gcp-v6-login", ghpc_role = "scheduler" }) } +module "gpu" { + source = "../../../../modules/internal/gpu-definition" + + machine_type = var.machine_type + guest_accelerator = var.guest_accelerator +} + locals { + guest_accelerator = module.gpu.guest_accelerator + disable_automatic_updates_metadata = var.allow_automatic_updates ? {} : { google_disable_automatic_updates = "TRUE" } metadata = merge( diff --git a/modules/compute/gke-node-pool/README.md b/modules/compute/gke-node-pool/README.md index d2715ff652..a1fcaa8f01 100644 --- a/modules/compute/gke-node-pool/README.md +++ b/modules/compute/gke-node-pool/README.md @@ -295,6 +295,7 @@ limitations under the License. 
| Name | Source | Version | |------|--------|---------| +| [gpu](#module\_gpu) | ../../internal/gpu-definition | n/a | | [kubectl\_apply](#module\_kubectl\_apply) | ../../management/kubectl-apply | n/a | ## Resources diff --git a/modules/compute/gke-node-pool/gpu_definition.tf b/modules/compute/gke-node-pool/gpu_definition.tf deleted file mode 100644 index 1c84a92721..0000000000 --- a/modules/compute/gke-node-pool/gpu_definition.tf +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -## Required variables: -# guest_accelerator -# machine_type - -locals { - # example state; terraform will ignore diffs if last element of URL matches - # guest_accelerator = [ - # { - # count = 1 - # type = "https://www.googleapis.com/compute/beta/projects/PROJECT/zones/ZONE/acceleratorTypes/nvidia-tesla-a100" - # }, - # ] - accelerator_machines = { - "a2-highgpu-1g" = { type = "nvidia-tesla-a100", count = 1 }, - "a2-highgpu-2g" = { type = "nvidia-tesla-a100", count = 2 }, - "a2-highgpu-4g" = { type = "nvidia-tesla-a100", count = 4 }, - "a2-highgpu-8g" = { type = "nvidia-tesla-a100", count = 8 }, - "a2-megagpu-16g" = { type = "nvidia-tesla-a100", count = 16 }, - "a2-ultragpu-1g" = { type = "nvidia-a100-80gb", count = 1 }, - "a2-ultragpu-2g" = { type = "nvidia-a100-80gb", count = 2 }, - "a2-ultragpu-4g" = { type = "nvidia-a100-80gb", count = 4 }, - "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, - "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, - "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, - "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, - "g2-standard-4" = { type = "nvidia-l4", count = 1 }, - "g2-standard-8" = { type = "nvidia-l4", count = 1 }, - "g2-standard-12" = { type = "nvidia-l4", count = 1 }, - "g2-standard-16" = { type = "nvidia-l4", count = 1 }, - "g2-standard-24" = { type = "nvidia-l4", count = 2 }, - "g2-standard-32" = { type = "nvidia-l4", count = 1 }, - "g2-standard-48" = { type = "nvidia-l4", count = 4 }, - "g2-standard-96" = { type = "nvidia-l4", count = 8 }, - } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) - - # Select in priority order: - # (1) var.guest_accelerator if not empty - # (2) local.generated_guest_accelerator if not empty - # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) -} diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index f1999cbd0b..c91c791393 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -27,7 +27,16 @@ locals { } } +module "gpu" { + source = "../../internal/gpu-definition" + + machine_type = var.machine_type + guest_accelerator = var.guest_accelerator +} + locals { + guest_accelerator = module.gpu.guest_accelerator + has_gpu = length(local.guest_accelerator) > 0 allocatable_gpu_per_node = local.has_gpu ? 
max(local.guest_accelerator[*].count...) : -1 gpu_taint = local.has_gpu ? [{ diff --git a/modules/compute/gke-node-pool/reservation_definitions.tf b/modules/compute/gke-node-pool/reservation_definitions.tf index 37b92a2f1a..cb24e4204c 100644 --- a/modules/compute/gke-node-pool/reservation_definitions.tf +++ b/modules/compute/gke-node-pool/reservation_definitions.tf @@ -48,6 +48,7 @@ data "google_compute_reservation" "specific_reservations" { } locals { + generated_guest_accelerator = module.gpu.machine_type_guest_accelerator reservation_resource_api_label = "compute.googleapis.com/reservation-name" input_specific_reservations_count = try(length(var.reservation_affinity.specific_reservations), 0) diff --git a/modules/compute/vm-instance/README.md b/modules/compute/vm-instance/README.md index 8fe80e1cdc..e75b70865d 100644 --- a/modules/compute/vm-instance/README.md +++ b/modules/compute/vm-instance/README.md @@ -185,6 +185,7 @@ limitations under the License. | Name | Source | Version | |------|--------|---------| +| [gpu](#module\_gpu) | ../../internal/gpu-definition | n/a | | [netstorage\_startup\_script](#module\_netstorage\_startup\_script) | ../../scripts/startup-script | n/a | ## Resources diff --git a/modules/compute/vm-instance/gpu_definition.tf b/modules/compute/vm-instance/gpu_definition.tf deleted file mode 100644 index 1c84a92721..0000000000 --- a/modules/compute/vm-instance/gpu_definition.tf +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
-*/ - -## Required variables: -# guest_accelerator -# machine_type - -locals { - # example state; terraform will ignore diffs if last element of URL matches - # guest_accelerator = [ - # { - # count = 1 - # type = "https://www.googleapis.com/compute/beta/projects/PROJECT/zones/ZONE/acceleratorTypes/nvidia-tesla-a100" - # }, - # ] - accelerator_machines = { - "a2-highgpu-1g" = { type = "nvidia-tesla-a100", count = 1 }, - "a2-highgpu-2g" = { type = "nvidia-tesla-a100", count = 2 }, - "a2-highgpu-4g" = { type = "nvidia-tesla-a100", count = 4 }, - "a2-highgpu-8g" = { type = "nvidia-tesla-a100", count = 8 }, - "a2-megagpu-16g" = { type = "nvidia-tesla-a100", count = 16 }, - "a2-ultragpu-1g" = { type = "nvidia-a100-80gb", count = 1 }, - "a2-ultragpu-2g" = { type = "nvidia-a100-80gb", count = 2 }, - "a2-ultragpu-4g" = { type = "nvidia-a100-80gb", count = 4 }, - "a2-ultragpu-8g" = { type = "nvidia-a100-80gb", count = 8 }, - "a3-highgpu-8g" = { type = "nvidia-h100-80gb", count = 8 }, - "a3-megagpu-8g" = { type = "nvidia-h100-mega-80gb", count = 8 }, - "a3-ultragpu-8g" = { type = "nvidia-h200-141gb", count = 8 }, - "g2-standard-4" = { type = "nvidia-l4", count = 1 }, - "g2-standard-8" = { type = "nvidia-l4", count = 1 }, - "g2-standard-12" = { type = "nvidia-l4", count = 1 }, - "g2-standard-16" = { type = "nvidia-l4", count = 1 }, - "g2-standard-24" = { type = "nvidia-l4", count = 2 }, - "g2-standard-32" = { type = "nvidia-l4", count = 1 }, - "g2-standard-48" = { type = "nvidia-l4", count = 4 }, - "g2-standard-96" = { type = "nvidia-l4", count = 8 }, - } - generated_guest_accelerator = try([local.accelerator_machines[var.machine_type]], []) - - # Select in priority order: - # (1) var.guest_accelerator if not empty - # (2) local.generated_guest_accelerator if not empty - # (3) default to empty list if both are empty - guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) -} diff --git a/modules/compute/vm-instance/main.tf b/modules/compute/vm-instance/main.tf index dcb43fe91a..9b74632678 100644 --- a/modules/compute/vm-instance/main.tf +++ b/modules/compute/vm-instance/main.tf @@ -19,7 +19,16 @@ locals { labels = merge(var.labels, { ghpc_module = "vm-instance", ghpc_role = "compute" }) } +module "gpu" { + source = "../../internal/gpu-definition" + + machine_type = var.machine_type + guest_accelerator = var.guest_accelerator +} + locals { + guest_accelerator = module.gpu.guest_accelerator + native_fstype = [] startup_script = local.startup_from_network_storage != null ? ( { startup-script = local.startup_from_network_storage }) : {} diff --git a/modules/internal/gpu-definition/README.md b/modules/internal/gpu-definition/README.md new file mode 100644 index 0000000000..29a87cab78 --- /dev/null +++ b/modules/internal/gpu-definition/README.md @@ -0,0 +1,47 @@ + +Copyright 2024 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >= 1.3 | + +## Providers + +No providers. 
+ +## Modules + +No modules. + +## Resources + +No resources. + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({ type = string, count = number }))
| `[]` | no | +| [machine\_type](#input\_machine\_type) | Machine type to use for the instance creation | `string` | n/a | yes | + +## Outputs + +| Name | Description | +|------|-------------| +| [guest\_accelerator](#output\_guest\_accelerator) | Sanitized list of the type and count of accelerator cards attached to the instance. | +| [machine\_type\_guest\_accelerator](#output\_machine\_type\_guest\_accelerator) | List of the type and count of accelerator cards attached to the specified machine type. | + diff --git a/community/modules/compute/htcondor-execute-point/gpu_definition.tf b/modules/internal/gpu-definition/main.tf similarity index 75% rename from community/modules/compute/htcondor-execute-point/gpu_definition.tf rename to modules/internal/gpu-definition/main.tf index 1c84a92721..bc66442e5e 100644 --- a/community/modules/compute/htcondor-execute-point/gpu_definition.tf +++ b/modules/internal/gpu-definition/main.tf @@ -1,5 +1,5 @@ /** - * Copyright 2023 Google LLC + * Copyright 2024 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +14,20 @@ * limitations under the License. */ -## Required variables: -# guest_accelerator -# machine_type +variable "machine_type" { + description = "Machine type to use for the instance creation" + type = string +} + +variable "guest_accelerator" { + description = "List of the type and count of accelerator cards attached to the instance." + type = list(object({ + type = string, + count = number + })) + default = [] + nullable = false +} locals { # example state; terraform will ignore diffs if last element of URL matches @@ -56,3 +67,17 @@ locals { # (3) default to empty list if both are empty guest_accelerator = try(coalescelist(var.guest_accelerator, local.generated_guest_accelerator), []) } + +output "guest_accelerator" { + description = "Sanitized list of the type and count of accelerator cards attached to the instance." + value = local.guest_accelerator +} + +output "machine_type_guest_accelerator" { + description = "List of the type and count of accelerator cards attached to the specified machine type." 
+ value = local.generated_guest_accelerator +} + +terraform { + required_version = ">= 1.3" +} diff --git a/tools/duplicate-diff.py b/tools/duplicate-diff.py index 703f00ff95..5a7c83ac1e 100644 --- a/tools/duplicate-diff.py +++ b/tools/duplicate-diff.py @@ -36,16 +36,9 @@ "modules/compute/vm-instance/startup_from_network_storage.tf", ], [ - "modules/compute/vm-instance/gpu_definition.tf", - "community/modules/compute/htcondor-execute-point/gpu_definition.tf", "community/modules/compute/schedmd-slurm-gcp-v5-node-group/gpu_definition.tf", "community/modules/scheduler/schedmd-slurm-gcp-v5-login/gpu_definition.tf", "community/modules/scheduler/schedmd-slurm-gcp-v5-controller/gpu_definition.tf", - "community/modules/compute/schedmd-slurm-gcp-v6-nodeset/gpu_definition.tf", - "community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/gpu_definition.tf", - "community/modules/scheduler/schedmd-slurm-gcp-v6-controller/gpu_definition.tf", - "community/modules/scheduler/schedmd-slurm-gcp-v6-login/gpu_definition.tf", - "modules/compute/gke-node-pool/gpu_definition.tf", ], [ "modules/compute/gke-node-pool/threads_per_core_calc.tf", From 7c686ca0da681256f6f7ec7cc682ba7a1eca730e Mon Sep 17 00:00:00 2001 From: ighosh98 Date: Thu, 19 Dec 2024 20:07:11 +0000 Subject: [PATCH 066/140] A3 Ultra integration tests --- examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml | 34 +++++------ .../daily-tests/builds/gke-a3-ultragpu.yaml | 60 +++++++++++++++++++ .../daily-tests/tests/gke-a3-ultragpu.yml | 39 ++++++++++++ 3 files changed, 116 insertions(+), 17 deletions(-) create mode 100644 tools/cloud-build/daily-tests/builds/gke-a3-ultragpu.yaml create mode 100644 tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml diff --git a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml index 2eb10b679c..d15f579cb7 100644 --- a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml +++ b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml @@ -33,6 +33,22 @@ vars: system_node_pool_disk_size_gb: 200 a3ultra_node_pool_disk_size_gb: 100 +terraform_providers: + google: + source: hashicorp/google + version: 6.13.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + google-beta: + source: hashicorp/google-beta + version: 6.13.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + deployment_groups: - group: primary modules: @@ -171,7 +187,7 @@ deployment_groups: use: [a3-ultragpu-cluster] - id: workload-manager-install - source: github.com/GoogleCloudPlatform/cluster-toolkit.git//modules/management/kubectl-apply?ref=e0c690b + source: github.com/GoogleCloudPlatform/cluster-toolkit.git//modules/management/kubectl-apply?ref=8c26d4a use: [a3-ultragpu-cluster] settings: kueue: @@ -194,19 +210,3 @@ deployment_groups: node_count: 2 name: run-nvidia-smi outputs: [instructions] - -terraform_providers: - google: - source: hashicorp/google - version: 6.13.0 - configuration: - project: $(vars.project_id) - region: $(vars.region) - zone: $(vars.zone) - google-beta: - source: hashicorp/google-beta - version: 6.13.0 - configuration: - project: $(vars.project_id) - region: $(vars.region) - zone: $(vars.zone) diff --git a/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu.yaml b/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu.yaml new file mode 100644 index 0000000000..c0a3cbb196 --- /dev/null +++ b/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu.yaml @@ -0,0 +1,60 @@ +# Copyright 2023 Google LLC +# +# Licensed under the 
Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +tags: +- m.gke-job-template +- gke + + +timeout: 14400s # 4hr +steps: +- id: gke-a3-ultragpu + name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner + entrypoint: /bin/bash + env: + - "ANSIBLE_HOST_KEY_CHECKING=false" + - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" + args: + - -c + - | + set -x -e + cd /workspace && make + BUILD_ID_FULL=$BUILD_ID + BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} + EXAMPLE_BP=examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml + + + # adding vm to act as remote node + echo ' - id: remote-node' >> $${EXAMPLE_BP} + echo ' source: modules/compute/vm-instance' >> $${EXAMPLE_BP} + echo ' use: [gke-a3-ultra-net-0]' >> $${EXAMPLE_BP} + echo ' settings:' >> $${EXAMPLE_BP} + echo ' machine_type: e2-standard-2' >> $${EXAMPLE_BP} + echo ' name_prefix: remote-node' >> $${EXAMPLE_BP} + echo ' add_deployment_name_before_prefix: true' >> $${EXAMPLE_BP} + echo '' + echo ' - id: job_template_hostname' >> $${EXAMPLE_BP} + echo ' source: modules/compute/gke-job-template' >> $${EXAMPLE_BP} + echo ' use: [a3-ultragpu-pool]' >> $${EXAMPLE_BP} + echo ' settings:' >> $${EXAMPLE_BP} + echo ' image: nvidia/cuda:11.0.3-runtime-ubuntu20.04' >> $${EXAMPLE_BP} + echo ' command:' >> $${EXAMPLE_BP} + echo ' - nvidia-smi' >> $${EXAMPLE_BP} + echo ' node_count: 1' >> $${EXAMPLE_BP} + echo ' outputs: [instructions]' >> $${EXAMPLE_BP} + + ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml" diff --git a/tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml b/tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml new file mode 100644 index 0000000000..bb13b25d5c --- /dev/null +++ b/tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml @@ -0,0 +1,39 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- + +# region, zone must be defined +# in build file with --extra-vars flag! 
+test_name: gke-a3ultra +deployment_name: gke-a3ultra-{{ build }} +workspace: /workspace +blueprint_yaml: "{{ workspace }}/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml" +network: gke-a3-ultra-net-0 +region: europe-west1 +zone: europe-west1-b +remote_node: "{{ deployment_name }}-remote-node-0" +extended_reservation: slurm-dev-gcp-a3u-gsc +static_node_count: 1 +cli_deployment_vars: + region: "{{ region }}" + zone: "{{ zone }}" + static_node_count: "{{ static_node_count }}" + extended_reservation: "{{ extended_reservation }}" + authorized_cidr: "{{ build_ip.stdout }}/32" + gcp_public_cidrs_access_enabled: true +custom_vars: + project: "{{ project }}" +post_deploy_tests: +- test-validation/test-gke-job.yml From 72ac71ec3cca723bbfdb5465a5ade8ba4454ce98 Mon Sep 17 00:00:00 2001 From: ighosh98 Date: Sat, 21 Dec 2024 21:11:51 +0000 Subject: [PATCH 067/140] Update A3U template design --- examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml | 35 ++++++++++--------- .../daily-tests/builds/gke-a3-ultragpu.yaml | 7 +++- .../daily-tests/tests/gke-a3-ultragpu.yml | 2 +- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml index d15f579cb7..3037132c21 100644 --- a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml +++ b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml @@ -53,21 +53,22 @@ deployment_groups: - group: primary modules: - id: gke-a3-ultra-net-0 - source: github.com/GoogleCloudPlatform/cluster-toolkit.git//modules/network/vpc?ref=e0c690b + source: modules/network/vpc settings: - network_name: gke-a3-ultra-net-0 + network_name: $(vars.deployment_name)-net-0 subnetworks: - - subnet_name: gke-a3-ultra-sub-0 + - subnet_name: $(vars.deployment_name)-sub-0 subnet_region: $(vars.region) subnet_ip: 192.168.0.0/18 - secondary_ranges: - gke-a3-ultra-sub-0: + secondary_ranges_list: + - subnetwork_name: $(vars.deployment_name)-sub-0 + ranges: - range_name: pods ip_cidr_range: 10.4.0.0/14 - range_name: services ip_cidr_range: 10.0.32.0/20 firewall_rules: - - name: gke-a3-ultra-internal-0 + - name: $(vars.deployment_name)-internal-0 ranges: [192.168.0.0/16] allow: - protocol: tcp @@ -77,16 +78,16 @@ deployment_groups: - protocol: icmp - id: gke-a3-ultra-net-1 - source: github.com/GoogleCloudPlatform/cluster-toolkit.git//modules/network/vpc?ref=e0c690b + source: modules/network/vpc settings: - network_name: gke-a3-ultra-net-1 + network_name: $(vars.deployment_name)-net-1 mtu: $(vars.mtu_size) subnetworks: - - subnet_name: gke-a3-ultra-sub-1 + - subnet_name: $(vars.deployment_name)-sub-1 subnet_region: $(vars.region) subnet_ip: 192.168.64.0/18 firewall_rules: - - name: gke-a3-ultra-internal-1 + - name: $(vars.deployment_name)-internal-1 ranges: [192.168.0.0/16] allow: - protocol: tcp @@ -96,20 +97,20 @@ deployment_groups: - protocol: icmp - id: gke-a3-ultra-rdma-net - source: github.com/GoogleCloudPlatform/cluster-toolkit.git//community/modules/network/rdma-vpc?ref=98c49fe + source: modules/network/gpu-rdma-vpc settings: - network_name: gke-a3-ultra-rdma-net + network_name: $(vars.deployment_name)-rdma-net mtu: $(vars.mtu_size) network_profile: https://www.googleapis.com/compute/beta/projects/$(vars.project_id)/global/networkProfiles/$(vars.zone)-vpc-roce network_routing_mode: REGIONAL subnetworks_template: - name_prefix: gke-a3-ultra-rdma-sub + name_prefix: $(vars.deployment_name)-rdma-sub count: 8 ip_range: 192.168.128.0/18 region: $(vars.region) - id: a3-ultragpu-cluster - source: 
github.com/GoogleCloudPlatform/cluster-toolkit.git//modules/scheduler/gke-cluster?ref=e0c690b + source: modules/scheduler/gke-cluster use: [gke-a3-ultra-net-0] settings: release_channel: RAPID @@ -146,7 +147,7 @@ deployment_groups: outputs: [instructions] - id: a3-ultragpu-pool - source: github.com/GoogleCloudPlatform/cluster-toolkit.git//modules/compute/gke-node-pool?ref=e0c690b + source: modules/compute/gke-node-pool use: [a3-ultragpu-cluster] settings: machine_type: a3-ultragpu-8g @@ -183,11 +184,11 @@ deployment_groups: outputs: [instructions] - id: topology-aware-scheduler-install - source: github.com/GoogleCloudPlatform/cluster-toolkit.git//community/modules/compute/gke-topology-scheduler?ref=e0c690b + source: community/modules/compute/gke-topology-scheduler use: [a3-ultragpu-cluster] - id: workload-manager-install - source: github.com/GoogleCloudPlatform/cluster-toolkit.git//modules/management/kubectl-apply?ref=8c26d4a + source: modules/management/kubectl-apply use: [a3-ultragpu-cluster] settings: kueue: diff --git a/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu.yaml b/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu.yaml index c0a3cbb196..c8ffdb136e 100644 --- a/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu.yaml @@ -16,7 +16,12 @@ tags: - m.gke-job-template - gke - +- m.gke-cluster +- m.gke-node-pool +- m.gke-topology-scheduler +- m.gpu-rdma-vpc +- m.kubectl-apply +- m.vpc timeout: 14400s # 4hr steps: diff --git a/tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml b/tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml index bb13b25d5c..a1dd8c72f4 100644 --- a/tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml +++ b/tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml @@ -20,7 +20,7 @@ test_name: gke-a3ultra deployment_name: gke-a3ultra-{{ build }} workspace: /workspace blueprint_yaml: "{{ workspace }}/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml" -network: gke-a3-ultra-net-0 +network: "{{ deployment_name }}-net-0" region: europe-west1 zone: europe-west1-b remote_node: "{{ deployment_name }}-remote-node-0" From 9ac1bc52eac436be810e017e8c2cfaf55600b651 Mon Sep 17 00:00:00 2001 From: Sam Skillman Date: Sun, 22 Dec 2024 09:57:27 -0700 Subject: [PATCH 068/140] Update mount-daos.sh Fix network interfaces for ubuntu, similar to debian. 
--- .../pre-existing-network-storage/scripts/mount-daos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/file-system/pre-existing-network-storage/scripts/mount-daos.sh b/modules/file-system/pre-existing-network-storage/scripts/mount-daos.sh index 50ac2b273c..a6a133b05d 100644 --- a/modules/file-system/pre-existing-network-storage/scripts/mount-daos.sh +++ b/modules/file-system/pre-existing-network-storage/scripts/mount-daos.sh @@ -54,7 +54,7 @@ sed -i "s/.*access_points.*/access_points: $access_points/g" $daos_config # Get names of network interfaces not in first PCI slot # The first PCI slot is a standard network adapter while remaining interfaces # are typically network cards dedicated to GPU or workload communication -if [[ "$OS_ID" == "debian" ]]; then +if [[ "$OS_ID" == "debian" ]] || [[ "${OS_ID}" = "ubuntu" ]]; then extra_interfaces=$(find /sys/class/net/ -not -name 'enp0s*' -regextype posix-extended -regex '.*/enp[0-9]+s.*' -printf '"%f"\n' | paste -s -d ',') elif [[ "${OS_ID}" = "rocky" ]] || [[ "${OS_ID}" = "rhel" ]]; then extra_interfaces=$(find /sys/class/net/ -not -name eth0 -regextype posix-extended -regex '.*/eth[0-9]+' -printf '"%f"\n' | paste -s -d ',') From b39e07f979acacbc81fbe7601599f6693c43b695 Mon Sep 17 00:00:00 2001 From: Sam Skillman Date: Mon, 23 Dec 2024 01:49:45 +0000 Subject: [PATCH 069/140] Add to parallelstore scripts --- modules/file-system/parallelstore/scripts/mount-daos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/file-system/parallelstore/scripts/mount-daos.sh b/modules/file-system/parallelstore/scripts/mount-daos.sh index 50ac2b273c..a6a133b05d 100644 --- a/modules/file-system/parallelstore/scripts/mount-daos.sh +++ b/modules/file-system/parallelstore/scripts/mount-daos.sh @@ -54,7 +54,7 @@ sed -i "s/.*access_points.*/access_points: $access_points/g" $daos_config # Get names of network interfaces not in first PCI slot # The first PCI slot is a standard network adapter while remaining interfaces # are typically network cards dedicated to GPU or workload communication -if [[ "$OS_ID" == "debian" ]]; then +if [[ "$OS_ID" == "debian" ]] || [[ "${OS_ID}" = "ubuntu" ]]; then extra_interfaces=$(find /sys/class/net/ -not -name 'enp0s*' -regextype posix-extended -regex '.*/enp[0-9]+s.*' -printf '"%f"\n' | paste -s -d ',') elif [[ "${OS_ID}" = "rocky" ]] || [[ "${OS_ID}" = "rhel" ]]; then extra_interfaces=$(find /sys/class/net/ -not -name eth0 -regextype posix-extended -regex '.*/eth[0-9]+' -printf '"%f"\n' | paste -s -d ',') From 72314e77eb8f08dda67f00d0177bec555ba28682 Mon Sep 17 00:00:00 2001 From: Alyssa Date: Thu, 19 Dec 2024 03:37:12 +0000 Subject: [PATCH 070/140] Update python test deployment name to build id --- .../daily-tests/builds/slurm-gcp-v6-reconfig-size.yaml | 2 ++ .../builds/slurm-gcp-v6-simple-job-completion.yaml | 2 ++ .../daily-tests/builds/slurm-gcp-v6-topology.yaml | 2 ++ .../blueprints/slurm-simple-reconfig.yaml | 2 +- .../blueprints/slurm-simple.yaml | 2 +- .../blueprints/topology-test.yaml | 2 +- tools/python-integration-tests/deployment.py | 10 +++++++++- 7 files changed, 18 insertions(+), 4 deletions(-) diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-reconfig-size.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-reconfig-size.yaml index 8d6e390ebe..d67fd64e1e 100644 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-reconfig-size.yaml +++ b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-reconfig-size.yaml @@ -31,4 +31,6 @@ steps: - | set -x -e cd 
/workspace && make + export BUILD_ID="${BUILD_ID}" + python3 tools/python-integration-tests/slurm_reconfig_size.py diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-simple-job-completion.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-simple-job-completion.yaml index 7acd7bdc11..7a8a8f3a26 100644 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-simple-job-completion.yaml +++ b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-simple-job-completion.yaml @@ -31,4 +31,6 @@ steps: - | set -x -e cd /workspace && make + export BUILD_ID="${BUILD_ID}" + python3 tools/python-integration-tests/slurm_simple_job_completion.py diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-topology.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-topology.yaml index 51bfa17c71..f96bd876fc 100644 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-topology.yaml +++ b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-topology.yaml @@ -31,4 +31,6 @@ steps: - | set -x -e cd /workspace && make + export BUILD_ID="${BUILD_ID}" + python3 tools/python-integration-tests/slurm_topology.py diff --git a/tools/python-integration-tests/blueprints/slurm-simple-reconfig.yaml b/tools/python-integration-tests/blueprints/slurm-simple-reconfig.yaml index a9ac6d891f..a014c03c0f 100644 --- a/tools/python-integration-tests/blueprints/slurm-simple-reconfig.yaml +++ b/tools/python-integration-tests/blueprints/slurm-simple-reconfig.yaml @@ -17,7 +17,7 @@ blueprint_name: slurm-test vars: project_id: ## Set GCP Project ID Here ## - deployment_name: slurm-test + deployment_name: ## Set Deployment Name Here ## region: us-central1 zone: us-central1-a diff --git a/tools/python-integration-tests/blueprints/slurm-simple.yaml b/tools/python-integration-tests/blueprints/slurm-simple.yaml index 235674c4d2..b0451f3cc9 100644 --- a/tools/python-integration-tests/blueprints/slurm-simple.yaml +++ b/tools/python-integration-tests/blueprints/slurm-simple.yaml @@ -17,7 +17,7 @@ blueprint_name: slurm-test vars: project_id: ## Set GCP Project ID Here ## - deployment_name: slurm-test + deployment_name: ## Set Deployment Name Here ## region: us-central1 zone: us-central1-a diff --git a/tools/python-integration-tests/blueprints/topology-test.yaml b/tools/python-integration-tests/blueprints/topology-test.yaml index acb494c801..0dbf627e6c 100644 --- a/tools/python-integration-tests/blueprints/topology-test.yaml +++ b/tools/python-integration-tests/blueprints/topology-test.yaml @@ -17,7 +17,7 @@ blueprint_name: topology-test vars: project_id: ## Set GCP Project ID Here ## - deployment_name: topology-test + deployment_name: ## Set Deployment Name Here ## region: us-central1 zone: us-central1-a diff --git a/tools/python-integration-tests/deployment.py b/tools/python-integration-tests/deployment.py index 3ed43361b9..c0ca562656 100644 --- a/tools/python-integration-tests/deployment.py +++ b/tools/python-integration-tests/deployment.py @@ -17,6 +17,7 @@ import os import subprocess import yaml +import uuid class Deployment: def __init__(self, blueprint: str): @@ -37,7 +38,6 @@ def run_command(self, cmd: str, err_msg: str = None) -> subprocess.CompletedProc def parse_blueprint(self, file_path: str): with open(file_path, 'r') as file: content = yaml.safe_load(file) - self.deployment_name = content["vars"]["deployment_name"] self.zone = content["vars"]["zone"] def get_posixAccount_info(self): @@ -50,8 +50,16 @@ def get_posixAccount_info(self): self.project_id = account['accountId'] self.username = account['username'] + def 
generate_uniq_deployment_name(self): +        BUILD_ID = os.environ.get('BUILD_ID') +        if BUILD_ID: +            return BUILD_ID[:6] +        else: +            return str(uuid.uuid4())[:6] +      def set_deployment_variables(self):         self.workspace = os.path.abspath(os.getcwd().strip()) +        self.deployment_name = self.generate_uniq_deployment_name()         self.parse_blueprint(self.blueprint_yaml)         self.get_posixAccount_info()         self.instance_name = self.deployment_name.replace("-", "")[:10] + "-slurm-login-001"  From 3f0b32db45c0b3f764913ecf6faad1c90c5c92f3 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Mon, 23 Dec 2024 02:39:27 +0000 Subject: [PATCH 071/140] Add full definition of `nodeset` to partition module --- .../schedmd-slurm-gcp-v6-partition/README.md | 2 +- .../variables.tf | 108 +++++++++++++++++- .../test_configs/node-groups.yaml | 46 -------- 3 files changed, 107 insertions(+), 49 deletions(-) diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-partition/README.md b/community/modules/compute/schedmd-slurm-gcp-v6-partition/README.md index 54148697bf..00aa1198e8 100--- a/community/modules/compute/schedmd-slurm-gcp-v6-partition/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v6-partition/README.md @@ -85,7 +85,7 @@ No resources. | [exclusive](#input\_exclusive) | Exclusive job access to nodes. When set to true nodes execute single job and are deleted
after job exits. If set to false, multiple jobs can be scheduled on one node. | `bool` | `true` | no | | [is\_default](#input\_is\_default) | Sets this partition as the default partition by updating the partition\_conf.
If "Default" is already set in partition\_conf, this variable will have no effect. | `bool` | `false` | no | | [network\_storage](#input\_network\_storage) | DEPRECATED |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
| `[]` | no | -| [nodeset](#input\_nodeset) | A list of nodesets.
For type definition see community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf::nodeset | `list(any)` | `[]` | no | +| [nodeset](#input\_nodeset) | A list of nodesets.
For type definition see community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf::nodeset |
list(object({
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
node_conf = optional(map(string), {})
nodeset_name = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string)
enable_confidential_vm = optional(bool, false)
enable_placement = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
enable_maintenance_reservation = optional(bool, false)
enable_opportunistic_maintenance = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
dws_flex = object({
enabled = bool
max_run_duration = number
use_job_duration = bool
})
labels = optional(map(string), {})
machine_type = optional(string)
maintenance_interval = optional(string)
instance_properties_json = string
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
subnetwork_self_link = string
additional_networks = optional(list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
})))
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
spot = optional(bool, false)
tags = optional(list(string), [])
termination_action = optional(string)
reservation_name = optional(string)
future_reservation = string
startup_script = optional(list(object({
filename = string
content = string })), [])

zone_target_shape = string
zone_policy_allow = set(string)
zone_policy_deny = set(string)
}))
| `[]` | no | | [nodeset\_dyn](#input\_nodeset\_dyn) | Defines dynamic nodesets, as a list. |
list(object({
nodeset_name = string
nodeset_feature = string
}))
| `[]` | no | | [nodeset\_tpu](#input\_nodeset\_tpu) | Define TPU nodesets, as a list. |
list(object({
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 5)
nodeset_name = string
enable_public_ip = optional(bool, false)
node_type = string
accelerator_config = optional(object({
topology = string
version = string
}), {
topology = ""
version = ""
})
tf_version = string
preemptible = optional(bool, false)
preserve_tpu = optional(bool, false)
zone = string
data_disks = optional(list(string), [])
docker_image = optional(string, "")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
})), [])
subnetwork = string
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
project_id = string
reserved = optional(string, false)
}))
| `[]` | no | | [partition\_conf](#input\_partition\_conf) | Slurm partition configuration as a map.
See https://slurm.schedmd.com/slurm.conf.html#SECTION_PARTITION-CONFIGURATION | `map(string)` | `{}` | no | diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-partition/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v6-partition/variables.tf index e14e44b02a..5798e68878 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-partition/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-partition/variables.tf @@ -54,8 +54,112 @@ variable "nodeset" { A list of nodesets. For type definition see community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf::nodeset EOD - type = list(any) - default = [] + type = list(object({ + node_count_static = optional(number, 0) + node_count_dynamic_max = optional(number, 1) + node_conf = optional(map(string), {}) + nodeset_name = string + additional_disks = optional(list(object({ + disk_name = optional(string) + device_name = optional(string) + disk_size_gb = optional(number) + disk_type = optional(string) + disk_labels = optional(map(string), {}) + auto_delete = optional(bool, true) + boot = optional(bool, false) + })), []) + bandwidth_tier = optional(string, "platform_default") + can_ip_forward = optional(bool, false) + disable_smt = optional(bool, false) + disk_auto_delete = optional(bool, true) + disk_labels = optional(map(string), {}) + disk_size_gb = optional(number) + disk_type = optional(string) + enable_confidential_vm = optional(bool, false) + enable_placement = optional(bool, false) + enable_oslogin = optional(bool, true) + enable_shielded_vm = optional(bool, false) + enable_maintenance_reservation = optional(bool, false) + enable_opportunistic_maintenance = optional(bool, false) + gpu = optional(object({ + count = number + type = string + })) + dws_flex = object({ + enabled = bool + max_run_duration = number + use_job_duration = bool + }) + labels = optional(map(string), {}) + machine_type = optional(string) + maintenance_interval = optional(string) + instance_properties_json = string + metadata = optional(map(string), {}) + min_cpu_platform = optional(string) + network_tier = optional(string, "STANDARD") + network_storage = optional(list(object({ + server_ip = string + remote_mount = string + local_mount = string + fs_type = string + mount_options = string + client_install_runner = optional(map(string)) + mount_runner = optional(map(string)) + })), []) + on_host_maintenance = optional(string) + preemptible = optional(bool, false) + region = optional(string) + service_account = optional(object({ + email = optional(string) + scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"]) + })) + shielded_instance_config = optional(object({ + enable_integrity_monitoring = optional(bool, true) + enable_secure_boot = optional(bool, true) + enable_vtpm = optional(bool, true) + })) + source_image_family = optional(string) + source_image_project = optional(string) + source_image = optional(string) + subnetwork_self_link = string + additional_networks = optional(list(object({ + network = string + subnetwork = string + subnetwork_project = string + network_ip = string + nic_type = string + stack_type = string + queue_count = number + access_config = list(object({ + nat_ip = string + network_tier = string + })) + ipv6_access_config = list(object({ + network_tier = string + })) + alias_ip_range = list(object({ + ip_cidr_range = string + subnetwork_range_name = string + })) + }))) + access_config = optional(list(object({ + nat_ip = string + network_tier = string + }))) + spot = 
optional(bool, false) + tags = optional(list(string), []) + termination_action = optional(string) + reservation_name = optional(string) + future_reservation = string + startup_script = optional(list(object({ + filename = string + content = string })), []) + + zone_target_shape = string + zone_policy_allow = set(string) + zone_policy_deny = set(string) + })) + default = [] validation { condition = length(distinct(var.nodeset[*].nodeset_name)) == length(var.nodeset) diff --git a/tools/validate_configs/test_configs/node-groups.yaml b/tools/validate_configs/test_configs/node-groups.yaml index 962d1e3130..ef512993c7 100644 --- a/tools/validate_configs/test_configs/node-groups.yaml +++ b/tools/validate_configs/test_configs/node-groups.yaml @@ -107,51 +107,6 @@ deployment_groups: settings: partition_name: multns - ## Explicitly set node partition with one nodeset - - id: single_nodeset_explicit_partition - source: community/modules/compute/schedmd-slurm-gcp-v6-partition - settings: - partition_name: explns - is_default: true - nodeset: - - nodeset_name: expl - node_count_static: 0 - node_count_dynamic_max: 4 - enable_placement: false - node_conf: {} - additional_disks: [] - additional_networks: [] - bandwidth_tier: null - can_ip_forward: false - enable_smt: true - disk_auto_delete: true - disk_labels: {} - disk_size_gb: 50 - disk_type: pd-standard - enable_confidential_vm: false - enable_oslogin: true - enable_shielded_vm: false - enable_spot_vm: false - gpu: null - instance_template: null - labels: $(vars.labels) - machine_type: n2-standard-16 - maintenance_interval: "" - metadata: {} - min_cpu_platform: null - on_host_maintenance: TERMINATE - preemptible: false - reservation_name: null # will be replaced by default value empty string - service_account_email: null - shielded_instance_config: null - subnetwork_self_link: $(network.subnetwork_self_link) - spot_instance_config: null - source_image_family: null - source_image_project: null - source_image: null - tags: [] - access_config: [] - - id: slurm_login source: community/modules/scheduler/schedmd-slurm-gcp-v6-login use: [network] @@ -165,7 +120,6 @@ deployment_groups: - network - single_nodeset_partition - multiple_nodesets - - single_nodeset_explicit_partition - homefs - slurm_login settings: From d76cfe2d716550aa3834a85229e0ba359acda4a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wiktor=20Niesiob=C4=99dzki?= Date: Thu, 17 Oct 2024 09:19:49 +0000 Subject: [PATCH 072/140] Use templates for DAOS mounts --- modules/file-system/parallelstore/README.md | 26 +++++++ modules/file-system/parallelstore/main.tf | 13 ++-- .../scripts/install-daos-client.sh | 1 + .../mount-daos.sh.tftpl} | 70 ++++++++----------- .../file-system/parallelstore/variables.tf | 14 ++++ .../pre-existing-network-storage/README.md | 33 +++++++++ .../pre-existing-network-storage/outputs.tf | 12 +++- .../scripts/install-daos-client.sh | 1 + .../mount-daos.sh.tftpl} | 70 ++++++++----------- .../pre-existing-network-storage/variables.tf | 10 +++ .../templates/startup-script-custom.tftpl | 2 +- tools/duplicate-diff.py | 4 +- 12 files changed, 168 insertions(+), 88 deletions(-) rename modules/file-system/parallelstore/{scripts/mount-daos.sh => templates/mount-daos.sh.tftpl} (55%) rename modules/file-system/pre-existing-network-storage/{scripts/mount-daos.sh => templates/mount-daos.sh.tftpl} (55%) diff --git a/modules/file-system/parallelstore/README.md b/modules/file-system/parallelstore/README.md index 25c84eeaac..4a5896e217 100644 --- a/modules/file-system/parallelstore/README.md +++ 
b/modules/file-system/parallelstore/README.md @@ -94,6 +94,30 @@ Here you can replace `import_gcs_bucket_uri` with the uri of sub folder within G bucket and `import_destination_path` with local directory within parallelstore instance. +### Additional configuration for DAOS agent and dfuse +Use `daos_agent_config` to provide additional configuration for `daos_agent`, for example: + +```yaml +- id: parallelstorefs + source: modules/file-system/pre-existing-network-storage + settings: + daos_agent_config: | + credential_config: + cache_expiration: 1m +``` + +Use `dfuse_environment` to provide additional environment variables for `dfuse` process, for example: + +```yaml +- id: parallelstorefs + source: modules/file-system/parallelstore + settings: + dfuse_environment: + D_LOG_FILE: /tmp/client.log + D_APPEND_PID_TO_LOG: 1 + D_LOG_MASK: debug +``` + Copyright 2024 Google LLC @@ -142,7 +166,9 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [daos\_agent\_config](#input\_daos\_agent\_config) | Additional configuration to be added to daos\_config.yml | `string` | `""` | no | | [deployment\_name](#input\_deployment\_name) | Name of the HPC deployment. | `string` | n/a | yes | +| [dfuse\_environment](#input\_dfuse\_environment) | Additional environment variables for DFuse process | `map(string)` | `{}` | no | | [directory\_stripe](#input\_directory\_stripe) | The parallelstore stripe level for directories. | `string` | `"DIRECTORY_STRIPE_LEVEL_UNSPECIFIED"` | no | | [file\_stripe](#input\_file\_stripe) | The parallelstore stripe level for files. | `string` | `"FILE_STRIPE_LEVEL_UNSPECIFIED"` | no | | [import\_destination\_path](#input\_import\_destination\_path) | The name of local path to import data on parallelstore instance from GCS bucket. 
| `string` | `null` | no | diff --git a/modules/file-system/parallelstore/main.tf b/modules/file-system/parallelstore/main.tf index c09de17a2e..acc2a0551e 100644 --- a/modules/file-system/parallelstore/main.tf +++ b/modules/file-system/parallelstore/main.tf @@ -34,10 +34,15 @@ locals { } mount_runner = { - "type" = "shell" - "source" = "${path.module}/scripts/mount-daos.sh" - "args" = "--access_points=\"${local.access_points}\" --local_mount=\"${var.local_mount}\" --mount_options=\"${var.mount_options}\"" - "destination" = "mount_daos.sh" + "type" = "shell" + "content" = templatefile("${path.module}/templates/mount-daos.sh.tftpl", { + access_points = local.access_points + daos_agent_config = var.daos_agent_config + dfuse_environment = var.dfuse_environment + local_mount = var.local_mount + mount_options = join(" ", [for opt in split(",", var.mount_options) : "--${opt}"]) + }) + "destination" = "mount_filesystem${replace(var.local_mount, "/", "_")}.sh" } } diff --git a/modules/file-system/parallelstore/scripts/install-daos-client.sh b/modules/file-system/parallelstore/scripts/install-daos-client.sh index 22ec324af7..e96eadb56a 100644 --- a/modules/file-system/parallelstore/scripts/install-daos-client.sh +++ b/modules/file-system/parallelstore/scripts/install-daos-client.sh @@ -50,6 +50,7 @@ else if [ -x /usr/bin/google_disable_automatic_updates ]; then /usr/bin/google_disable_automatic_updates fi + dnf clean all dnf makecache # 2) Install daos-client diff --git a/modules/file-system/parallelstore/scripts/mount-daos.sh b/modules/file-system/parallelstore/templates/mount-daos.sh.tftpl similarity index 55% rename from modules/file-system/parallelstore/scripts/mount-daos.sh rename to modules/file-system/parallelstore/templates/mount-daos.sh.tftpl index a6a133b05d..c6f5d53660 100644 --- a/modules/file-system/parallelstore/scripts/mount-daos.sh +++ b/modules/file-system/parallelstore/templates/mount-daos.sh.tftpl @@ -20,59 +20,48 @@ OS_VERSION=$(awk -F '=' '/VERSION_ID/ {print $2}' /etc/os-release | sed -e 's/"/ OS_VERSION_MAJOR=$(awk -F '=' '/VERSION_ID/ {print $2}' /etc/os-release | sed -e 's/"//g' -e 's/\..*$//') if ! { - { [[ "${OS_ID}" = "rocky" ]] || [[ "${OS_ID}" = "rhel" ]]; } && { [[ "${OS_VERSION_MAJOR}" = "8" ]] || [[ "${OS_VERSION_MAJOR}" = "9" ]]; } || - { [[ "${OS_ID}" = "ubuntu" ]] && [[ "${OS_VERSION}" = "22.04" ]]; } || - { [[ "${OS_ID}" = "debian" ]] && [[ "${OS_VERSION_MAJOR}" = "12" ]]; } + { [[ "$${OS_ID}" = "rocky" ]] || [[ "$${OS_ID}" = "rhel" ]]; } && { [[ "$${OS_VERSION_MAJOR}" = "8" ]] || [[ "$${OS_VERSION_MAJOR}" = "9" ]]; } || + { [[ "$${OS_ID}" = "ubuntu" ]] && [[ "$${OS_VERSION}" = "22.04" ]]; } || + { [[ "$${OS_ID}" = "debian" ]] && [[ "$${OS_VERSION_MAJOR}" = "12" ]]; } }; then - echo "Unsupported operating system ${OS_ID} ${OS_VERSION}. This script only supports Rocky Linux 8, Redhat 8, Redhat 9, Ubuntu 22.04, and Debian 12." + echo "Unsupported operating system $${OS_ID} $${OS_VERSION}. This script only supports Rocky Linux 8, Redhat 8, Redhat 9, Ubuntu 22.04, and Debian 12." exit 1 fi -# Parse local_mount, mount_options from argument. -# Format mount-options string to be compatible to dfuse mount command. -# e.g. "disable-wb-cache,eq-count=8" --> --disable-wb-cache --eq-count=8. 
-for arg in "$@"; do - if [[ $arg == --access_points=* ]]; then - access_points="${arg#*=}" - fi - if [[ $arg == --local_mount=* ]]; then - local_mount="${arg#*=}" - fi - if [[ $arg == --mount_options=* ]]; then - mount_options="${arg#*=}" - mount_options="--${mount_options//,/ --}" - fi -done - # Edit agent config daos_config=/etc/daos/daos_agent.yml -sed -i "s/#.*transport_config/transport_config/g" $daos_config -sed -i "s/#.*allow_insecure:.*false/ allow_insecure: true/g" $daos_config -sed -i "s/.*access_points.*/access_points: $access_points/g" $daos_config +# rewrite $daos_config from scratch +mv $${daos_config} $${daos_config}.orig + +exclude_fabric_ifaces="" # Get names of network interfaces not in first PCI slot # The first PCI slot is a standard network adapter while remaining interfaces # are typically network cards dedicated to GPU or workload communication -if [[ "$OS_ID" == "debian" ]] || [[ "${OS_ID}" = "ubuntu" ]]; then +if [[ "$${OS_ID}" == "debian" ]] || [[ "$${OS_ID}" = "ubuntu" ]]; then extra_interfaces=$(find /sys/class/net/ -not -name 'enp0s*' -regextype posix-extended -regex '.*/enp[0-9]+s.*' -printf '"%f"\n' | paste -s -d ',') -elif [[ "${OS_ID}" = "rocky" ]] || [[ "${OS_ID}" = "rhel" ]]; then +elif [[ "$${OS_ID}" = "rocky" ]] || [[ "$${OS_ID}" = "rhel" ]]; then extra_interfaces=$(find /sys/class/net/ -not -name eth0 -regextype posix-extended -regex '.*/eth[0-9]+' -printf '"%f"\n' | paste -s -d ',') fi -if [[ -n "$extra_interfaces" ]]; then - exclude_fabric_ifaces="\"lo\",$extra_interfaces" - sed -i "s/#.*exclude_fabric_ifaces: \[.*/exclude_fabric_ifaces: [$exclude_fabric_ifaces]/" $daos_config -fi +cat > $daos_config </etc/systemd/system/"${service_name}" </etc/systemd/system/"$${service_name}" < [fs\_type](#input\_fs\_type) | Type of file system to be mounted (e.g., nfs, lustre) | `string` | `"nfs"` | no | | [local\_mount](#input\_local\_mount) | The mount point where the contents of the device may be accessed after mounting. | `string` | `"/mnt"` | no | | [mount\_options](#input\_mount\_options) | Options describing various aspects of the file system. Consider adding setting to 'defaults,\_netdev,implicit\_dirs' when using gcsfuse. | `string` | `"defaults,_netdev"` | no | +| [parallelstore\_options](#input\_parallelstore\_options) | Parallelstore specific options |
object({
daos_agent_config = optional(string, "")
dfuse_environment = optional(map(string), {})
})
| `{}` | no | | [remote\_mount](#input\_remote\_mount) | Remote FS name or export. This is the exported directory for nfs, fs name for lustre, and bucket name (without gs://) for gcsfuse. | `string` | n/a | yes | | [server\_ip](#input\_server\_ip) | The device name as supplied to fs-tab, excluding remote fs-name(for nfs, that is the server IP, for lustre [:]). This can be omitted for gcsfuse. | `string` | `""` | no | diff --git a/modules/file-system/pre-existing-network-storage/outputs.tf b/modules/file-system/pre-existing-network-storage/outputs.tf index 9e93226804..df92f7c315 100644 --- a/modules/file-system/pre-existing-network-storage/outputs.tf +++ b/modules/file-system/pre-existing-network-storage/outputs.tf @@ -83,9 +83,15 @@ locals { } mount_runner_daos = { - "type" = "shell" - "content" = file("${path.module}/scripts/mount-daos.sh") - "args" = "--access_points=\"${var.remote_mount}\" --local_mount=\"${var.local_mount}\" --mount_options=\"${var.mount_options}\"" + "type" = "shell" + "content" = templatefile("${path.module}/templates/mount-daos.sh.tftpl", { + access_points = var.remote_mount + daos_agent_config = var.parallelstore_options.daos_agent_config + dfuse_environment = var.parallelstore_options.dfuse_environment + local_mount = var.local_mount + # avoid passing "--" as mount option to dfuse + mount_options = length(var.mount_options) == 0 ? "" : join(" ", [for opt in split(",", var.mount_options) : "--${opt}"]) + }) "destination" = "mount_filesystem${replace(var.local_mount, "/", "_")}.sh" } diff --git a/modules/file-system/pre-existing-network-storage/scripts/install-daos-client.sh b/modules/file-system/pre-existing-network-storage/scripts/install-daos-client.sh index 22ec324af7..e96eadb56a 100644 --- a/modules/file-system/pre-existing-network-storage/scripts/install-daos-client.sh +++ b/modules/file-system/pre-existing-network-storage/scripts/install-daos-client.sh @@ -50,6 +50,7 @@ else if [ -x /usr/bin/google_disable_automatic_updates ]; then /usr/bin/google_disable_automatic_updates fi + dnf clean all dnf makecache # 2) Install daos-client diff --git a/modules/file-system/pre-existing-network-storage/scripts/mount-daos.sh b/modules/file-system/pre-existing-network-storage/templates/mount-daos.sh.tftpl similarity index 55% rename from modules/file-system/pre-existing-network-storage/scripts/mount-daos.sh rename to modules/file-system/pre-existing-network-storage/templates/mount-daos.sh.tftpl index a6a133b05d..c6f5d53660 100644 --- a/modules/file-system/pre-existing-network-storage/scripts/mount-daos.sh +++ b/modules/file-system/pre-existing-network-storage/templates/mount-daos.sh.tftpl @@ -20,59 +20,48 @@ OS_VERSION=$(awk -F '=' '/VERSION_ID/ {print $2}' /etc/os-release | sed -e 's/"/ OS_VERSION_MAJOR=$(awk -F '=' '/VERSION_ID/ {print $2}' /etc/os-release | sed -e 's/"//g' -e 's/\..*$//') if ! { - { [[ "${OS_ID}" = "rocky" ]] || [[ "${OS_ID}" = "rhel" ]]; } && { [[ "${OS_VERSION_MAJOR}" = "8" ]] || [[ "${OS_VERSION_MAJOR}" = "9" ]]; } || - { [[ "${OS_ID}" = "ubuntu" ]] && [[ "${OS_VERSION}" = "22.04" ]]; } || - { [[ "${OS_ID}" = "debian" ]] && [[ "${OS_VERSION_MAJOR}" = "12" ]]; } + { [[ "$${OS_ID}" = "rocky" ]] || [[ "$${OS_ID}" = "rhel" ]]; } && { [[ "$${OS_VERSION_MAJOR}" = "8" ]] || [[ "$${OS_VERSION_MAJOR}" = "9" ]]; } || + { [[ "$${OS_ID}" = "ubuntu" ]] && [[ "$${OS_VERSION}" = "22.04" ]]; } || + { [[ "$${OS_ID}" = "debian" ]] && [[ "$${OS_VERSION_MAJOR}" = "12" ]]; } }; then - echo "Unsupported operating system ${OS_ID} ${OS_VERSION}. 
This script only supports Rocky Linux 8, Redhat 8, Redhat 9, Ubuntu 22.04, and Debian 12." + echo "Unsupported operating system $${OS_ID} $${OS_VERSION}. This script only supports Rocky Linux 8, Redhat 8, Redhat 9, Ubuntu 22.04, and Debian 12." exit 1 fi -# Parse local_mount, mount_options from argument. -# Format mount-options string to be compatible to dfuse mount command. -# e.g. "disable-wb-cache,eq-count=8" --> --disable-wb-cache --eq-count=8. -for arg in "$@"; do - if [[ $arg == --access_points=* ]]; then - access_points="${arg#*=}" - fi - if [[ $arg == --local_mount=* ]]; then - local_mount="${arg#*=}" - fi - if [[ $arg == --mount_options=* ]]; then - mount_options="${arg#*=}" - mount_options="--${mount_options//,/ --}" - fi -done - # Edit agent config daos_config=/etc/daos/daos_agent.yml -sed -i "s/#.*transport_config/transport_config/g" $daos_config -sed -i "s/#.*allow_insecure:.*false/ allow_insecure: true/g" $daos_config -sed -i "s/.*access_points.*/access_points: $access_points/g" $daos_config +# rewrite $daos_config from scratch +mv $${daos_config} $${daos_config}.orig + +exclude_fabric_ifaces="" # Get names of network interfaces not in first PCI slot # The first PCI slot is a standard network adapter while remaining interfaces # are typically network cards dedicated to GPU or workload communication -if [[ "$OS_ID" == "debian" ]] || [[ "${OS_ID}" = "ubuntu" ]]; then +if [[ "$${OS_ID}" == "debian" ]] || [[ "$${OS_ID}" = "ubuntu" ]]; then extra_interfaces=$(find /sys/class/net/ -not -name 'enp0s*' -regextype posix-extended -regex '.*/enp[0-9]+s.*' -printf '"%f"\n' | paste -s -d ',') -elif [[ "${OS_ID}" = "rocky" ]] || [[ "${OS_ID}" = "rhel" ]]; then +elif [[ "$${OS_ID}" = "rocky" ]] || [[ "$${OS_ID}" = "rhel" ]]; then extra_interfaces=$(find /sys/class/net/ -not -name eth0 -regextype posix-extended -regex '.*/eth[0-9]+' -printf '"%f"\n' | paste -s -d ',') fi -if [[ -n "$extra_interfaces" ]]; then - exclude_fabric_ifaces="\"lo\",$extra_interfaces" - sed -i "s/#.*exclude_fabric_ifaces: \[.*/exclude_fabric_ifaces: [$exclude_fabric_ifaces]/" $daos_config -fi +cat > $daos_config </etc/systemd/system/"${service_name}" </etc/systemd/system/"$${service_name}" < Date: Mon, 23 Dec 2024 21:48:13 +0000 Subject: [PATCH 073/140] add null checks to placement policy checks --- modules/compute/gke-node-pool/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index 9a09712097..0d429165f3 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -308,7 +308,7 @@ resource "google_container_node_pool" "node_pool" { error_message = "At least one of max_unavailable or max_surge must greater than 0" } precondition { - condition = var.placement_policy.type != "COMPACT" || length(var.zones) == 1 + condition = var.placement_policy.type != "COMPACT" || (var.zones != null ? (length(var.zones) == 1) : false) error_message = "Compact placement is only available for node pools operating in a single zone." 
} precondition { From 3e333a7e2916338ec0addc7cc298a914eca0b842 Mon Sep 17 00:00:00 2001 From: ighosh98 Date: Tue, 24 Dec 2024 09:06:47 +0000 Subject: [PATCH 074/140] make upgrade settings configurable for gke-cluster --- modules/scheduler/gke-cluster/README.md | 1 + modules/scheduler/gke-cluster/main.tf | 29 ++++++++++++++++++++-- modules/scheduler/gke-cluster/variables.tf | 18 ++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/modules/scheduler/gke-cluster/README.md b/modules/scheduler/gke-cluster/README.md index 675039add6..a850202096 100644 --- a/modules/scheduler/gke-cluster/README.md +++ b/modules/scheduler/gke-cluster/README.md @@ -191,6 +191,7 @@ limitations under the License. | [system\_node\_pool\_taints](#input\_system\_node\_pool\_taints) | Taints to be applied to the system node pool. |
list(object({
key = string
value = any
effect = string
}))
|
[
{
"effect": "NO_SCHEDULE",
"key": "components.gke.io/gke-managed-components",
"value": true
}
]
| no | | [timeout\_create](#input\_timeout\_create) | Timeout for creating a node pool | `string` | `null` | no | | [timeout\_update](#input\_timeout\_update) | Timeout for updating a node pool | `string` | `null` | no | +| [upgrade\_settings](#input\_upgrade\_settings) | Defines gke cluster upgrade settings. It is highly recommended that you define all max\_surge and max\_unavailable.
If max\_surge is not specified, it would be set to a default value of 0.
If max\_unavailable is not specified, it would be set to a default value of 1. |
object({
strategy = string
max_surge = optional(number)
max_unavailable = optional(number)
})
|
{
"max_surge": 0,
"max_unavailable": 1,
"strategy": "SURGE"
}
| no | | [zone](#input\_zone) | Zone for a zonal cluster. | `string` | `null` | no | ## Outputs diff --git a/modules/scheduler/gke-cluster/main.tf b/modules/scheduler/gke-cluster/main.tf index 5b416a85bb..621189b19a 100644 --- a/modules/scheduler/gke-cluster/main.tf +++ b/modules/scheduler/gke-cluster/main.tf @@ -19,6 +19,14 @@ locals { labels = merge(var.labels, { ghpc_module = "gke-cluster", ghpc_role = "scheduler" }) } +locals { + upgrade_settings = { + strategy = var.upgrade_settings.strategy + max_surge = coalesce(var.upgrade_settings.max_surge, 0) + max_unavailable = coalesce(var.upgrade_settings.max_unavailable, 1) + } +} + locals { dash = var.prefix_with_deployment_name && var.name_suffix != "" ? "-" : "" prefix = var.prefix_with_deployment_name ? var.deployment_name : "" @@ -243,8 +251,9 @@ resource "google_container_node_pool" "system_node_pools" { } upgrade_settings { - max_surge = 1 - max_unavailable = 0 + strategy = local.upgrade_settings.strategy + max_surge = local.upgrade_settings.max_surge + max_unavailable = local.upgrade_settings.max_unavailable } management { @@ -304,6 +313,22 @@ resource "google_container_node_pool" "system_node_pools" { node_config[0].labels, node_config[0].taint, ] + precondition { + condition = contains(["SURGE"], local.upgrade_settings.strategy) + error_message = "Only SURGE strategy is supported" + } + precondition { + condition = local.upgrade_settings.max_unavailable >= 0 + error_message = "max_unavailable should be set to 0 or greater" + } + precondition { + condition = local.upgrade_settings.max_surge >= 0 + error_message = "max_surge should be set to 0 or greater" + } + precondition { + condition = local.upgrade_settings.max_unavailable > 0 || local.upgrade_settings.max_surge > 0 + error_message = "At least one of max_unavailable or max_surge must greater than 0" + } } } diff --git a/modules/scheduler/gke-cluster/variables.tf b/modules/scheduler/gke-cluster/variables.tf index 58bf197763..9b807e7826 100644 --- a/modules/scheduler/gke-cluster/variables.tf +++ b/modules/scheduler/gke-cluster/variables.tf @@ -407,3 +407,21 @@ variable "deletion_protection" { type = bool default = false } + +variable "upgrade_settings" { + description = <<-EOT + Defines gke cluster upgrade settings. It is highly recommended that you define all max_surge and max_unavailable. + If max_surge is not specified, it would be set to a default value of 0. + If max_unavailable is not specified, it would be set to a default value of 1. 
+ EOT + type = object({ + strategy = string + max_surge = optional(number) + max_unavailable = optional(number) + }) + default = { + strategy = "SURGE" + max_surge = 0 + max_unavailable = 1 + } +} From 4969446bf028a1d92e7be5e99c1a0dcb83e71780 Mon Sep 17 00:00:00 2001 From: chengcongdu Date: Thu, 26 Dec 2024 19:23:00 +0000 Subject: [PATCH 075/140] update a3mega nccl plugin to 1.0.7 and rxdm to 1.0.13_1 --- modules/compute/gke-node-pool/gpu_direct.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/compute/gke-node-pool/gpu_direct.tf b/modules/compute/gke-node-pool/gpu_direct.tf index 9403ea34fc..8f0e67a9b2 100644 --- a/modules/compute/gke-node-pool/gpu_direct.tf +++ b/modules/compute/gke-node-pool/gpu_direct.tf @@ -43,11 +43,11 @@ locals { "a3-megagpu-8g" = { # Manifest to be installed for enabling TCPXO on a3-megagpu-8g machines gpu_direct_manifests = [ - "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/fee883360a660f71ba07478db95d5c1325322f77/gpudirect-tcpxo/nccl-tcpxo-installer.yaml", # nccl_plugin v1.0.4 for tcpxo - "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/fee883360a660f71ba07478db95d5c1325322f77/nri_device_injector/nri-device-injector.yaml", # nri_plugin + "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/b324ec8994aa98ca320438dd2d01ff6d7f9165bb/gpudirect-tcpxo/nccl-tcpxo-installer.yaml", # nccl_plugin v1.0.7 for tcpxo + "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/b324ec8994aa98ca320438dd2d01ff6d7f9165bb/nri_device_injector/nri-device-injector.yaml", # nri_plugin ] updated_workload_path = replace(local.workload_path_tcpxo, ".yaml", "-tcpxo.yaml") - rxdm_version = "v1.0.10" # matching nccl-tcpxo-installer version v1.0.4 + rxdm_version = "v1.0.13_1" # matching nccl-tcpxo-installer version v1.0.7 min_additional_networks = 8 major_minor_version_acceptable_map = { "1.28" = "1.28.9-gke.1250000" From 80ad9b13bc6128a7c291ffb1e58657e05af3987c Mon Sep 17 00:00:00 2001 From: chengcongdu Date: Mon, 30 Dec 2024 21:35:07 +0000 Subject: [PATCH 076/140] add GKE support for managed hyperdisk --- examples/README.md | 24 ++ examples/gke-managed-hyperdisk.yaml | 218 ++++++++++++++++++ modules/file-system/gke-storage/README.md | 2 +- .../hyperdisk-balanced-pvc.yaml.tftpl | 15 ++ .../hyperdisk-extreme-pvc.yaml.tftpl | 15 ++ .../hyperdisk-throughput-pvc.yaml.tftpl | 15 ++ .../hyperdisk-balanced-sc.yaml.tftpl | 25 ++ .../hyperdisk-extreme-sc.yaml.tftpl | 24 ++ .../hyperdisk-throughput-sc.yaml.tftpl | 24 ++ modules/file-system/gke-storage/variables.tf | 7 +- .../test-gke-managed-hyperdisk.yml | 41 ++++ .../builds/gke-managed-hyperdisk.yaml | 55 +++++ .../tests/gke-managed-hyperdisk.yml | 29 +++ 13 files changed, 491 insertions(+), 3 deletions(-) create mode 100644 examples/gke-managed-hyperdisk.yaml create mode 100644 modules/file-system/gke-storage/persistent-volume-claim/hyperdisk-balanced-pvc.yaml.tftpl create mode 100644 modules/file-system/gke-storage/persistent-volume-claim/hyperdisk-extreme-pvc.yaml.tftpl create mode 100644 modules/file-system/gke-storage/persistent-volume-claim/hyperdisk-throughput-pvc.yaml.tftpl create mode 100644 modules/file-system/gke-storage/storage-class/hyperdisk-balanced-sc.yaml.tftpl create mode 100644 modules/file-system/gke-storage/storage-class/hyperdisk-extreme-sc.yaml.tftpl create mode 100644 modules/file-system/gke-storage/storage-class/hyperdisk-throughput-sc.yaml.tftpl create mode 
100644 tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-gke-managed-hyperdisk.yml create mode 100644 tools/cloud-build/daily-tests/builds/gke-managed-hyperdisk.yaml create mode 100644 tools/cloud-build/daily-tests/tests/gke-managed-hyperdisk.yml diff --git a/examples/README.md b/examples/README.md index 29db27df94..a1d3d0c589 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1518,6 +1518,30 @@ cleaned up when the job is deleted. [storage-gke.yaml]: ../examples/storage-gke.yaml +### [gke-managed-hyperdisk.yaml] ![core-badge] ![experimental-badge] + +This blueprint shows how to use managed hyperdisk storage options with GKE in the toolkit. + +The blueprint contains the following: + +* A K8s Job that uses a managed hyperdisk storage volume option. +* A K8s Job that demonstrates ML training workload with managed hyperdisk storage disk operation. + +> **Warning**: In this example blueprint, when storage type `Hyperdisk-balanced`, `Hyperdisk-extreme` or `Hyperdisk-throughput` is specified in `gke-storage` module. +> The lifecycle of the hyperdisk is managed by the blueprint. +> On glcuster destroy operation, the hyperdisk storage created will also be destroyed. +> +> [!Note] +> The Kubernetes API server will only allow requests from authorized networks. +> The `gke-cluster` module needs access to the Kubernetes API server +> to create a Persistent Volume and a Persistent Volume Claim. **You must use +> the `authorized_cidr` variable to supply an authorized network which contains +> the IP address of the machine deploying the blueprint, for example +> `--vars authorized_cidr=/32`.** You can use a service like +> [whatismyip.com](https://whatismyip.com) to determine your IP address. + +[gke-managed-hyperdisk.yaml]: ../examples/gke-managed-hyperdisk.yaml + ### [gke-managed-parallelstore.yaml] ![core-badge] ![experimental-badge] This blueprint shows how to use managed parallelstore storage options with GKE in the toolkit. diff --git a/examples/gke-managed-hyperdisk.yaml b/examples/gke-managed-hyperdisk.yaml new file mode 100644 index 0000000000..12c8063026 --- /dev/null +++ b/examples/gke-managed-hyperdisk.yaml @@ -0,0 +1,218 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +blueprint_name: gke-storage-hyperdisk +vars: + project_id: ## Set GCP Project ID Here ## + deployment_name: gke-storage-hyperdisk + region: us-central1 + zone: us-central1-c + + # Cidr block containing the IP of the machine calling terraform. + # The following line must be updated for this example to work. 
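  # For example, if the public IP of the machine running Terraform is 203.0.113.7
  # (one way to check is `curl -s ifconfig.me`), the next line would read
  # `authorized_cidr: 203.0.113.7/32`. The address 203.0.113.7 is only an
  # illustrative placeholder; substitute your own address.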
+ authorized_cidr: /32 + +deployment_groups: +- group: primary + modules: + - id: network + source: modules/network/vpc + settings: + subnetwork_name: gke-subnet-hyperdisk + secondary_ranges: + gke-subnet-hyperdisk: + - range_name: pods + ip_cidr_range: 10.4.0.0/14 + - range_name: services + ip_cidr_range: 10.0.32.0/20 + + - id: gke_cluster + source: modules/scheduler/gke-cluster + use: [network] + settings: + enable_persistent_disk_csi: true # enable Hyperdisk for the cluster + configure_workload_identity_sa: true + enable_private_endpoint: false # Allows for access from authorized public IPs + master_authorized_networks: + - display_name: deployment-machine + cidr_block: $(vars.authorized_cidr) + outputs: [instructions] + + ### Set up storage class and persistent volume claim for Hyperdisk ### + - id: hyperdisk-balanced-setup + source: modules/file-system/gke-storage + use: [gke_cluster] + settings: + storage_type: Hyperdisk-balanced + access_mode: ReadWriteOnce + sc_volume_binding_mode: Immediate + sc_reclaim_policy: Delete + sc_topology_zones: [$(vars.zone)] + pvc_count: 1 + capacity_gb: 100 + + - id: hyperdisk-throughput-setup + source: modules/file-system/gke-storage + use: [gke_cluster] + settings: + storage_type: Hyperdisk-throughput + access_mode: ReadWriteOnce + sc_volume_binding_mode: Immediate + sc_reclaim_policy: Delete + sc_topology_zones: [$(vars.zone)] + pvc_count: 1 + capacity_gb: 5000 + + - id: hyperdisk-extreme-setup + source: modules/file-system/gke-storage + use: [gke_cluster] + settings: + storage_type: Hyperdisk-extreme + access_mode: ReadWriteOnce + sc_volume_binding_mode: Immediate + sc_reclaim_policy: Delete + sc_topology_zones: [$(vars.zone)] + pvc_count: 1 + capacity_gb: 100 + + - id: sample-pool + source: modules/compute/gke-node-pool + use: [gke_cluster] + settings: + name: sample-pool + zones: [$(vars.zone)] + machine_type: c3-standard-88 # Hyperdisk-extreme required C3 machine with 88 or more vCPUs + + # Train a TensorFlow model with Keras and Hyperdisk Balanced on GKE + # Tutorial: https://cloud.google.com/parallelstore/docs/tensorflow-sample + - id: hyperdisk-balanced-job + source: modules/compute/gke-job-template + use: + - gke_cluster + - hyperdisk-balanced-setup + settings: + name: tensorflow + image: jupyter/tensorflow-notebook@sha256:173f124f638efe870bb2b535e01a76a80a95217e66ed00751058c51c09d6d85d + security_context: # to make sure the job have enough access to execute the jobs and r/w from hyperdisk + - key: runAsUser + value: 1000 + - key: runAsGroup + value: 100 + - key: fsGroup + value: 100 + command: + - bash + - -c + - | + pip install transformers datasets + python - < [sc\_reclaim\_policy](#input\_sc\_reclaim\_policy) | Indicate whether to keep the dynamically provisioned PersistentVolumes of this storage class after the bound PersistentVolumeClaim is deleted.
[More details about reclaiming](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#reclaiming)
Supported value:
- Retain
- Delete | `string` | n/a | yes | | [sc\_topology\_zones](#input\_sc\_topology\_zones) | Zone location that allow the volumes to be dynamically provisioned. | `list(string)` | `null` | no | | [sc\_volume\_binding\_mode](#input\_sc\_volume\_binding\_mode) | Indicates when volume binding and dynamic provisioning should occur and how PersistentVolumeClaims should be provisioned and bound.
Supported value:
- Immediate
- WaitForFirstConsumer | `string` | `"WaitForFirstConsumer"` | no | -| [storage\_type](#input\_storage\_type) | The type of [GKE supported storage options](https://cloud.google.com/kubernetes-engine/docs/concepts/storage-overview)
to used. This module currently support dynamic provisioning for the below storage options
- Parallelstore | `string` | n/a | yes | +| [storage\_type](#input\_storage\_type) | The type of [GKE supported storage options](https://cloud.google.com/kubernetes-engine/docs/concepts/storage-overview)
to used. This module currently support dynamic provisioning for the below storage options
- Parallelstore
- Hyperdisk-balanced
- Hyperdisk-throughput
- Hyperdisk-extreme | `string` | n/a | yes | ## Outputs diff --git a/modules/file-system/gke-storage/persistent-volume-claim/hyperdisk-balanced-pvc.yaml.tftpl b/modules/file-system/gke-storage/persistent-volume-claim/hyperdisk-balanced-pvc.yaml.tftpl new file mode 100644 index 0000000000..32781be2fb --- /dev/null +++ b/modules/file-system/gke-storage/persistent-volume-claim/hyperdisk-balanced-pvc.yaml.tftpl @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ${pvc_name} + labels: + %{~ for key, val in labels ~} + ${key}: ${val} + %{~ endfor ~} +spec: + accessModes: + - ${access_mode} + resources: + requests: + storage: ${capacity} + storageClassName: ${storage_class_name} diff --git a/modules/file-system/gke-storage/persistent-volume-claim/hyperdisk-extreme-pvc.yaml.tftpl b/modules/file-system/gke-storage/persistent-volume-claim/hyperdisk-extreme-pvc.yaml.tftpl new file mode 100644 index 0000000000..32781be2fb --- /dev/null +++ b/modules/file-system/gke-storage/persistent-volume-claim/hyperdisk-extreme-pvc.yaml.tftpl @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ${pvc_name} + labels: + %{~ for key, val in labels ~} + ${key}: ${val} + %{~ endfor ~} +spec: + accessModes: + - ${access_mode} + resources: + requests: + storage: ${capacity} + storageClassName: ${storage_class_name} diff --git a/modules/file-system/gke-storage/persistent-volume-claim/hyperdisk-throughput-pvc.yaml.tftpl b/modules/file-system/gke-storage/persistent-volume-claim/hyperdisk-throughput-pvc.yaml.tftpl new file mode 100644 index 0000000000..32781be2fb --- /dev/null +++ b/modules/file-system/gke-storage/persistent-volume-claim/hyperdisk-throughput-pvc.yaml.tftpl @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: ${pvc_name} + labels: + %{~ for key, val in labels ~} + ${key}: ${val} + %{~ endfor ~} +spec: + accessModes: + - ${access_mode} + resources: + requests: + storage: ${capacity} + storageClassName: ${storage_class_name} diff --git a/modules/file-system/gke-storage/storage-class/hyperdisk-balanced-sc.yaml.tftpl b/modules/file-system/gke-storage/storage-class/hyperdisk-balanced-sc.yaml.tftpl new file mode 100644 index 0000000000..46e1f023d3 --- /dev/null +++ b/modules/file-system/gke-storage/storage-class/hyperdisk-balanced-sc.yaml.tftpl @@ -0,0 +1,25 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: ${name} + labels: + %{~ for key, val in labels ~} + ${key}: ${val} + %{~ endfor ~} +provisioner: pd.csi.storage.gke.io +allowVolumeExpansion: true +parameters: + type: hyperdisk-balanced + provisioned-throughput-on-create: "250Mi" + provisioned-iops-on-create: "7000" +volumeBindingMode: ${volume_binding_mode} +reclaimPolicy: ${reclaim_policy} + %{~ if topology_zones != null ~} +allowedTopologies: +- matchLabelExpressions: + - key: topology.gke.io/zone + values: + %{~ for z in topology_zones ~} + - ${z} + %{~ endfor ~} + %{~ endif ~} diff --git a/modules/file-system/gke-storage/storage-class/hyperdisk-extreme-sc.yaml.tftpl b/modules/file-system/gke-storage/storage-class/hyperdisk-extreme-sc.yaml.tftpl new file mode 100644 index 0000000000..445020d001 --- /dev/null +++ b/modules/file-system/gke-storage/storage-class/hyperdisk-extreme-sc.yaml.tftpl @@ -0,0 +1,24 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: ${name} + labels: + %{~ for key, val in labels ~} + ${key}: ${val} +provisioner: pd.csi.storage.gke.io +allowVolumeExpansion: true +parameters: + %{~ endfor ~} + type: 
hyperdisk-extreme + provisioned-iops-on-create: "50000" +volumeBindingMode: ${volume_binding_mode} +reclaimPolicy: ${reclaim_policy} + %{~ if topology_zones != null ~} +allowedTopologies: +- matchLabelExpressions: + - key: topology.gke.io/zone + values: + %{~ for z in topology_zones ~} + - ${z} + %{~ endfor ~} + %{~ endif ~} diff --git a/modules/file-system/gke-storage/storage-class/hyperdisk-throughput-sc.yaml.tftpl b/modules/file-system/gke-storage/storage-class/hyperdisk-throughput-sc.yaml.tftpl new file mode 100644 index 0000000000..ec404aec45 --- /dev/null +++ b/modules/file-system/gke-storage/storage-class/hyperdisk-throughput-sc.yaml.tftpl @@ -0,0 +1,24 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: ${name} + labels: + %{~ for key, val in labels ~} + ${key}: ${val} + %{~ endfor ~} +provisioner: pd.csi.storage.gke.io +allowVolumeExpansion: true +parameters: + type: hyperdisk-throughput + provisioned-throughput-on-create: "250Mi" +volumeBindingMode: ${volume_binding_mode} +reclaimPolicy: ${reclaim_policy} + %{~ if topology_zones != null ~} +allowedTopologies: +- matchLabelExpressions: + - key: topology.gke.io/zone + values: + %{~ for z in topology_zones ~} + - ${z} + %{~ endfor ~} + %{~ endif ~} diff --git a/modules/file-system/gke-storage/variables.tf b/modules/file-system/gke-storage/variables.tf index 9efbe6082c..b33203be0f 100644 --- a/modules/file-system/gke-storage/variables.tf +++ b/modules/file-system/gke-storage/variables.tf @@ -30,12 +30,15 @@ variable "storage_type" { The type of [GKE supported storage options](https://cloud.google.com/kubernetes-engine/docs/concepts/storage-overview) to used. This module currently support dynamic provisioning for the below storage options - Parallelstore + - Hyperdisk-balanced + - Hyperdisk-throughput + - Hyperdisk-extreme EOT type = string nullable = false validation { - condition = var.storage_type == null ? false : contains(["parallelstore"], lower(var.storage_type)) - error_message = "Allowed string values for var.storage_type are \"Parallelstore\"." + condition = var.storage_type == null ? false : contains(["parallelstore", "hyperdisk-balanced", "hyperdisk-throughput", "hyperdisk-extreme"], lower(var.storage_type)) + error_message = "Allowed string values for var.storage_type are \"Parallelstore\", \"Hyperdisk-balanced\", \"Hyperdisk-throughput\", \"Hyperdisk-extreme\"." } } diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-gke-managed-hyperdisk.yml b/tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-gke-managed-hyperdisk.yml new file mode 100644 index 0000000000..fb114c402a --- /dev/null +++ b/tools/cloud-build/daily-tests/ansible_playbooks/test-validation/test-gke-managed-hyperdisk.yml @@ -0,0 +1,41 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +- name: Get cluster credentials for kubectl + delegate_to: localhost + ansible.builtin.command: gcloud container clusters get-credentials {{ deployment_name }} --region {{ cli_deployment_vars.region }} --project {{ custom_vars.project }} + +- name: Execute the job + delegate_to: localhost + ansible.builtin.shell: | + jobs=({{ workspace }}/{{ deployment_name }}/primary/tensorflow*) + for job in "${jobs[@]}"; do + kubectl create -f "$job" + done + args: + executable: /bin/bash + changed_when: False + +- name: Wait for job to complete + delegate_to: localhost + ansible.builtin.command: | + kubectl get job --field-selector status.successful=1 + register: job_completion + until: job_completion.stdout_lines | length > 3 # 3 jobs total + retries: 80 + delay: 15 + +- name: Print job_completion debug output + ansible.builtin.debug: + var: job_completion.stdout_lines diff --git a/tools/cloud-build/daily-tests/builds/gke-managed-hyperdisk.yaml b/tools/cloud-build/daily-tests/builds/gke-managed-hyperdisk.yaml new file mode 100644 index 0000000000..64129fcdde --- /dev/null +++ b/tools/cloud-build/daily-tests/builds/gke-managed-hyperdisk.yaml @@ -0,0 +1,55 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- +tags: +- m.gke-cluster +- m.gke-job-template +- m.gke-node-pool +- m.gke-storage +- m.vpc +- gke + +timeout: 14400s # 4hr + +steps: +## Test GKE +- id: gke-managed-hyperdisk + name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner + entrypoint: /bin/bash + env: + - "ANSIBLE_HOST_KEY_CHECKING=false" + - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" + args: + - -c + - | + set -x -e + cd /workspace && make + BUILD_ID_FULL=$BUILD_ID + BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} + SG_EXAMPLE=examples/gke-managed-hyperdisk.yaml + # adding vm to act as remote node + echo ' - id: remote-node' >> $${SG_EXAMPLE} + echo ' source: modules/compute/vm-instance' >> $${SG_EXAMPLE} + echo ' use: [network]' >> $${SG_EXAMPLE} + echo ' settings:' >> $${SG_EXAMPLE} + echo ' machine_type: e2-standard-2' >> $${SG_EXAMPLE} + echo ' zone: us-central1-a' >> $${SG_EXAMPLE} + # avoids conflict with other tests + sed -i "s/gke-subnet/gke-subnet-$${BUILD_ID_SHORT}/" $${SG_EXAMPLE} + IP=$(curl ifconfig.me) + sed -i "s//$${IP}/" $${SG_EXAMPLE} + ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-managed-hyperdisk.yml" diff --git a/tools/cloud-build/daily-tests/tests/gke-managed-hyperdisk.yml b/tools/cloud-build/daily-tests/tests/gke-managed-hyperdisk.yml new file mode 100644 index 0000000000..036657720a --- /dev/null +++ b/tools/cloud-build/daily-tests/tests/gke-managed-hyperdisk.yml @@ -0,0 +1,29 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +test_name: gke-managed-hyperdisk +deployment_name: gke-managed-hyperdisk-{{ build }} +zone: us-central1-a # for remote node +region: us-central1 +workspace: /workspace +blueprint_yaml: "{{ workspace }}/examples/gke-managed-hyperdisk.yaml" +network: "{{ deployment_name }}-net" +remote_node: "{{ deployment_name }}-0" +post_deploy_tests: +- test-validation/test-gke-managed-hyperdisk.yml +custom_vars: + project: "{{ project }}" +cli_deployment_vars: + region: "{{ region }}" + gcp_public_cidrs_access_enabled: true From b874f82a73c53bb3c752aac8a213a08bba07598f Mon Sep 17 00:00:00 2001 From: Indraneel Ghosh Date: Tue, 31 Dec 2024 14:09:16 +0530 Subject: [PATCH 077/140] Update Stackdrier typo in README.md --- modules/scripts/startup-script/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/scripts/startup-script/README.md b/modules/scripts/startup-script/README.md index 67f0effe7b..48d79f872b 100644 --- a/modules/scripts/startup-script/README.md +++ b/modules/scripts/startup-script/README.md @@ -143,7 +143,7 @@ recommends using the _Cloud Ops Agent_, it is recommended to use #### Stackdriver Agent Installation If an image or machine already has Cloud Ops Agent installed and you would like -to instead use the Stackdrier Agent, the following script will remove the Cloud +to instead use the Stackdriver Agent, the following script will remove the Cloud Ops Agent and install the Stackdriver Agent. ```bash From 0fbbc7c1742625c65d5473dc30d226cdadbf8ae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wiktor=20Niesiob=C4=99dzki?= Date: Tue, 31 Dec 2024 10:07:43 +0000 Subject: [PATCH 078/140] Fix failures if not specifing stipe sizes for parallelstore --- modules/file-system/parallelstore/README.md | 4 ++-- modules/file-system/parallelstore/variables.tf | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/file-system/parallelstore/README.md b/modules/file-system/parallelstore/README.md index 4a5896e217..9b0595c965 100644 --- a/modules/file-system/parallelstore/README.md +++ b/modules/file-system/parallelstore/README.md @@ -169,8 +169,8 @@ No modules. | [daos\_agent\_config](#input\_daos\_agent\_config) | Additional configuration to be added to daos\_config.yml | `string` | `""` | no | | [deployment\_name](#input\_deployment\_name) | Name of the HPC deployment. | `string` | n/a | yes | | [dfuse\_environment](#input\_dfuse\_environment) | Additional environment variables for DFuse process | `map(string)` | `{}` | no | -| [directory\_stripe](#input\_directory\_stripe) | The parallelstore stripe level for directories. | `string` | `"DIRECTORY_STRIPE_LEVEL_UNSPECIFIED"` | no | -| [file\_stripe](#input\_file\_stripe) | The parallelstore stripe level for files. | `string` | `"FILE_STRIPE_LEVEL_UNSPECIFIED"` | no | +| [directory\_stripe](#input\_directory\_stripe) | The parallelstore stripe level for directories. | `string` | `null` | no | +| [file\_stripe](#input\_file\_stripe) | The parallelstore stripe level for files. 
| `string` | `null` | no | | [import\_destination\_path](#input\_import\_destination\_path) | The name of local path to import data on parallelstore instance from GCS bucket. | `string` | `null` | no | | [import\_gcs\_bucket\_uri](#input\_import\_gcs\_bucket\_uri) | The name of the GCS bucket to import data from to parallelstore. | `string` | `null` | no | | [labels](#input\_labels) | Labels to add to parallel store instance. | `map(string)` | `{}` | no | diff --git a/modules/file-system/parallelstore/variables.tf b/modules/file-system/parallelstore/variables.tf index 836f443f19..d5b7e7a19a 100644 --- a/modules/file-system/parallelstore/variables.tf +++ b/modules/file-system/parallelstore/variables.tf @@ -109,9 +109,9 @@ variable "import_destination_path" { variable "file_stripe" { description = "The parallelstore stripe level for files." type = string - default = "FILE_STRIPE_LEVEL_UNSPECIFIED" + default = null validation { - condition = contains([ + condition = var.file_stripe == null ? true : contains([ "FILE_STRIPE_LEVEL_UNSPECIFIED", "FILE_STRIPE_LEVEL_MIN", "FILE_STRIPE_LEVEL_BALANCED", @@ -124,9 +124,9 @@ variable "file_stripe" { variable "directory_stripe" { description = "The parallelstore stripe level for directories." type = string - default = "DIRECTORY_STRIPE_LEVEL_UNSPECIFIED" + default = null validation { - condition = contains([ + condition = var.directory_stripe == null ? true : contains([ "DIRECTORY_STRIPE_LEVEL_UNSPECIFIED", "DIRECTORY_STRIPE_LEVEL_MIN", "DIRECTORY_STRIPE_LEVEL_BALANCED", From 7605aaa5b7d999f65865b34e4166a8c1bd7c6d5e Mon Sep 17 00:00:00 2001 From: chengcongdu Date: Fri, 3 Jan 2025 00:20:13 +0000 Subject: [PATCH 079/140] address comment --- examples/gke-managed-hyperdisk.yaml | 11 +++++++++-- examples/gke-managed-parallelstore.yaml | 7 +++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/examples/gke-managed-hyperdisk.yaml b/examples/gke-managed-hyperdisk.yaml index 12c8063026..4be8bcf83c 100644 --- a/examples/gke-managed-hyperdisk.yaml +++ b/examples/gke-managed-hyperdisk.yaml @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
--- -blueprint_name: gke-storage-hyperdisk +blueprint_name: gke-managed-hyperdisk vars: project_id: ## Set GCP Project ID Here ## - deployment_name: gke-storage-hyperdisk + deployment_name: gke-managed-hyperdisk region: us-central1 zone: us-central1-c @@ -41,12 +41,18 @@ deployment_groups: source: modules/scheduler/gke-cluster use: [network] settings: + release_channel: RAPID enable_persistent_disk_csi: true # enable Hyperdisk for the cluster configure_workload_identity_sa: true enable_private_endpoint: false # Allows for access from authorized public IPs master_authorized_networks: - display_name: deployment-machine cidr_block: $(vars.authorized_cidr) + maintenance_exclusions: + - name: no-minor-or-node-upgrades-indefinite + start_time: "2024-12-01T00:00:00Z" + end_time: "2025-12-22T00:00:00Z" + exclusion_scope: NO_MINOR_OR_NODE_UPGRADES outputs: [instructions] ### Set up storage class and persistent volume claim for Hyperdisk ### @@ -93,6 +99,7 @@ deployment_groups: name: sample-pool zones: [$(vars.zone)] machine_type: c3-standard-88 # Hyperdisk-extreme required C3 machine with 88 or more vCPUs + auto_upgrade: true # Train a TensorFlow model with Keras and Hyperdisk Balanced on GKE # Tutorial: https://cloud.google.com/parallelstore/docs/tensorflow-sample diff --git a/examples/gke-managed-parallelstore.yaml b/examples/gke-managed-parallelstore.yaml index 4425f13181..6f292e0bb6 100644 --- a/examples/gke-managed-parallelstore.yaml +++ b/examples/gke-managed-parallelstore.yaml @@ -63,6 +63,7 @@ deployment_groups: source: modules/scheduler/gke-cluster use: [network] settings: + release_channel: RAPID enable_parallelstore_csi: true # enable Parallelstore for the cluster configure_workload_identity_sa: true enable_private_endpoint: false # Allows for access from authorized public IPs @@ -70,6 +71,11 @@ deployment_groups: master_authorized_networks: - display_name: deployment-machine cidr_block: $(vars.authorized_cidr) + maintenance_exclusions: + - name: no-minor-or-node-upgrades-indefinite + start_time: "2024-12-01T00:00:00Z" + end_time: "2025-12-22T00:00:00Z" + exclusion_scope: NO_MINOR_OR_NODE_UPGRADES outputs: [instructions] ### Set up storage class and persistent volume claim for Parallelstore ### @@ -92,6 +98,7 @@ deployment_groups: name: sample-pool zones: [$(vars.zone)] machine_type: n2-standard-16 + auto_upgrade: true # Train a TensorFlow model with Keras and Parallelstore on GKE # Tutorial: https://cloud.google.com/parallelstore/docs/tensorflow-sample From b390c5ffaeecd8ec0ac74a036423ed32a448c54c Mon Sep 17 00:00:00 2001 From: chengcongdu Date: Fri, 3 Jan 2025 20:42:45 +0000 Subject: [PATCH 080/140] update README for managed-hyperdiska nd managed-parallelstore example blueprint --- examples/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/README.md b/examples/README.md index a1d3d0c589..d268dcd423 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1526,6 +1526,8 @@ The blueprint contains the following: * A K8s Job that uses a managed hyperdisk storage volume option. * A K8s Job that demonstrates ML training workload with managed hyperdisk storage disk operation. 
+ * The sample training workload manifest will be generated under the gke-managed-hyperdisk/primary folder, as tensorflow-GUID.yaml + * You can deploy this sample training workload using "kubectl apply -f tensorflow-GUID.yaml" to start the training > **Warning**: In this example blueprint, when storage type `Hyperdisk-balanced`, `Hyperdisk-extreme` or `Hyperdisk-throughput` is specified in `gke-storage` module. > The lifecycle of the hyperdisk is managed by the blueprint. @@ -1550,6 +1552,8 @@ The blueprint contains the following: * A K8s Job that uses a managed parallelstore storage volume option. * A K8s Job that demonstrates ML training workload with managed parallelstore storage disk operation. + * The sample training workload manifest will be generated under the gke-managed-parallelstore/primary folder, as tensorflow-GUID.yaml + * You can deploy this sample training workload using "kubectl apply -f tensorflow-GUID.yaml" to start the training > **Warning**: In this example blueprint, when storage type `Parallelstore` is specified in `gke-storage` module. > The lifecycle of the parallelstore is managed by the blueprint. From 526b171324dd6dfa21d15516b1dc22ff8c4bd5c5 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Mon, 16 Dec 2024 19:45:27 +0000 Subject: [PATCH 081/140] Document unsupported "bracket-less" collection addressing --- examples/README.md | 4 ++++ pkg/config/expression.go | 8 ++++++++ pkg/config/expression_test.go | 4 ++++ 3 files changed, 16 insertions(+) diff --git a/examples/README.md b/examples/README.md index 30883ce0f9..82539448ca 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1884,6 +1884,10 @@ To learn more about how to refer to a module in a blueprint file, please consult Variables can be used to refer both to values defined elsewhere in the blueprint and to the output and structure of other modules. +> [!NOTE] +> "Brackets-less" access to elements of collection is not supported, use brackets. +> E.g. `pink.lime[0].salmon` instead of `pink.lime.0.salmon`. + ### Blueprint expressions Expressions in a blueprint file can refer to deployment variables or the outputs diff --git a/pkg/config/expression.go b/pkg/config/expression.go index 3cfeb096d1..0fb75f71fd 100644 --- a/pkg/config/expression.go +++ b/pkg/config/expression.go @@ -88,6 +88,14 @@ func bpTraversalToTerraform(t hcl.Traversal) (hcl.Traversal, error) { // BlueprintExpressionLiteralToExpression takes a content of `$(...)`-literal and transforms it to `Expression` func BlueprintExpressionLiteralToExpression(s string) (Expression, error) { + // TODO: FIX: this function relies on assumption that + // `epxrToTokens(toExpression(tokenize(X))) == tokenize(X)` + // This is not correct, e.g.: + // ``` + // epxrToTokens(toExpression(tokenize("pink.lime.0.salmon"))) == + // tokenize("pink.lime[0].salmon") != tokenize("pink.lime.0.salmon") + // ``` + // As a result `pink.lime.0.salmon` can not be properly translated. 
bpExp, diag := hclsyntax.ParseExpression([]byte(s), "", hcl.Pos{}) if diag.HasErrors() { return nil, diag diff --git a/pkg/config/expression_test.go b/pkg/config/expression_test.go index 88a8fa8338..e7a135846e 100644 --- a/pkg/config/expression_test.go +++ b/pkg/config/expression_test.go @@ -85,6 +85,7 @@ func TestParseBpLit(t *testing.T) { {"$(vars.green.sleeve)", "var.green.sleeve", false}, {`$(vars.green["sleeve"])`, `var.green["sleeve"]`, false}, {"$(vars.green.sleeve[3])", "var.green.sleeve[3]", false}, + {"$(vars.green[3].sleeve)", "var.green[3].sleeve", false}, {"$(var.green)", "module.var.green", false}, {"$(box.green)", "module.box.green", false}, @@ -135,6 +136,9 @@ echo "Hello $(vars.project_id)" {"$(vars[3]])", "", true}, // can't index vars {`$(vars["green"])`, "", true}, // can't index module + // TODO: uncomment + // see comment to `BlueprintExpressionLiteralToExpression` + // {"$(pink.lime.0.salmon)", "module.pink.lime[0].salmon", false}, } for _, tc := range tests { t.Run(tc.input, func(t *testing.T) { From b76bcc22d0271e5326c6860111c50071d2f88a3c Mon Sep 17 00:00:00 2001 From: abbas1902 Date: Sat, 4 Jan 2025 01:22:47 +0000 Subject: [PATCH 082/140] Remove provisioningModel from future reservations --- .../modules/slurm_files/scripts/resume.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py index fa5413e53c..7bec9be1a1 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py @@ -103,10 +103,10 @@ def instance_properties(nodeset:object, model:str, placement_group:Optional[str] props.resourcePolicies = [placement_group] if reservation := lookup().nodeset_reservation(nodeset): - update_reservation_props(reservation, props, placement_group, False) + update_reservation_props(reservation, props, placement_group) if (fr := lookup().future_reservation(nodeset)) and fr.specific: - update_reservation_props(fr.active_reservation, props, placement_group, True) + update_reservation_props(fr.active_reservation, props, placement_group) if props.resourcePolicies: props.scheduling.onHostMaintenance = "TERMINATE" @@ -121,14 +121,14 @@ def instance_properties(nodeset:object, model:str, placement_group:Optional[str] props.update(nodeset.get("instance_properties") or {}) return props -def update_reservation_props(reservation:object, props:object, placement_group:Optional[str], reservation_from_fr:bool) -> None: +def update_reservation_props(reservation:object, props:object, placement_group:Optional[str]) -> None: props.reservationAffinity = { "consumeReservationType": "SPECIFIC_RESERVATION", "key": f"compute.{util.universe_domain()}/reservation-name", "values": [reservation.bulk_insert_name], } - if reservation.dense or reservation_from_fr: + if reservation.dense: props.scheduling.provisioningModel = "RESERVATION_BOUND" # Figure out `resourcePolicies` From 203da782fa54a041bf051bf749cd01d860f483aa Mon Sep 17 00:00:00 2001 From: Harsh Thakkar Date: Mon, 6 Jan 2025 14:24:31 +0000 Subject: [PATCH 083/140] Remove slurm-gcp v5 tests --- .../daily-tests/builds/hcls-v5-legacy.yaml | 69 -------------- .../hpc-enterprise-slurm-v5-legacy.yaml | 46 --------- .../hpc-slurm-chromedesktop-v5-legacy.yaml | 45 --------- 
.../builds/lustre-slurm-v5-legacy.yaml | 43 --------- .../builds/ml-a3-highgpu-slurm-v5.yaml | 93 ------------------- .../builds/ml-slurm-v5-legacy.yaml | 48 ---------- .../daily-tests/builds/packer-v5-legacy.yaml | 46 --------- .../builds/slurm-gcp-v5-debian.yaml | 43 --------- .../builds/slurm-gcp-v5-hpc-centos7.yaml | 42 --------- .../builds/slurm-gcp-v5-rocky8.yaml | 43 --------- ...lurm-gcp-v5-startup-scripts-v5-legacy.yaml | 45 --------- .../builds/slurm-gcp-v5-ubuntu2004.yaml | 43 --------- .../daily-tests/tests/hcls-v5-legacy.yml | 45 --------- .../tests/hpc-enterprise-slurm-v5-legacy.yml | 50 ---------- .../tests/hpc-slurm-chromedesktop.yml | 42 --------- .../tests/lustre-slurm-v5-legacy.yml | 43 --------- .../ml-a3-highgpu-slurm-cluster-legacy.yml | 49 ---------- .../daily-tests/tests/ml-slurm-v5-legacy.yml | 23 ----- .../daily-tests/tests/packer-v5-legacy.yml | 27 ------ .../daily-tests/tests/slurm-v5-debian.yml | 45 --------- .../tests/slurm-v5-hpc-centos7.yml | 44 --------- .../daily-tests/tests/slurm-v5-rocky8.yml | 45 --------- .../tests/slurm-v5-startup-scripts.yml | 38 -------- .../daily-tests/tests/slurm-v5-ubuntu.yml | 40 -------- 24 files changed, 1097 deletions(-) delete mode 100644 tools/cloud-build/daily-tests/builds/hcls-v5-legacy.yaml delete mode 100644 tools/cloud-build/daily-tests/builds/hpc-enterprise-slurm-v5-legacy.yaml delete mode 100644 tools/cloud-build/daily-tests/builds/hpc-slurm-chromedesktop-v5-legacy.yaml delete mode 100644 tools/cloud-build/daily-tests/builds/lustre-slurm-v5-legacy.yaml delete mode 100644 tools/cloud-build/daily-tests/builds/ml-a3-highgpu-slurm-v5.yaml delete mode 100644 tools/cloud-build/daily-tests/builds/ml-slurm-v5-legacy.yaml delete mode 100644 tools/cloud-build/daily-tests/builds/packer-v5-legacy.yaml delete mode 100644 tools/cloud-build/daily-tests/builds/slurm-gcp-v5-debian.yaml delete mode 100644 tools/cloud-build/daily-tests/builds/slurm-gcp-v5-hpc-centos7.yaml delete mode 100644 tools/cloud-build/daily-tests/builds/slurm-gcp-v5-rocky8.yaml delete mode 100644 tools/cloud-build/daily-tests/builds/slurm-gcp-v5-startup-scripts-v5-legacy.yaml delete mode 100644 tools/cloud-build/daily-tests/builds/slurm-gcp-v5-ubuntu2004.yaml delete mode 100644 tools/cloud-build/daily-tests/tests/hcls-v5-legacy.yml delete mode 100644 tools/cloud-build/daily-tests/tests/hpc-enterprise-slurm-v5-legacy.yml delete mode 100644 tools/cloud-build/daily-tests/tests/hpc-slurm-chromedesktop.yml delete mode 100644 tools/cloud-build/daily-tests/tests/lustre-slurm-v5-legacy.yml delete mode 100644 tools/cloud-build/daily-tests/tests/ml-a3-highgpu-slurm-cluster-legacy.yml delete mode 100644 tools/cloud-build/daily-tests/tests/ml-slurm-v5-legacy.yml delete mode 100644 tools/cloud-build/daily-tests/tests/packer-v5-legacy.yml delete mode 100644 tools/cloud-build/daily-tests/tests/slurm-v5-debian.yml delete mode 100644 tools/cloud-build/daily-tests/tests/slurm-v5-hpc-centos7.yml delete mode 100644 tools/cloud-build/daily-tests/tests/slurm-v5-rocky8.yml delete mode 100644 tools/cloud-build/daily-tests/tests/slurm-v5-startup-scripts.yml delete mode 100644 tools/cloud-build/daily-tests/tests/slurm-v5-ubuntu.yml diff --git a/tools/cloud-build/daily-tests/builds/hcls-v5-legacy.yaml b/tools/cloud-build/daily-tests/builds/hcls-v5-legacy.yaml deleted file mode 100644 index 0f39d815ee..0000000000 --- a/tools/cloud-build/daily-tests/builds/hcls-v5-legacy.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 
(the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -tags: -- m.chrome-remote-desktop -- m.cloud-storage-bucket -- m.dashboard -- m.filestore -- m.schedmd-slurm-gcp-v5-controller -- m.schedmd-slurm-gcp-v5-login -- m.schedmd-slurm-gcp-v5-node-group -- m.schedmd-slurm-gcp-v5-partition -- m.service-enablement -- m.spack-execute -- m.spack-setup -- m.startup-script -- m.vm-instance -- m.vpc -- spack -- crd -- slurm5 - -timeout: 14400s # 4hr -steps: -# While using static network names we are gaurding against more than 1 instance running at a time (for multi-group tests) -- id: check_for_running_build - name: gcr.io/cloud-builders/gcloud - entrypoint: /bin/bash - args: - - -c - - | - set -x -e - echo $TRIGGER_BUILD_CONFIG_PATH - MATCHING_BUILDS=$(gcloud builds list --ongoing --format 'value(id)' --filter='substitutions.TRIGGER_BUILD_CONFIG_PATH="$TRIGGER_BUILD_CONFIG_PATH"') - MATCHING_COUNT=$(echo $$MATCHING_BUILDS | wc -w) - if [ "$$MATCHING_COUNT" -gt 1 ]; then - echo "Found more than 1 matching running builds" - echo "$$MATCHING_BUILDS" - exit 1 - fi -- id: hcls - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/hcls-v5-legacy.yml" diff --git a/tools/cloud-build/daily-tests/builds/hpc-enterprise-slurm-v5-legacy.yaml b/tools/cloud-build/daily-tests/builds/hpc-enterprise-slurm-v5-legacy.yaml deleted file mode 100644 index 008f2939e8..0000000000 --- a/tools/cloud-build/daily-tests/builds/hpc-enterprise-slurm-v5-legacy.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- -tags: -- m.DDN-EXAScaler -- m.dashboard -- m.filestore -- m.pre-existing-vpc -- m.schedmd-slurm-gcp-v5-controller -- m.schedmd-slurm-gcp-v5-login -- m.schedmd-slurm-gcp-v5-node-group -- m.schedmd-slurm-gcp-v5-partition -- m.service-account -- slurm5 - -timeout: 14400s # 4hr -steps: -- id: hpc-enterprise-slurm - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/hpc-enterprise-slurm-v5-legacy.yml" diff --git a/tools/cloud-build/daily-tests/builds/hpc-slurm-chromedesktop-v5-legacy.yaml b/tools/cloud-build/daily-tests/builds/hpc-slurm-chromedesktop-v5-legacy.yaml deleted file mode 100644 index 7e4a4acb56..0000000000 --- a/tools/cloud-build/daily-tests/builds/hpc-slurm-chromedesktop-v5-legacy.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -tags: -- m.chrome-remote-desktop -- m.filestore -- m.schedmd-slurm-gcp-v5-controller -- m.schedmd-slurm-gcp-v5-login -- m.schedmd-slurm-gcp-v5-node-group -- m.schedmd-slurm-gcp-v5-partition -- m.vpc -- crd -- slurm5 - -timeout: 14400s # 4hr -steps: -- id: hpc-slurm-chromedesktop - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/hpc-slurm-chromedesktop.yml" diff --git a/tools/cloud-build/daily-tests/builds/lustre-slurm-v5-legacy.yaml b/tools/cloud-build/daily-tests/builds/lustre-slurm-v5-legacy.yaml deleted file mode 100644 index 7088e64267..0000000000 --- a/tools/cloud-build/daily-tests/builds/lustre-slurm-v5-legacy.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - ---- -tags: -- m.DDN-EXAScaler -- m.pre-existing-vpc -- m.schedmd-slurm-gcp-v5-controller -- m.schedmd-slurm-gcp-v5-login -- m.schedmd-slurm-gcp-v5-node-group -- m.schedmd-slurm-gcp-v5-partition -- slurm5 - -timeout: 14400s # 4hr -steps: -- id: lustre-slurm - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/lustre-slurm-v5-legacy.yml" diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-slurm-v5.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-slurm-v5.yaml deleted file mode 100644 index d44c7baec1..0000000000 --- a/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-slurm-v5.yaml +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- -tags: -- m.custom-image -- m.pre-existing-vpc -- m.startup-script -- slurm5 - -timeout: 14400s # 4hr -steps: -- id: ml-a3-highgpu-slurm-image - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/multigroup-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/ml-a3-highgpu-slurm-image-legacy.yml" - IMAGE_NAME=$(gcloud compute images list --project "${PROJECT_ID}" \ - --no-standard-images --filter="labels.ghpc_deployment~$${BUILD_ID_SHORT}" \ - --format='get(name)' --limit=1) - - echo $${IMAGE_NAME} > /persistent_volume/image_name - volumes: - - name: 'persistent_volume' - path: '/persistent_volume' -- id: ml-a3-highgpu-slurm-cluster - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - NFS_DEPLOYMENT_NAME="a3hnfs$${BUILD_ID_SHORT}" - - destroy_on_exit() { - ./gcluster destroy "$${NFS_DEPLOYMENT_NAME}" --auto-approve - cat /persistent_volume/image_name | xargs -L1 gcloud compute images delete --project "${PROJECT_ID}" --quiet - } - - REGION=us-west1 - ZONE=us-west1-a - - trap 'destroy_on_exit' EXIT - - ./gcluster deploy \ - --vars region="$${REGION}" \ - --vars zone="$${ZONE}" \ - --vars project_id="${PROJECT_ID}" \ - --vars deployment_name="$${NFS_DEPLOYMENT_NAME}" \ - tools/cloud-build/daily-tests/blueprints/nfs-server-homefs.yaml \ - --auto-approve - - NFS_IP=$(gcloud compute instances list --project "${PROJECT_ID}" \ - --filter="labels.ghpc_module=nfs-server and labels.ghpc_deployment=$${NFS_DEPLOYMENT_NAME}" \ - --format='get(networkInterfaces[0].networkIP)') - - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} nfs_ip=$${NFS_IP}" \ - --extra-vars="region=$${REGION} zone=$${ZONE} remote_mount_homefs=/exports/home" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/ml-a3-highgpu-slurm-cluster-legacy.yml" - volumes: - - name: 'persistent_volume' - path: '/persistent_volume' diff --git a/tools/cloud-build/daily-tests/builds/ml-slurm-v5-legacy.yaml b/tools/cloud-build/daily-tests/builds/ml-slurm-v5-legacy.yaml deleted file mode 100644 index 3382f342b6..0000000000 --- a/tools/cloud-build/daily-tests/builds/ml-slurm-v5-legacy.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - ---- -tags: -- m.custom-image -- m.filestore -- m.firewall-rules -- m.pre-existing-vpc -- m.schedmd-slurm-gcp-v5-controller -- m.schedmd-slurm-gcp-v5-login -- m.schedmd-slurm-gcp-v5-node-group -- m.schedmd-slurm-gcp-v5-partition -- m.startup-script -- slurm5 - -timeout: 18000s # 5hr -steps: -# test image creation by provisioning a new VPC and using Packer to build an -# image in it -- id: ml-slurm - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/multigroup-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/ml-slurm-v5-legacy.yml" diff --git a/tools/cloud-build/daily-tests/builds/packer-v5-legacy.yaml b/tools/cloud-build/daily-tests/builds/packer-v5-legacy.yaml deleted file mode 100644 index a2f2c32296..0000000000 --- a/tools/cloud-build/daily-tests/builds/packer-v5-legacy.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -tags: -- m.custom-image -- m.schedmd-slurm-gcp-v5-controller -- m.schedmd-slurm-gcp-v5-login -- m.schedmd-slurm-gcp-v5-node-group -- m.schedmd-slurm-gcp-v5-partition -- m.startup-script -- m.vpc -- packer - -timeout: 14400s # 4hr -steps: -# test image creation by provisioning a new VPC and using Packer to build an -# image in it -- id: packer - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/multigroup-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/packer-v5-legacy.yml" diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-debian.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-debian.yaml deleted file mode 100644 index 15c0c35650..0000000000 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-debian.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -tags: -- m.filestore -- m.schedmd-slurm-gcp-v5-controller -- m.schedmd-slurm-gcp-v5-login -- m.schedmd-slurm-gcp-v5-node-group -- m.schedmd-slurm-gcp-v5-partition -- m.vpc -- slurm5 - -timeout: 14400s # 4hr -steps: -- id: slurm-gcp-v5-debian - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v5-debian.yml" diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-hpc-centos7.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-hpc-centos7.yaml deleted file mode 100644 index ed48e66298..0000000000 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-hpc-centos7.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -tags: -- m.filestore -- m.schedmd-slurm-gcp-v5-controller -- m.schedmd-slurm-gcp-v5-login -- m.schedmd-slurm-gcp-v5-node-group -- m.schedmd-slurm-gcp-v5-partition -- m.vpc -- slurm5 - -timeout: 14400s # 4hr -steps: -- id: slurm-gcp-v5-hpc-centos7 - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v5-hpc-centos7.yml" diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-rocky8.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-rocky8.yaml deleted file mode 100644 index 562f1f4277..0000000000 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-rocky8.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -tags: -- m.filestore -- m.schedmd-slurm-gcp-v5-controller -- m.schedmd-slurm-gcp-v5-login -- m.schedmd-slurm-gcp-v5-node-group -- m.schedmd-slurm-gcp-v5-partition -- m.vpc -- slurm5 - -timeout: 14400s # 4hr -steps: -- id: slurm-gcp-v5-rocky8 - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v5-rocky8.yml" diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-startup-scripts-v5-legacy.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-startup-scripts-v5-legacy.yaml deleted file mode 100644 index d7221cb59a..0000000000 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-startup-scripts-v5-legacy.yaml +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- -tags: -- m.cloud-storage-bucket -- m.nfs-server -- m.schedmd-slurm-gcp-v5-controller -- m.schedmd-slurm-gcp-v5-login -- m.schedmd-slurm-gcp-v5-node-group -- m.schedmd-slurm-gcp-v5-partition -- m.startup-script -- m.vpc -- slurm5 - -timeout: 14400s # 4hr -steps: -- id: slurm-gcp-v5-startup-scripts - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v5-startup-scripts.yml" diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-ubuntu2004.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-ubuntu2004.yaml deleted file mode 100644 index 67db53434b..0000000000 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v5-ubuntu2004.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -tags: -- m.filestore -- m.schedmd-slurm-gcp-v5-controller -- m.schedmd-slurm-gcp-v5-login -- m.schedmd-slurm-gcp-v5-node-group -- m.schedmd-slurm-gcp-v5-partition -- m.vpc -- slurm5 - -timeout: 14400s # 4hr -steps: -- id: slurm-gcp-v5-ubuntu2004 - name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner - entrypoint: /bin/bash - env: - - "ANSIBLE_HOST_KEY_CHECKING=false" - - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - args: - - -c - - | - set -x -e - cd /workspace && make - BUILD_ID_FULL=$BUILD_ID - BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} - - ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v5-ubuntu.yml" diff --git a/tools/cloud-build/daily-tests/tests/hcls-v5-legacy.yml b/tools/cloud-build/daily-tests/tests/hcls-v5-legacy.yml deleted file mode 100644 index 073e773d2c..0000000000 --- a/tools/cloud-build/daily-tests/tests/hcls-v5-legacy.yml +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- - -test_name: hcls-cluster -deployment_name: "hcls-{{ build }}" -# No non-alphanumerical characters in the slurm cluster name - they will be -# removed by Cluster Toolkit slurm wrappers, which will break the playbook -slurm_cluster_name: "hcls{{ build[0:6] }}" -zone: europe-west1-c -workspace: /workspace -blueprint_yaml: "{{ workspace }}/docs/videos/healthcare-and-life-sciences/hcls-blueprint-v5-legacy.yaml" -network: "{{ deployment_name }}-net" -login_node: "{{ slurm_cluster_name }}-login-*" -controller_node: "{{ slurm_cluster_name }}-controller" -cli_deployment_vars: - network_name: "{{ network }}" - region: europe-west1 - zone: "{{ zone }}" - disable_login_public_ips: "false" - disable_controller_public_ips: "false" -post_deploy_tests: -- test-validation/test-mounts.yml -- test-validation/test-partitions.yml -custom_vars: - partitions: - - compute - mounts: - - /home - - /apps - - /data_input - - /data_output -wait_for_compute_nodes_to_go_down: true diff --git a/tools/cloud-build/daily-tests/tests/hpc-enterprise-slurm-v5-legacy.yml b/tools/cloud-build/daily-tests/tests/hpc-enterprise-slurm-v5-legacy.yml deleted file mode 100644 index 4457c03587..0000000000 --- a/tools/cloud-build/daily-tests/tests/hpc-enterprise-slurm-v5-legacy.yml +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -test_name: hpc-enterprise-slurm -deployment_name: "enter-{{ build }}" -# Manually adding the slurm_cluster_name for use in node names, which filters -# non-alphanumeric chars and is capped at 10 chars. -slurm_cluster_name: "enter{{ build[0:5] }}" -zone: europe-west1-d -cli_deployment_vars: - region: europe-west1 - zone: "{{ zone }}" - zones: "[europe-west1-b,europe-west1-c,europe-west1-d]" -workspace: /workspace -blueprint_yaml: "{{ workspace }}/examples/hpc-enterprise-slurm-v5-legacy.yaml" -network: "default" -# Note: Pattern matching in gcloud only supports 1 wildcard. -login_node: "{{ slurm_cluster_name }}-login-*" -controller_node: "{{ slurm_cluster_name }}-controller" -post_deploy_tests: -- test-validation/test-mounts.yml -- test-validation/test-partitions.yml -custom_vars: - partitions: - - n2 - - c2 - - c2d - # Disable those partitions for now. - # Note the current selected region may not support some of these partitions - # consult with https://cloud.google.com/compute/docs/regions-zones/ - #- c3 - #- a208 - #- a216 - mounts: - - /home - - /projects - - /scratch -wait_for_compute_nodes_to_go_down: true diff --git a/tools/cloud-build/daily-tests/tests/hpc-slurm-chromedesktop.yml b/tools/cloud-build/daily-tests/tests/hpc-slurm-chromedesktop.yml deleted file mode 100644 index 700a3a1807..0000000000 --- a/tools/cloud-build/daily-tests/tests/hpc-slurm-chromedesktop.yml +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -test_name: slurm-crd -deployment_name: "slm-crd-{{ build }}" - -# Manually adding the slurm_cluster_name for use in node names, which filters -# non-alphanumeric chars and is capped at 10 chars. -slurm_cluster_name: "slmcrd{{ build[0:4] }}" -zone: europe-west1-c -cli_deployment_vars: - network_name: "{{ network }}" - region: europe-west1 - zone: "{{ zone }}" -workspace: /workspace -blueprint_yaml: "{{ workspace }}/community/examples/hpc-slurm-chromedesktop-v5-legacy.yaml" -network: "{{ deployment_name }}-net" -# Note: Pattern matching in gcloud only supports 1 wildcard. -login_node: "{{ slurm_cluster_name }}-login-*" -controller_node: "{{ slurm_cluster_name }}-controller" -post_deploy_tests: -- test-validation/test-mounts.yml -- test-validation/test-crd.yml -custom_vars: - mounts: - - /home - partitions: - - desktop - - compute -wait_for_compute_nodes_to_go_down: true diff --git a/tools/cloud-build/daily-tests/tests/lustre-slurm-v5-legacy.yml b/tools/cloud-build/daily-tests/tests/lustre-slurm-v5-legacy.yml deleted file mode 100644 index 6cd001d1c3..0000000000 --- a/tools/cloud-build/daily-tests/tests/lustre-slurm-v5-legacy.yml +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -test_name: test-slurm-lustre -deployment_name: "lustr-{{ build }}" -region: us-central1 -zone: us-central1-c -workspace: /workspace -blueprint_yaml: "{{ workspace }}/tools/cloud-build/daily-tests/blueprints/lustre-slurm-v5-legacy.yaml" -network: "default" -slurm_cluster_name: "lustr{{ build[0:5] }}" -cli_deployment_vars: - region: "{{ region }}" - zone: "{{ zone }}" -# Note: Pattern matching in gcloud only supports 1 wildcard. -login_node: "{{ slurm_cluster_name }}-login-*" -controller_node: "{{ slurm_cluster_name }}-controller" -post_deploy_tests: -- test-validation/test-mounts.yml -- test-validation/test-partitions.yml -- test-validation/test-lustre-slurm.yml -custom_vars: - output_dir: /lustre/test - num_slurm_nodes: 1 - mounts: - - /lustre - partitions: - - centos - - rocky - # - ubuntu -wait_for_compute_nodes_to_go_down: true diff --git a/tools/cloud-build/daily-tests/tests/ml-a3-highgpu-slurm-cluster-legacy.yml b/tools/cloud-build/daily-tests/tests/ml-a3-highgpu-slurm-cluster-legacy.yml deleted file mode 100644 index 1172471ce0..0000000000 --- a/tools/cloud-build/daily-tests/tests/ml-a3-highgpu-slurm-cluster-legacy.yml +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -# region, zone, nfs_ip, remote_mount_homefs, must be defined in build file -# with --extra-vars flag! -test_name: a3h-cluster -deployment_name: a3hc-{{ build }} -slurm_cluster_name: "a3hc{{ build[0:4] }}" -workspace: /workspace -blueprint_yaml: "{{ workspace }}/examples/machine-learning/a3-highgpu-8g/v5-legacy/ml-slurm-a3-2-cluster-v5-legacy.yaml" -login_node: "{{ slurm_cluster_name }}-login-*" -controller_node: "{{ slurm_cluster_name }}-controller" -network: default -post_deploy_tests: -- test-validation/test-mounts.yml -- test-validation/test-partitions.yml -# v5 solutions do not have post 3.5.0 fix for enroot with service accounts -# if this changes, reinsert this test -# - test-validation/test-enroot.yml -custom_vars: - partitions: - - a3 - - debug - mounts: - - /home -cli_deployment_vars: - network_name_system: default - subnetwork_name_system: default - region: "{{ region }}" - zone: "{{ zone }}" - server_ip_homefs: "{{ nfs_ip }}" - remote_mount_homefs: "{{ remote_mount_homefs }}" - slurm_cluster_name: "{{ slurm_cluster_name }}" - a3_static_cluster_size: 2 - disable_login_public_ips: false - disable_controller_public_ips: false diff --git a/tools/cloud-build/daily-tests/tests/ml-slurm-v5-legacy.yml b/tools/cloud-build/daily-tests/tests/ml-slurm-v5-legacy.yml deleted file mode 100644 index 5fbb9315e8..0000000000 --- a/tools/cloud-build/daily-tests/tests/ml-slurm-v5-legacy.yml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -test_name: ml-slurm -deployment_name: ml-slurm-{{ build }} -workspace: /workspace -blueprint_yaml: "{{ workspace }}/examples/ml-slurm-v5-legacy.yaml" -packer_group_name: packer -packer_module_id: custom-image -wait_for_compute_nodes_to_go_down: true diff --git a/tools/cloud-build/daily-tests/tests/packer-v5-legacy.yml b/tools/cloud-build/daily-tests/tests/packer-v5-legacy.yml deleted file mode 100644 index 07158a5a0f..0000000000 --- a/tools/cloud-build/daily-tests/tests/packer-v5-legacy.yml +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -test_name: image-builder -deployment_name: pkr{{ build }} -zone: us-central1-c -workspace: /workspace -blueprint_yaml: "{{ workspace }}/examples/image-builder-v5-legacy.yaml" -network: "{{ deployment_name }}-net" -packer_group_name: packer -packer_module_id: custom-image -cli_deployment_vars: - network_name: "{{ network }}" - subnetwork_name: "{{ network }}-sub" diff --git a/tools/cloud-build/daily-tests/tests/slurm-v5-debian.yml b/tools/cloud-build/daily-tests/tests/slurm-v5-debian.yml deleted file mode 100644 index 2a06c30571..0000000000 --- a/tools/cloud-build/daily-tests/tests/slurm-v5-debian.yml +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -test_name: hpc-slurm-debian -deployment_name: "debi-v5-{{ build }}" -# Manually adding the slurm_cluster_name for use in node names, which filters -# non-alphanumeric chars and is capped at 10 chars. -slurm_cluster_name: "debiv5{{ build[0:4] }}" - -cli_deployment_vars: - network_name: "{{ network }}" - instance_image: "{family: slurm-gcp-5-12-debian-11, project: schedmd-slurm-public}" - region: us-west4 - zone: us-west4-c - -zone: us-west4-c -workspace: /workspace -blueprint_yaml: "{{ workspace }}/community/examples/hpc-slurm-ubuntu2004-v5-legacy.yaml" -network: "{{ deployment_name }}-net" -# Note: Pattern matching in gcloud only supports 1 wildcard, centv5*-login-* won't work. -login_node: "{{ slurm_cluster_name }}-login-*" -controller_node: "{{ slurm_cluster_name }}-controller" -post_deploy_tests: -- test-validation/test-mounts.yml -- test-validation/test-partitions.yml -custom_vars: - partitions: - - compute - - debug - mounts: - - /home -wait_for_compute_nodes_to_go_down: true diff --git a/tools/cloud-build/daily-tests/tests/slurm-v5-hpc-centos7.yml b/tools/cloud-build/daily-tests/tests/slurm-v5-hpc-centos7.yml deleted file mode 100644 index 52400b9e66..0000000000 --- a/tools/cloud-build/daily-tests/tests/slurm-v5-hpc-centos7.yml +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -test_name: hpc-slurm -deployment_name: "cent-v5-{{ build }}" -# Manually adding the slurm_cluster_name for use in node names, which filters -# non-alphanumeric chars and is capped at 10 chars. 
-slurm_cluster_name: "centv5{{ build[0:4] }}" -zone: us-west4-c -cli_deployment_vars: - network_name: "{{ network }}" - enable_cleanup_compute: true - region: us-west4 - zone: "{{ zone }}" - zones: "[us-west4-a,us-west4-b,us-west4-c]" -workspace: /workspace -blueprint_yaml: "{{ workspace }}/examples/hpc-slurm-v5-legacy.yaml" -network: "{{ deployment_name }}-net" -# Note: Pattern matching in gcloud only supports 1 wildcard, centv5*-login-* won't work. -login_node: "{{ slurm_cluster_name }}-login-*" -controller_node: "{{ slurm_cluster_name }}-controller" -post_deploy_tests: -- test-validation/test-mounts.yml -- test-validation/test-partitions.yml -custom_vars: - partitions: - - compute - - debug - mounts: - - /home -wait_for_compute_nodes_to_go_down: true diff --git a/tools/cloud-build/daily-tests/tests/slurm-v5-rocky8.yml b/tools/cloud-build/daily-tests/tests/slurm-v5-rocky8.yml deleted file mode 100644 index cb76a571b4..0000000000 --- a/tools/cloud-build/daily-tests/tests/slurm-v5-rocky8.yml +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -test_name: hpc-slurm-rocky8 -deployment_name: "rock-8-{{ build }}" -# Manually adding the slurm_cluster_name for use in node names, which filters -# non-alphanumeric chars and is capped at 10 chars. -slurm_cluster_name: "rock8{{ build[0:5] }}" - -cli_deployment_vars: - network_name: "{{ network }}" - instance_image: "{family: slurm-gcp-5-12-hpc-rocky-linux-8, project: schedmd-slurm-public}" - region: us-west4 - zone: us-west4-c - -zone: us-west4-c -workspace: /workspace -blueprint_yaml: "{{ workspace }}/community/examples/hpc-slurm-ubuntu2004-v5-legacy.yaml" -network: "{{ deployment_name }}-net" -# Note: Pattern matching in gcloud only supports 1 wildcard, centv5*-login-* won't work. -login_node: "{{ slurm_cluster_name }}-login-*" -controller_node: "{{ slurm_cluster_name }}-controller" -post_deploy_tests: -- test-validation/test-mounts.yml -- test-validation/test-partitions.yml -custom_vars: - partitions: - - compute - - debug - mounts: - - /home -wait_for_compute_nodes_to_go_down: true diff --git a/tools/cloud-build/daily-tests/tests/slurm-v5-startup-scripts.yml b/tools/cloud-build/daily-tests/tests/slurm-v5-startup-scripts.yml deleted file mode 100644 index 9037211bb2..0000000000 --- a/tools/cloud-build/daily-tests/tests/slurm-v5-startup-scripts.yml +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- - -test_name: hpc-cluster-slurm-v5 -deployment_name: "ss-v5-{{ build }}" -# Manually adding the slurm_cluster_name for use in node names, which filters -# non-alphanumeric chars and is capped at 10 chars. -slurm_cluster_name: "ssv5{{ build[0:6] }}" -zone: us-west4-c -workspace: /workspace -blueprint_yaml: "{{ workspace }}/tools/validate_configs/test_configs/slurm-gcp-v5-startup-scripts-v5-legacy.yaml" -network: "{{ deployment_name }}-net" -# Note: Pattern matching in gcloud only supports 1 wildcard, centv5*-login-* won't work. -login_node: "{{ slurm_cluster_name }}-login-*" -controller_node: "{{ slurm_cluster_name }}-controller" -post_deploy_tests: -- test-validation/test-partitions.yml -custom_vars: - partitions: - - compute - - debug - mounts: - - /home - - /data -wait_for_compute_nodes_to_go_down: true diff --git a/tools/cloud-build/daily-tests/tests/slurm-v5-ubuntu.yml b/tools/cloud-build/daily-tests/tests/slurm-v5-ubuntu.yml deleted file mode 100644 index e104f5ede2..0000000000 --- a/tools/cloud-build/daily-tests/tests/slurm-v5-ubuntu.yml +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -test_name: hpc-slurm-ubuntu2004 -deployment_name: "ubun-v5-{{ build }}" -# Manually adding the slurm_cluster_name for use in node names, which filters -# non-alphanumeric chars and is capped at 10 chars. -slurm_cluster_name: "ubunv5{{ build[0:4] }}" -zone: us-west4-c -workspace: /workspace -blueprint_yaml: "{{ workspace }}/community/examples/hpc-slurm-ubuntu2004-v5-legacy.yaml" -network: "{{ deployment_name }}-net" -# Note: Pattern matching in gcloud only supports 1 wildcard, centv5*-login-* won't work. -login_node: "{{ slurm_cluster_name }}-login-*" -controller_node: "{{ slurm_cluster_name }}-controller" -post_deploy_tests: -- test-validation/test-mounts.yml -- test-validation/test-partitions.yml -custom_vars: - partitions: - - compute - - debug - mounts: - - /home -cli_deployment_vars: - network_name: "{{ network }}" -wait_for_compute_nodes_to_go_down: true From d56d8a276b35f405a94dcd8f60c78624cb439471 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 18:31:31 +0000 Subject: [PATCH 084/140] Bump github.com/go-git/go-billy/v5 from 5.6.0 to 5.6.1 Bumps [github.com/go-git/go-billy/v5](https://github.com/go-git/go-billy) from 5.6.0 to 5.6.1. - [Release notes](https://github.com/go-git/go-billy/releases) - [Commits](https://github.com/go-git/go-billy/compare/v5.6.0...v5.6.1) --- updated-dependencies: - dependency-name: github.com/go-git/go-billy/v5 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- go.mod | 8 ++++---- go.sum | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/go.mod b/go.mod index 8e9d4e4d7c..6e9502407e 100644 --- a/go.mod +++ b/go.mod @@ -22,7 +22,7 @@ require ( require ( github.com/fatih/color v1.18.0 - github.com/go-git/go-billy/v5 v5.6.0 + github.com/go-git/go-billy/v5 v5.6.1 github.com/google/go-cmp v0.6.0 github.com/hashicorp/terraform-exec v0.21.0 github.com/mattn/go-isatty v0.0.20 @@ -35,7 +35,7 @@ require ( cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect dario.cat/mergo v1.0.0 // indirect github.com/apparentlymart/go-textseg/v15 v15.0.0 // indirect - github.com/cyphar/filepath-securejoin v0.2.5 // indirect + github.com/cyphar/filepath-securejoin v0.3.6 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-logr/logr v1.4.1 // indirect @@ -44,7 +44,7 @@ require ( github.com/hashicorp/terraform-json v0.22.1 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/rogpeppe/go-internal v1.11.0 // indirect + github.com/rogpeppe/go-internal v1.12.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect go.opentelemetry.io/otel v1.24.0 // indirect @@ -96,7 +96,7 @@ require ( github.com/xanzy/ssh-agent v0.3.3 // indirect go.opencensus.io v0.24.0 // indirect golang.org/x/crypto v0.31.0 // indirect - golang.org/x/net v0.27.0 // indirect + golang.org/x/net v0.33.0 // indirect golang.org/x/oauth2 v0.21.0 // indirect golang.org/x/sys v0.28.0 golang.org/x/text v0.21.0 // indirect diff --git a/go.sum b/go.sum index bdf95899c5..f976fd23a8 100644 --- a/go.sum +++ b/go.sum @@ -231,8 +231,8 @@ github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWH github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/cyphar/filepath-securejoin v0.2.5 h1:6iR5tXJ/e6tJZzzdMc1km3Sa7RRIVBKAK32O2s7AYfo= -github.com/cyphar/filepath-securejoin v0.2.5/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= +github.com/cyphar/filepath-securejoin v0.3.6 h1:4d9N5ykBnSp5Xn2JkhocYDkOpURL/18CYMpo6xB9uWM= +github.com/cyphar/filepath-securejoin v0.3.6/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -261,8 +261,8 @@ github.com/gliderlabs/ssh v0.3.7 h1:iV3Bqi942d9huXnzEF2Mt+CY9gLu8DNM4Obd+8bODRE= github.com/gliderlabs/ssh v0.3.7/go.mod h1:zpHEXBstFnQYtGnB8k8kQLol82umzn/2/snG7alWVD8= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic= -github.com/go-git/go-billy/v5 v5.6.0 
h1:w2hPNtoehvJIxR00Vb4xX94qHQi/ApZfX+nBE2Cjio8= -github.com/go-git/go-billy/v5 v5.6.0/go.mod h1:sFDq7xD3fn3E0GOwUSZqHo9lrkmx8xJhA0ZrfvjBRGM= +github.com/go-git/go-billy/v5 v5.6.1 h1:u+dcrgaguSSkbjzHwelEjc0Yj300NUevrrPphk/SoRA= +github.com/go-git/go-billy/v5 v5.6.1/go.mod h1:0AsLr1z2+Uksi4NlElmMblP5rPcDZNRCD8ujZCRR2BE= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII= github.com/go-git/go-git/v5 v5.12.0 h1:7Md+ndsjrzZxbddRDZjF14qK+NN56sy6wkqaVrjZtys= @@ -458,8 +458,8 @@ github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1: github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= -github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= -github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= @@ -484,8 +484,8 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/ulikunitz/xz v0.5.10 h1:t92gobL9l3HE202wg3rlk19F6X+JOxl9BBrCCMYEYd8= github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= @@ -619,8 +619,8 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug golang.org/x/net v0.0.0-20220909164309-bea034e7d591/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= golang.org/x/net v0.0.0-20221014081412-f15817d10f9b/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= -golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= -golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod 
h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= From 0cd02fce9375d4895c86de9fbbb491d9b87522d6 Mon Sep 17 00:00:00 2001 From: Harsh Thakkar Date: Mon, 6 Jan 2025 15:43:19 +0000 Subject: [PATCH 085/140] Remove slurm-gcp v5 examples and update documentation --- community/examples/AMD/README.md | 4 - .../examples/AMD/hpc-amd-slurm-v5-legacy.yaml | 231 --------- .../hpc-slurm-chromedesktop-v5-legacy.yaml | 119 ----- .../hpc-slurm-local-ssd-v5-legacy.yaml | 109 ----- .../hpc-slurm-ubuntu2004-v5-legacy.yaml | 96 ---- community/examples/htc-slurm-v5-legacy.yaml | 165 ------- .../healthcare-and-life-sciences/README.md | 4 - .../hcls-blueprint-v5-legacy.yaml | 353 -------------- examples/README.md | 455 ------------------ examples/cae/README.md | 4 - examples/cae/cae-slurm-v5-legacy.yaml | 254 ---------- examples/hpc-enterprise-slurm-v5-legacy.yaml | 326 ------------- examples/hpc-slurm-v5-legacy.yaml | 112 ----- examples/image-builder-v5-legacy.yaml | 110 ----- .../a3-highgpu-8g/v5-legacy/README.md | 342 ------------- .../ml-slurm-a3-0-base-v5-legacy.yaml | 61 --- .../ml-slurm-a3-1-image-v5-legacy.yaml | 283 ----------- .../ml-slurm-a3-2-cluster-v5-legacy.yaml | 213 -------- examples/ml-slurm-v5-legacy.yaml | 266 ---------- .../blueprints/lustre-slurm-v5-legacy.yaml | 151 ------ .../test_configs/gpu-v5-legacy.yaml | 189 -------- .../test_configs/node-groups-v5-legacy.yaml | 173 ------- ...lurm-gcp-v5-startup-scripts-v5-legacy.yaml | 123 ----- .../slurm-static-test-v5-legacy.yaml | 100 ---- .../zone-policies-slurm-v5-legacy.yaml | 94 ---- 25 files changed, 4337 deletions(-) delete mode 100644 community/examples/AMD/hpc-amd-slurm-v5-legacy.yaml delete mode 100644 community/examples/hpc-slurm-chromedesktop-v5-legacy.yaml delete mode 100644 community/examples/hpc-slurm-local-ssd-v5-legacy.yaml delete mode 100644 community/examples/hpc-slurm-ubuntu2004-v5-legacy.yaml delete mode 100644 community/examples/htc-slurm-v5-legacy.yaml delete mode 100644 docs/videos/healthcare-and-life-sciences/hcls-blueprint-v5-legacy.yaml delete mode 100644 examples/cae/cae-slurm-v5-legacy.yaml delete mode 100644 examples/hpc-enterprise-slurm-v5-legacy.yaml delete mode 100644 examples/hpc-slurm-v5-legacy.yaml delete mode 100644 examples/image-builder-v5-legacy.yaml delete mode 100644 examples/machine-learning/a3-highgpu-8g/v5-legacy/README.md delete mode 100644 examples/machine-learning/a3-highgpu-8g/v5-legacy/ml-slurm-a3-0-base-v5-legacy.yaml delete mode 100644 examples/machine-learning/a3-highgpu-8g/v5-legacy/ml-slurm-a3-1-image-v5-legacy.yaml delete mode 100644 examples/machine-learning/a3-highgpu-8g/v5-legacy/ml-slurm-a3-2-cluster-v5-legacy.yaml delete mode 100644 examples/ml-slurm-v5-legacy.yaml delete mode 100644 tools/cloud-build/daily-tests/blueprints/lustre-slurm-v5-legacy.yaml delete mode 100644 tools/validate_configs/test_configs/gpu-v5-legacy.yaml delete mode 100644 tools/validate_configs/test_configs/node-groups-v5-legacy.yaml delete mode 100644 tools/validate_configs/test_configs/slurm-gcp-v5-startup-scripts-v5-legacy.yaml delete mode 100644 tools/validate_configs/test_configs/slurm-static-test-v5-legacy.yaml delete mode 100644 tools/validate_configs/test_configs/zone-policies-slurm-v5-legacy.yaml diff --git a/community/examples/AMD/README.md b/community/examples/AMD/README.md index 
ffc25e2598..38ccda4442 100644 --- a/community/examples/AMD/README.md +++ b/community/examples/AMD/README.md @@ -1,9 +1,5 @@ # AMD solutions for the Cluster Toolkit (formerly HPC Toolkit) -> [!NOTE] -> This document uses Slurm-GCP v6. If you want to use Slurm-GCP v5 version you -> scan refer [blueprint](./hpc-amd-slurm-v5-legacy.yaml) - ## AMD-Optimized Slurm Cluster This example provisions a Slurm cluster using the AMD-based Computed Optimized diff --git a/community/examples/AMD/hpc-amd-slurm-v5-legacy.yaml b/community/examples/AMD/hpc-amd-slurm-v5-legacy.yaml deleted file mode 100644 index c92044511f..0000000000 --- a/community/examples/AMD/hpc-amd-slurm-v5-legacy.yaml +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -blueprint_name: hpc-amd-slurm - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: amd-v5 - region: us-east4 - zone: us-east4-c - -deployment_groups: -- group: primary - modules: - - id: network1 - source: modules/network/vpc - - - id: homefs - source: modules/file-system/filestore - use: [network1] - settings: - local_mount: /home - - - id: swfs - source: modules/file-system/filestore - use: [network1] - settings: - local_mount: /sw - - - id: spack-setup - source: community/modules/scripts/spack-setup - settings: - install_dir: /sw/spack - spack_ref: v0.18.1 - - - id: spack-execute - source: community/modules/scripts/spack-execute - use: [spack-setup] - settings: - log_file: /var/log/spack.log - data_files: - - destination: /tmp/projections-config.yaml - content: | - modules: - default: - tcl: - hash_length: 0 - all: - conflict: - - '{name}' - projections: - all: '{name}/{version}-{compiler.name}-{compiler.version}' - - destination: /tmp/slurm-external-config.yaml - content: | - packages: - slurm: - externals: - - spec: slurm@22-05-8 - prefix: /usr/local - buildable: False - - destination: /sw/spack/openfoam_env.yaml - content: | - spack: - definitions: - - compilers: - - gcc@10.3.0 - - mpis: - - openmpi@4.1.3+legacylaunchers+pmi fabrics=none schedulers=slurm - - packages: - - flex@2.6.4 - - mpi_packages: - - openfoam-org@8 ^flex@2.6.4 target=zen3 - specs: - - matrix: - - - $mpis - - - $%compilers - - matrix: - - - $packages - - - $%compilers - - matrix: - - - $mpi_packages - - - $%compilers - - - $^mpis - concretizer: - unify: when_possible - commands: | - spack config --scope defaults add config:build_stage:/opt/spack_build_stage - spack config --scope defaults add -f /tmp/projections-config.yaml - spack config --scope site add -f /tmp/slurm-external-config.yaml - spack config --scope site add concretizer:targets:host_compatible:false - - # gcc 12.1.0 is known to have runtime failures with OpenFOAM 8 - # gcc 10.3.0 is the earliest copy of gcc with Zen 3 support - spack install gcc@10.3.0 %gcc@4.8.5 target=x86_64 - spack load gcc@10.3.0 %gcc@4.8.5 target=x86_64 - spack compiler find --scope site - - if ! 
spack env list | grep -q openfoam; then - spack env create openfoam /sw/spack/openfoam_env.yaml - spack env activate openfoam - spack concretize - spack install - fi - - - id: spack-startup - source: modules/scripts/startup-script - settings: - runners: - - $(spack-execute.spack_runner) - - type: shell - destination: shutdown.sh - content: | - #!/bin/bash - if [ ! -f /etc/block_auto_shutdown ]; then - touch /etc/block_auto_shutdown - shutdown -h +1 - fi - - - id: slurm_startup - source: modules/scripts/startup-script - settings: - runners: - - $(spack-setup.spack_runner) - # the following installation of AOCC may be automated in the future - # with a clear direction to the user to read the EULA at - # https://developer.amd.com/aocc-compiler-eula/ - - type: data - destination: /var/tmp/install_aocc.sh - content: | - #!/bin/bash - source /sw/spack/share/spack/setup-env.sh - spack install aocc@3.2.0 +license-agreed - spack load aocc@3.2.0 - spack compiler find --scope site - spack -d install -v openmpi@4.1.3 %aocc@3.2.0 +legacylaunchers +pmi schedulers=slurm - - type: data - destination: /var/tmp/openfoam_test.sh - content: | - #!/bin/bash - # the following line works around a problem activating environments - # before directory is accessed - ls -lha /sw/spack/var/spack/environments/openfoam/ &>/dev/null - spack env activate openfoam - DIR=$HOME/openfoam_test - mkdir -p $DIR - cd $DIR - cp -fr $WM_PROJECT_DIR/tutorials/incompressible/simpleFoam/motorBike . - cd motorBike - ./Allrun - - - id: spack_builder - source: modules/compute/vm-instance - use: [network1, swfs, spack-startup] - settings: - name_prefix: spack-builder - machine_type: c2d-standard-16 - disable_public_ips: true - instance_image: - # these images must match the images used by Slurm modules below because - # we are building OpenMPI with PMI support in libraries contained in - # Slurm installation - family: slurm-gcp-5-12-hpc-centos-7 - project: schedmd-slurm-public - - - id: low_cost_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - machine_type: c2d-standard-4 - node_count_dynamic_max: 10 - bandwidth_tier: gvnic_enabled - - - id: low_cost_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - low_cost_node_group - settings: - partition_name: lowcost - enable_placement: false - - - id: compute_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - machine_type: c2d-standard-112 - node_count_dynamic_max: 50 - bandwidth_tier: gvnic_enabled - - # because is_default is set to true, jobs will run on this partition unless an - # alternative partition is specified using, for example, "srun -p lowcost" - - id: compute_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - compute_node_group - settings: - partition_name: compute - enable_placement: true - is_default: true - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network1 - - homefs - - swfs - - low_cost_partition - - compute_partition - settings: - machine_type: c2d-standard-4 - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller - - slurm_startup - settings: - # need at least 8 physical cores to run OpenFOAM test - machine_type: c2d-standard-16 diff --git a/community/examples/hpc-slurm-chromedesktop-v5-legacy.yaml b/community/examples/hpc-slurm-chromedesktop-v5-legacy.yaml 
deleted file mode 100644 index 1a223d55cc..0000000000 --- a/community/examples/hpc-slurm-chromedesktop-v5-legacy.yaml +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -blueprint_name: slurm-crd - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: slurm-crd-01 - region: us-central1 - zone: us-central1-c - instance_image_crd: - family: slurm-gcp-5-12-debian-11 - project: schedmd-slurm-public - instance_image: - family: slurm-gcp-5-12-hpc-centos-7 - project: schedmd-slurm-public - -# Documentation for each of the modules used below can be found at -# https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md - -deployment_groups: -- group: primary - modules: - - id: network1 - source: modules/network/vpc - - - id: homefs - source: modules/file-system/filestore - use: [network1] - settings: - local_mount: /home - - - id: remote-desktop - source: community/modules/remote-desktop/chrome-remote-desktop - use: [network1] - settings: - install_nvidia_driver: true - # instance_count: 0 will create installation scripts only - # which can be used with slurm node provisioning - instance_count: 0 - - - id: crd_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - machine_type: n1-standard-8 - node_count_dynamic_max: 3 - disable_public_ips: false - instance_image: $(vars.instance_image_crd) - instance_image_custom: true - guest_accelerator: - - type: nvidia-tesla-t4-vws - count: 1 - - - id: crd_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - remote-desktop - - crd_node_group - settings: - partition_name: desktop - enable_placement: false - partition_startup_scripts_timeout: 900 - - - id: compute_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - machine_type: n2d-standard-16 - node_count_dynamic_max: 20 - bandwidth_tier: gvnic_enabled - - - id: compute_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - compute_node_group - settings: - partition_name: compute - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network1 - - homefs - - crd_partition - - compute_partition - settings: - disable_controller_public_ips: false - compute_startup_scripts_timeout: 900 - cloud_parameters: - resume_rate: 0 - resume_timeout: 900 - suspend_rate: 0 - suspend_timeout: 300 - no_comma_params: false - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller - settings: - machine_type: n2d-standard-4 - disable_login_public_ips: false diff --git a/community/examples/hpc-slurm-local-ssd-v5-legacy.yaml b/community/examples/hpc-slurm-local-ssd-v5-legacy.yaml deleted file mode 100644 index 08e39819b7..0000000000 --- a/community/examples/hpc-slurm-local-ssd-v5-legacy.yaml +++ /dev/null @@ -1,109 +0,0 
@@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -blueprint_name: hpc-slurm-local-ssd - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: hpc-localssd - region: us-central1 - zone: us-central1-a - -# Documentation for each of the modules used below can be found at -# https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md - -deployment_groups: -- group: primary - modules: - - id: network1 - source: modules/network/pre-existing-vpc - - - id: homefs - source: modules/file-system/filestore - use: [network1] - settings: - local_mount: /home - - - id: startup - source: modules/scripts/startup-script - settings: - # When shutting down a VM with local SSD disks, we strongly recommend the - # automatic migration of data following these instructions: - # https://cloud.google.com/compute/docs/disks/local-ssd#stop_instance - # Failure to do will result in VMs that lose data and do not automatically - # mount local SSD filesystems - local_ssd_filesystem: - fs_type: ext4 - mountpoint: /mnt/localssd - permissions: "1777" # must quote numeric filesystem permissions! - - - id: compute_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - additional_disks: - - device_name: test-disk-1 - disk_name: null - disk_size_gb: 375 - disk_type: local-ssd - disk_labels: {} - auto_delete: true - boot: false - - device_name: test-disk-2 - disk_name: null - disk_size_gb: 375 - disk_type: local-ssd - disk_labels: {} - auto_delete: true - boot: false - bandwidth_tier: gvnic_enabled - machine_type: c2-standard-4 - node_count_dynamic_max: 5 - node_count_static: 0 - - - id: compute_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - compute_node_group - - startup - settings: - is_default: true - partition_name: ssdcomp - region: us-central1 - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network1 - - homefs - - compute_partition - settings: - cloud_parameters: - resume_rate: 0 - resume_timeout: 300 - suspend_rate: 0 - suspend_timeout: 300 - no_comma_params: false - machine_type: n1-standard-4 - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller - settings: - machine_type: n1-standard-4 diff --git a/community/examples/hpc-slurm-ubuntu2004-v5-legacy.yaml b/community/examples/hpc-slurm-ubuntu2004-v5-legacy.yaml deleted file mode 100644 index 916fcde74b..0000000000 --- a/community/examples/hpc-slurm-ubuntu2004-v5-legacy.yaml +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -blueprint_name: hpc-slurm-ubuntu2004 - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: slurm-gcp-v5 - region: us-west4 - zone: us-west4-c - instance_image: - # Please refer to the following link for the latest images: - # https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/docs/images.md#supported-operating-systems - family: slurm-gcp-5-12-ubuntu-2004-lts - project: schedmd-slurm-public - instance_image_custom: true - - -deployment_groups: -- group: primary - modules: - # Source is an embedded module, denoted by "modules/*" without ./, ../, / - # as a prefix. To refer to a local module, prefix with ./, ../ or / - - id: network1 - source: modules/network/vpc - - - id: homefs - source: modules/file-system/filestore - use: [network1] - settings: - local_mount: /home - - - id: debug_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 4 - machine_type: n2-standard-2 - - - id: debug_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - debug_node_group - settings: - partition_name: debug - exclusive: false # allows nodes to stay up after jobs are done - enable_placement: false # the default is: true - is_default: true - - - id: compute_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 20 - bandwidth_tier: gvnic_enabled - - - id: compute_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - compute_node_group - settings: - partition_name: compute - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network1 - - debug_partition - - compute_partition - - homefs - settings: - disable_controller_public_ips: false - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller - settings: - machine_type: n2-standard-4 - disable_login_public_ips: false diff --git a/community/examples/htc-slurm-v5-legacy.yaml b/community/examples/htc-slurm-v5-legacy.yaml deleted file mode 100644 index 1089cf9904..0000000000 --- a/community/examples/htc-slurm-v5-legacy.yaml +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright 2022 Google LLC -# Copyright (C) SchedMD LLC. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -# This blueprint provisions a cluster using the Slurm scheduler configured to -# efficiently run many short duration, loosely-coupled (non-MPI) jobs. 
See also: -# https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/docs/htc.md -# https://slurm.schedmd.com/high_throughput.html - -blueprint_name: htc-slurm - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: htc-slurm - region: us-west4 - zone: us-west4-c - # By default, public IPs are set in the login and controller to allow easier - # SSH access. To turn this behavior off, set this to true. - disable_public_ips: false - # Stage `community/modules/scheduler/schedmd-slurm-gcp-v5-controller/etc/*` into the deployment folder. - # If you move the blueprint, make sure the relative path is correct. - staged_configs: $(ghpc_stage("../modules/scheduler/schedmd-slurm-gcp-v5-controller/etc")) - -# Documentation for each of the modules used below can be found at -# https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md - -deployment_groups: -- group: primary - modules: - # Source is an embedded module, denoted by "modules/*" without ./, ../, / - # as a prefix. To refer to a local or community module, prefix with ./, ../ or / - - id: network1 - source: modules/network/vpc - - - id: homefs - source: modules/file-system/filestore - use: [network1] - settings: - local_mount: /home - - - id: projectsfs - source: modules/file-system/filestore - use: [network1] - settings: - filestore_tier: HIGH_SCALE_SSD - size_gb: 10240 - local_mount: /projects - - # This file system has an associated license cost. - # https://console.developers.google.com/marketplace/product/ddnstorage/exascaler-cloud - - id: scratchfs - source: community/modules/file-system/DDN-EXAScaler - use: [network1] - settings: - local_mount: /scratch - - # The compute partition is designed for performance. - # Use: - # `srun -N 4 -p compute <>` for any node in the partition. - # `srun -N 4 -p compute --mincpus 30 <>` for node group c2s60. - - - id: compute_node_group_c2s60 - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - name: c2s60 - node_count_dynamic_max: 200 - bandwidth_tier: gvnic_enabled - - - id: compute_node_group_c2s30 - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - name: c2s30 - node_count_dynamic_max: 200 - machine_type: c2-standard-30 - bandwidth_tier: gvnic_enabled - - - id: compute_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - scratchfs - - projectsfs - - compute_node_group_c2s60 - - compute_node_group_c2s30 - settings: - partition_name: compute - enable_placement: false - exclusive: false - - # The lowcost partition is designed to run at a lower cost and without additional quota - # Use: - # `srun -N 4 <>` for any node in the partition. - # `srun -N 4 --mincpus 2` for node group n2s4. 
- - id: low_cost_node_group_n2s2 - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - name: n2s2 - machine_type: n2-standard-2 - node_count_dynamic_max: 10 - bandwidth_tier: gvnic_enabled - - - id: low_cost_node_group_n2s4 - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - name: n2s4 - machine_type: n2-standard-4 - node_count_dynamic_max: 10 - bandwidth_tier: gvnic_enabled - - - id: low_cost_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - scratchfs - - projectsfs - - low_cost_node_group_n2s2 - - low_cost_node_group_n2s4 - settings: - is_default: true - partition_name: lowcost - enable_placement: false - exclusive: false - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network1 - - homefs - - scratchfs - - projectsfs - - low_cost_partition - - compute_partition - settings: - machine_type: c2-standard-8 - disable_controller_public_ips: $(vars.disable_public_ips) - slurm_conf_tpl: $(vars.staged_configs)/htc-slurm.conf.tpl - slurmdbd_conf_tpl: $(vars.staged_configs)/htc-slurmdbd.conf.tpl - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller - settings: - machine_type: n2-standard-4 - disable_login_public_ips: $(vars.disable_public_ips) - - - id: hpc_dashboard - source: modules/monitoring/dashboard - outputs: [instructions] diff --git a/docs/videos/healthcare-and-life-sciences/README.md b/docs/videos/healthcare-and-life-sciences/README.md index de76730b9c..3bd9d7949f 100644 --- a/docs/videos/healthcare-and-life-sciences/README.md +++ b/docs/videos/healthcare-and-life-sciences/README.md @@ -1,9 +1,5 @@ # Healthcare and Life Science Blueprint -> [!NOTE] -> This document uses SlurmGCP v6 version of hcls blueprint. If you want to -> use SlurmGCP v5 version, please refer to this [blueprint](./hcls-blueprint-v5-legacy.yaml). - This folder captures an advanced architecture that can be used to run GROMACS with GPUs or CPUs on Google Cloud. diff --git a/docs/videos/healthcare-and-life-sciences/hcls-blueprint-v5-legacy.yaml b/docs/videos/healthcare-and-life-sciences/hcls-blueprint-v5-legacy.yaml deleted file mode 100644 index 7df8f8ee5a..0000000000 --- a/docs/videos/healthcare-and-life-sciences/hcls-blueprint-v5-legacy.yaml +++ /dev/null @@ -1,353 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -blueprint_name: hcls-cluster-v5 - -validators: -- validator: test_apis_enabled - skip: true # skipping this validator, since "service-enablement" will take care of it. 
- -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: hcls-01 - region: us-central1 - zone: us-central1-c - bucket_force_destroy: false - -deployment_groups: -- group: enable_apis - modules: - - ### Enable APIs ### - - - id: services-api - source: community/modules/project/service-enablement - settings: - gcp_service_list: - - file.googleapis.com - - iam.googleapis.com - - pubsub.googleapis.com - - secretmanager.googleapis.com - - serviceusage.googleapis.com - - compute.googleapis.com - - stackdriver.googleapis.com - -- group: setup - modules: - - ### Network ### - - - id: network1 - source: modules/network/vpc - - ### Resource Monitoring ### - - - id: hpc-dash - source: modules/monitoring/dashboard - - ### Storage ### - - - id: homefs - source: modules/file-system/filestore - use: [network1] - settings: - filestore_share_name: homeshare - local_mount: /home - - - id: appsfs - source: modules/file-system/filestore - use: [network1] - settings: - filestore_share_name: appsshare - local_mount: /apps - - - id: bucket-software - source: community/modules/file-system/cloud-storage-bucket - settings: - name_prefix: hcls-user-provided-software - random_suffix: true - local_mount: /user_provided_software - force_destroy: $(vars.bucket_force_destroy) - outputs: [gcs_bucket_path] - - - id: bucket-input - source: community/modules/file-system/cloud-storage-bucket - settings: - name_prefix: hcls-inputs - random_suffix: true - local_mount: /data_input - mount_options: defaults,_netdev,implicit_dirs,allow_other,dir_mode=0777,file_mode=766 - force_destroy: $(vars.bucket_force_destroy) - - - id: bucket-output - source: community/modules/file-system/cloud-storage-bucket - settings: - name_prefix: hcls-outputs - random_suffix: true - local_mount: /data_output - mount_options: defaults,_netdev,implicit_dirs,allow_other,dir_mode=0777,file_mode=766 - force_destroy: $(vars.bucket_force_destroy) - -- group: software_installation - modules: - - ### Software ### - - - id: spack-setup - source: community/modules/scripts/spack-setup - settings: - install_dir: /apps/spack - - - id: spack-execute - source: community/modules/scripts/spack-execute - use: [spack-setup] - settings: - data_files: - - destination: /tmp/projections-config.yaml - content: | - modules: - default: - tcl: - hash_length: 0 - all: - conflict: - - '{name}' - projections: - all: '{name}/{version}-{compiler.name}-{compiler.version}' - - destination: /tmp/slurm-external-config.yaml - content: | - packages: - slurm: - externals: - - spec: slurm@21-08-8-2 - prefix: /usr/local - buildable: False - - destination: /share/spack/gromacs_env.yaml - content: | - spack: - definitions: - - compilers: - - gcc@11.3.0 - - cudas: - - cuda@11.8.0 - - cuda_mpis: - - openmpi@4.1.4+cuda - - mpi_cuda_packages: - - gromacs@2022.3+cuda+mpi - specs: - - $compilers - - matrix: - - [$cudas] - - [$%compilers] - - matrix: - - [$cuda_mpis] - - [$%compilers] - - [$^cudas] - - [target=skylake] - - matrix: - - [$mpi_cuda_packages] - - [$^cudas] - - [$^cuda_mpis] - - [$%compilers] - - [target=skylake] - commands: | - spack config --scope defaults add config:build_stage:/apps/spack/spack-stage - spack config --scope defaults add -f /tmp/projections-config.yaml - spack config --scope site add -f /tmp/slurm-external-config.yaml - - NVCC_PREPEND_FLAGS='-arch=all' - spack install gcc@11.3.0 target=x86_64 - spack load gcc@11.3.0 target=x86_64 - spack compiler find --scope site - - if ! 
spack env list | grep -q gromacs; then - spack env create gromacs /share/spack/gromacs_env.yaml - spack env activate gromacs - spack concretize - spack install - fi - - - id: spack-builder-startup - source: modules/scripts/startup-script - settings: - runners: - - $(spack-execute.spack_runner) - - - type: shell - destination: data_staging.sh - content: | - #!/bin/bash - wget --no-verbose -P /data_input/protein_data_bank/ https://files.rcsb.org/download/1AKI.pdb - wget --no-verbose -P /tmp/ https://ftp.gromacs.org/pub/benchmarks/water_GMX50_bare.tar.gz && \ - mkdir -p /data_input/gromacs_inputs/ && \ - tar xzf /tmp/water_GMX50_bare.tar.gz -C /data_input/gromacs_inputs/ && \ - rm /tmp/water_GMX50_bare.tar.gz - - # Set permissions for Spack environment - chmod -R a+rwX /apps/spack/var/spack/environments/gromacs - - - type: data - destination: /apps/gromacs/submit_gromacs_water_cpu.sh - content: | - #!/bin/bash - #SBATCH -N 1 - #SBATCH --ntasks-per-node 30 - #SBATCH -p compute - - # Size can be 0000.65 0000.96 0001.5 0003 0006 0012 0024 0048 0096 0192 0384 0768 1536 3072 - # Type can be 'pme' or 'rf' - - source /apps/spack/share/spack/setup-env.sh - spack env activate gromacs - - # Check that gmx_mpi exists - which gmx_mpi - cd $SLURM_SUBMIT_DIR - cp /data_input/gromacs_inputs/water-cut1.0_GMX50_bare/1536/* . - mpirun -n 1 gmx_mpi grompp -f pme.mdp -c conf.gro -p topol.top -o input.tpr - mpirun -n 30 gmx_mpi mdrun -notunepme -dlb yes -v -resethway -noconfout -nsteps 4000 -s input.tpr - - - type: data - destination: /apps/gromacs/submit_gromacs_water_gpu.sh - content: | - #!/bin/bash - #SBATCH -N 1 - #SBATCH --ntasks-per-node 1 - #SBATCH -p gpu - #SBATCH --gpus 1 - - # Size can be 0000.65 0000.96 0001.5 0003 0006 0012 0024 0048 0096 0192 0384 0768 1536 3072 - # Type can be 'pme' or 'rf' - - source /apps/spack/share/spack/setup-env.sh - spack env activate gromacs - - # Check that gmx_mpi exists - which gmx_mpi - cd $SLURM_SUBMIT_DIR - cp /data_input/gromacs_inputs/water-cut1.0_GMX50_bare/1536/* . - - # Significant GPU Optimizations only support constraints=h-bonds - # so we change this here for the water benchmark. - for a in *.mdp; do - sed -i 's/constraints[[:blank:]].*=.*all-bonds.*/constraints = h-bonds/' $a - done - mpirun -n 1 gmx_mpi grompp -f pme.mdp -c conf.gro -p topol.top -o input.tpr - - mpirun -n 1 -H localhost \ - env GMX_ENABLE_DIRECT_GPU_COMM=1 \ - gmx_mpi mdrun -v -nsteps 100000 -resetstep 90000 -noconfout \ - -pme gpu -update gpu -nb gpu -gputasks 00 -s input.tpr - - - type: shell - destination: shutdown.sh - content: | - #!/bin/bash - if [ ! -f /etc/block_auto_shutdown ]; then - touch /etc/block_auto_shutdown - shutdown -h +1 - fi - - - id: spack-builder - source: modules/compute/vm-instance - use: [network1, appsfs, bucket-input, spack-builder-startup] - settings: - name_prefix: spack-builder - add_deployment_name_before_prefix: true - threads_per_core: 2 - machine_type: c2-standard-16 - -- group: cluster - modules: - - ### Remote Desktop ### - - - id: desktop - source: community/modules/remote-desktop/chrome-remote-desktop - use: - - network1 - - homefs - - appsfs - - bucket-input - - bucket-output - - bucket-software - settings: - add_deployment_name_before_prefix: true - name_prefix: chrome-remote-desktop - install_nvidia_driver: true - startup_script: | - find /user_provided_software -name vmd-1.9.*.bin.LINUXAMD64*.tar.gz -exec tar xvzf '{}' -C . 
\; - cd vmd-1.9.*/ - ./configure - cd src/ - sudo make install - - ### Slurm Cluster ### - - - id: compute_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 20 - machine_type: c2-standard-60 - - - id: compute_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - compute_node_group - - homefs - - appsfs - - bucket-input - - bucket-output - settings: - partition_name: compute - - - id: gpu_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - enable_smt: true - node_count_dynamic_max: 20 - machine_type: a2-highgpu-1g - - - id: gpu_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - gpu_node_group - - homefs - - appsfs - - bucket-input - - bucket-output - settings: - partition_name: gpu - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network1 - - compute_partition - - gpu_partition - - homefs - - appsfs - - bucket-input - - bucket-output - settings: - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller diff --git a/examples/README.md b/examples/README.md index 92031bf130..9890286175 100644 --- a/examples/README.md +++ b/examples/README.md @@ -17,34 +17,26 @@ md_toc github examples/README.md | sed -e "s/\s-\s/ * /" * [(Optional) Setting up a remote terraform state](#optional-setting-up-a-remote-terraform-state) * [Completed Migration to Slurm-GCP v6](#completed-migration-to-slurm-gcp-v6) * [Blueprint Descriptions](#blueprint-descriptions) - * [hpc-slurm-v5-legacy.yaml](#hpc-slurm-v5-legacyyaml--) ![core-badge] ![deprecated-badge] * [hpc-slurm.yaml](#hpc-slurmyaml-) ![core-badge] - * [hpc-enterprise-slurm-v5-legacy.yaml](#hpc-enterprise-slurm-v5-legacyyaml--) ![core-badge] ![deprecated-badge] * [hpc-enterprise-slurm.yaml](#hpc-enterprise-slurmyaml-) ![core-badge] * [hpc-slurm-static.yaml](#hpc-slurm-staticyaml-) ![core-badge] * [hpc-slurm6-tpu.yaml](#hpc-slurm6-tpuyaml--) ![community-badge] ![experimental-badge] * [hpc-slurm6-tpu-maxtext.yaml](#hpc-slurm6-tpu-maxtextyaml--) ![community-badge] ![experimental-badge] * [hpc-slurm6-apptainer.yaml](#hpc-slurm6-apptaineryaml--) ![community-badge] ![experimental-badge] - * [ml-slurm-v5-legacy.yaml](#ml-slurm-v5-legacyyaml--) ![core-badge] ![deprecated-badge] * [ml-slurm.yaml](#ml-slurmyaml-) ![core-badge] - * [image-builder-v5-legacy.yaml](#image-builder-v5-legacyyaml--) ![core-badge] ![deprecated-badge] * [image-builder.yaml](#image-builderyaml-) ![core-badge] * [serverless-batch.yaml](#serverless-batchyaml-) ![core-badge] * [serverless-batch-mpi.yaml](#serverless-batch-mpiyaml-) ![core-badge] * [pfs-lustre.yaml](#pfs-lustreyaml-) ![core-badge] * [ps-slurm.yaml](#ps-slurmyaml--) ![core-badge] ![experimental-badge] * [pfs-parallelstore.yaml](#pfs-parallelstoreyaml--) ![core-badge] ![experimental-badge] - * [cae-slurm-v5-legacy.yaml](#cae-slurm-v5-legacyyaml--) ![core-badge] ![deprecated-badge] * [cae-slurm.yaml](#cae-slurmyaml-) ![core-badge] * [hpc-build-slurm-image.yaml](#hpc-build-slurm-imageyaml--) ![community-badge] ![experimental-badge] - * [hpc-slurm-ubuntu2004-v5-legacy.yaml](#hpc-slurm-ubuntu2004-v5-legacyyaml--) ![community-badge] ![deprecated-badge] * [hpc-slurm-ubuntu2004.yaml](#hpc-slurm-ubuntu2004yaml--) ![community-badge] - * [hpc-amd-slurm-v5-legacy.yaml](#hpc-amd-slurm-v5-legacyyaml--) ![community-badge] 
![deprecated-badge] * [hpc-amd-slurm.yaml](#hpc-amd-slurmyaml-) ![community-badge] * [hpc-slurm-sharedvpc.yaml](#hpc-slurm-sharedvpcyaml--) ![community-badge] ![experimental-badge] * [client-google-cloud-storage.yaml](#client-google-cloud-storageyaml--) ![community-badge] ![experimental-badge] * [hpc-slurm-gromacs.yaml](#hpc-slurm-gromacsyaml--) ![community-badge] ![experimental-badge] - * [hpc-slurm-local-ssd-v5-legacy.yaml](#hpc-slurm-local-ssd-v5-legacyyaml---) ![community-badge] ![experimental-badge] ![deprecated-badge] * [hpc-slurm-local-ssd.yaml](#hpc-slurm-local-ssdyaml--) ![community-badge] ![experimental-badge] * [hcls-blueprint.yaml](#hcls-blueprintyaml-) ![core-badge] * [hpc-gke.yaml](#hpc-gkeyaml--) ![core-badge] ![experimental-badge] @@ -52,14 +44,12 @@ md_toc github examples/README.md | sed -e "s/\s-\s/ * /" * [storage-gke](#storage-gkeyaml--) ![core-badge] ![experimental-badge] * [gke-a3-megagpu](#gke-a3-megagpuyaml--) ![core-badge] ![experimental-badge] * [gke-a3-highgpu](#gke-a3-highgpuyaml--) ![core-badge] ![experimental-badge] - * [htc-slurm-v5-legacy.yaml](#htc-slurm-v5-legacyyaml---) ![community-badge] ![experimental-badge] ![deprecated-badge] * [htc-slurm.yaml](#htc-slurmyaml-) ![community-badge] * [htc-htcondor.yaml](#htc-htcondoryaml--) ![community-badge] ![experimental-badge] * [fsi-montecarlo-on-batch.yaml](#fsi-montecarlo-on-batchyaml-) ![community-badge] ![experimental-badge] * [tutorial-starccm-slurm.yaml](#tutorial-starccm-slurmyaml--) ![community-badge] ![experimental-badge] * [tutorial-starccm.yaml](#tutorial-starccmyaml--) ![community-badge] ![experimental-badge] * [hpc-slurm-ramble-gromacs.yaml](#hpc-slurm-ramble-gromacsyaml--) ![community-badge] ![experimental-badge] - * [hpc-slurm-chromedesktop-v5-legacy.yaml](#hpc-slurm-chromedesktop-v5-legacyyaml---) ![community-badge] ![experimental-badge] ![deprecated-badge] * [flux-cluster](#flux-clusteryaml--) ![community-badge] ![experimental-badge] * [tutorial-fluent.yaml](#tutorial-fluentyaml--) ![community-badge] ![experimental-badge] * [omnia-cluster.yaml](#omnia-clusteryaml---) ![community-badge] ![experimental-badge] ![deprecated-badge] @@ -208,65 +198,6 @@ Toolkit team, partners, etc.) and are labeled with the community badge Blueprints that are still in development and less stable are also labeled with the experimental badge (![experimental-badge]). -### [hpc-slurm-v5-legacy.yaml] ![core-badge] ![deprecated-badge] - -> **Warning**: The variables `enable_reconfigure`, -> `enable_cleanup_compute`, and `enable_cleanup_subscriptions`, if set to -> `true`, require additional dependencies **to be installed on the system deploying the infrastructure**. -> -> ```shell -> # Install Python3 and run -> pip3 install -r https://raw.githubusercontent.com/GoogleCloudPlatform/slurm-gcp/5.12.2/scripts/requirements.txt -> ``` - -Creates a basic auto-scaling Slurm cluster with mostly default settings. The -blueprint also creates a new VPC network, and a filestore instance mounted to -`/home`. - -There are 3 partitions in this example: `debug` `compute`, and `h3`. The `debug` -partition uses `n2-standard-2` VMs, which should work out of the box without -needing to request additional quota. The purpose of the `debug` partition is to -make sure that first time users are not immediately blocked by quota -limitations. - -[hpc-slurm-v5-legacy.yaml]: ./hpc-slurm-v5-legacy.yaml - -#### Compute Partition - -There is a `compute` partition that achieves higher performance. 
Any -performance analysis should be done on the `compute` partition. By default it -uses `c2-standard-60` VMs with placement groups enabled. You may need to request -additional quota for `C2 CPUs` in the region you are deploying in. You can -select the compute partition using the `-p compute` argument when running `srun`. - -#### H3 Partition - -There is an `h3` partition that uses compute-optimized `h3-standard-88` machine type. -You can read more about the H3 machine series [here](https://cloud.google.com/compute/docs/compute-optimized-machines#h3_series). - -#### Quota Requirements for hpc-slurm-v5-legacy.yaml - -For this example the following is needed in the selected region: - -* Cloud Filestore API: Basic HDD (Standard) capacity (GB): **1,024 GB** -* Compute Engine API: Persistent Disk SSD (GB): **~50 GB** -* Compute Engine API: Persistent Disk Standard (GB): **~50 GB static + 50 - GB/node** up to 1,250 GB -* Compute Engine API: N2 CPUs: **2** for the login node and **2/node** active - in the `debug` partition up to 12 -* Compute Engine API: C2 CPUs: **4** for the controller node and **60/node** - active in the `compute` partition up to 1,204 -* Compute Engine API: H3 CPUs: **88/node** active in the `h3` partition up to - 1760 - * The H3 CPU quota can be increased on the Cloud Console by navigating to - `IAM & Admin`->`Quotas` or searching `All Quotas` and entering `vm_family:H3` - into the filter bar. From there, the quotas for each region may be selected - and edited. -* Compute Engine API: Affinity Groups: **one for each job in parallel** - _only - needed for the `compute` partition_ -* Compute Engine API: Resource policies: **one for each job in parallel** - - _only needed for the `compute` partition_ - ### [hpc-slurm.yaml] ![core-badge] Creates a basic auto-scaling Slurm cluster with mostly default settings. The @@ -317,96 +248,6 @@ For this example the following is needed in the selected region: * Compute Engine API: Resource policies: **one for each job in parallel** - _only needed for the `compute` partition_ -### [hpc-enterprise-slurm-v5-legacy.yaml] ![core-badge] ![deprecated-badge] - -This advanced blueprint creates a cluster with Slurm with several performance -tunings enabled, along with tiered file systems for higher performance. Some of -these features come with additional cost and required additional quotas. - -The Slurm system deployed here connects to the default VPC of the project and -creates a login node and the following seven partitions: - -* `n2` with general-purpose [`n2-stardard-2` nodes][n2]. Placement policies and -exclusive usage are disabled, which means the nodes can be used for multiple jobs. -Nodes will remain idle for 5 minutes before Slurm deletes them. This partition can -be used for debugging and workloads that do not require high performance. -* `c2` with compute-optimized [`c2-standard-60` nodes][c2] based on Intel 3.9 GHz -Cascade Lake processors. -* `c2d` with compute optimized [`c2d-standard-112` nodes][c2d] base on the third -generation AMD EPYC Milan. -* `c3` with compute-optimized [`c3-highcpu-176` nodes][c3] based on Intel Sapphire -Rapids processors. When configured with Tier_1 networking, C3 nodes feature 200 Gbps -low-latency networking. -* `h3` with compute-optimized [`h3-standard-88` nodes][h3] based on Intel Sapphire -Rapids processors. H3 VMs can use the entire host network bandwidth and come with a default network bandwidth rate of up to 200 Gbps. 
-* `a208` with [`a2-ultragpu-8g` nodes][a2] with 8 of the NVIDIA A100 GPU accelerators -with 80GB of GPU memory each. -* `a216` with [`a2-megagpu-16g` nodes][a2] with 16 of the NVIDIA A100 GPU accelerators -with 40GB of GPU memory each. - -For all partitions other than `n2`, [compact placement] policies are enabled by default -and nodes are created and destroyed on a per-job basis. Furthermore, these partitions -are configured with: - -* Faster networking: Google Virtual NIC ([GVNIC]) is used for the GPU partitions and -[Tier_1] is selected when available. Selecting Tier_1 automatically enables GVNIC. -* SSD PDs disks for compute nodes. See the [Storage options] page for more details. - -[n2]: https://cloud.google.com/compute/docs/general-purpose-machines#n2_series -[c2]: https://cloud.google.com/compute/docs/compute-optimized-machines#c2_machine_types -[c2d]: https://cloud.google.com/compute/docs/compute-optimized-machines#c2d_machine_types -[c3]: https://cloud.google.com/blog/products/compute/introducing-c3-machines-with-googles-custom-intel-ipu -[h3]: https://cloud.google.com/compute/docs/compute-optimized-machines#h3_series -[a2]: https://cloud.google.com/compute/docs/gpus#a100-gpus -[g2]: https://cloud.google.com/compute/docs/gpus#l4-gpus -[compact placement]: https://cloud.google.com/compute/docs/instances/define-instance-placement -[GVNIC]: https://cloud.google.com/compute/docs/networking/using-gvnic -[Tier_1]: https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration -[Storage options]: https://cloud.google.com/compute/docs/disks - -File systems: - -* The homefs mounted at `/home` uses the "BASIC_SSD" tier filestore with - 2.5 TiB of capacity -* The projectsfs is mounted at `/projects` and is a high scale SSD filestore - instance with 10TiB of capacity. -* The scratchfs is mounted at `/scratch` and is a - [DDN Exascaler Lustre](../community/modules/file-system/DDN-EXAScaler/README.md) - file system designed for high IO performance. The capacity is ~10TiB. - -> **Warning**: The DDN Exascaler Lustre file system has a license cost as -> described in the pricing section of the -> [DDN EXAScaler Cloud Marketplace Solution](https://console.developers.google.com/marketplace/product/ddnstorage/). - -#### Quota Requirements for hpc-enterprise-slurm-v5-legacy.yaml - -For this example the following is needed in the selected region: - -* Cloud Filestore API: Basic SSD capacity (GB) per region: **2,560 GB** -* Cloud Filestore API: High Scale SSD capacity (GB) per region: **10,240 GiB** - - _min quota request is 61,440 GiB_ -* Compute Engine API: Persistent Disk SSD (GB): **~14,050 GB** static + - **100 GB/node** up to 23,250 GB -* Compute Engine API: Persistent Disk Standard (GB): **~396 GB** static + - **50 GB/node** up to 596 GB -* Compute Engine API: N2 CPUs: **116** for login and lustre and **2/node** active - in `n2` partition up to 124. 
-* Compute Engine API: C2 CPUs: **4** for controller node and **60/node** active - in `c2` partition up to 1,204 -* Compute Engine API: C2D CPUs: **112/node** active in `c2d` partition up to 2,240 -* Compute Engine API: C3 CPUs: **176/node** active in `c3` partition up to 3,520 -* Compute Engine API: H3 CPUs: **88/node** active in `h3` partition up to 1,408 -* Compute Engine API: A2 CPUs: **96/node** active in `a208` and `a216` partitions -up to 3,072 -* Compute Engine API: NVIDIA A100 80GB GPUs: **8/node** active in `a208` partition - up to 128 -* Compute Engine API: NVIDIA A100 GPUs: **8/node** active in `a216` partition up -to 256 -* Compute Engine API: Resource policies: **one for each job in parallel** - - _not needed for `n2` partition_ - -[hpc-enterprise-slurm-v5-legacy.yaml]: ./hpc-enterprise-slurm-v5-legacy.yaml - ### [hpc-enterprise-slurm.yaml] ![core-badge] This advanced blueprint creates a cluster with Slurm with several performance @@ -549,55 +390,6 @@ This blueprint creates a custom [Apptainer](https:https://apptainer.org) enabled [hpc-slurm6-apptainer.yaml]: ../community/examples/hpc-slurm6-apptainer.yaml -### [ml-slurm-v5-legacy.yaml] ![core-badge] ![deprecated-badge] - -This blueprint provisions an HPC cluster running the Slurm scheduler with the -machine learning frameworks PyTorch and TensorFlow pre-installed on every -VM. The cluster has 2 partitions: - -* [A2 family VMs][a2] with the NVIDIA A100 GPU accelerator -* [G2 family VMs][g2] with the NVIDIA L4 GPU accelerator - -[a2]: https://cloud.google.com/compute/docs/gpus#a100-gpus -[g2]: https://cloud.google.com/compute/docs/gpus#l4-gpus - -To provision the cluster, please run: - -```text -./gcluster create examples/ml-slurm-v5-legacy.yaml --vars "project_id=${GOOGLE_CLOUD_PROJECT}" -./gcluster deploy ml-example -``` - -After accessing the login node, you can activate the conda environment for each -library with: - -```shell -source /etc/profile.d/conda.sh -# to activate PyTorch -conda activate pytorch -# to activate TensorFlow -conda activate tf -``` - -An example benchmarking job for PyTorch can be run under Slurm: - -```shell -cp /var/tmp/torch_test.* . -sbatch -N 1 --gpus-per-node=1 torch_test.sh -``` - -When you are done, clean up the resources in reverse order of creation: - -```text -./gcluster destroy ml-example -``` - -Finally, browse to the [Cloud Console][console-images] to delete your custom -image. It will be named beginning with `ml-slurm` followed by a date and -timestamp for uniqueness. - -[ml-slurm-v5-legacy.yaml]: ../examples/ml-slurm-v5-legacy.yaml - ### [ml-slurm.yaml] ![core-badge] This blueprint provisions an HPC cluster running the Slurm scheduler with the @@ -647,131 +439,6 @@ timestamp for uniqueness. [ml-slurm.yaml]: ../examples/ml-slurm.yaml -### [image-builder-v5-legacy.yaml] ![core-badge] ![deprecated-badge] - -This blueprint uses the [Packer template module][pkr] to create a custom VM -image and uses it to provision an HPC cluster using the Slurm scheduler. By -using a custom image, the cluster is able to begin running jobs sooner and more -reliably because there is no need to install applications as VMs boot. This -example takes the following steps: - -1. Creates a network with outbound internet access in which to build the image (see -[Custom Network](#custom-network-deployment-group-1)). -2. Creates a script that will be used to customize the image (see -[Toolkit Runners](#toolkit-runners-deployment-group-1)). -3. 
Builds a custom Slurm image by executing the script on a standard Slurm image -(see [Packer Template](#packer-template-deployment-group-2)). -4. Deploys a Slurm cluster using the custom image (see -[Slurm Cluster Based on Custom Image](#slurm-cluster-based-on-custom-image-deployment-group-3)). - -#### Building and using the custom image - -Create the deployment folder from the blueprint: - -```text -./gcluster create examples/image-builder-v5-legacy.yaml --vars "project_id=${GOOGLE_CLOUD_PROJECT}" -./gcluster deploy image-builder-001" -``` - -Follow the on-screen prompts to approve the creation of each deployment group. -For example, the network is created in the first deployment group, the VM image -is created in the second group, and the third group uses the image to create an -HPC cluster using the Slurm scheduler. - -When you are done, clean up the resources in reverse order of creation: - -```text -terraform -chdir=image-builder-001/cluster destroy --auto-approve -terraform -chdir=image-builder-001/primary destroy --auto-approve -``` - -Finally, browse to the [Cloud Console][console-images] to delete your custom -image. It will be named beginning with `my-slurm-image` followed by a date and -timestamp for uniqueness. - -[console-images]: https://console.cloud.google.com/compute/images - -#### Why use a custom image? - -Using a custom VM image can be more scalable and reliable than installing -software using boot-time startup scripts because: - -* it avoids reliance on continued availability of package repositories -* VMs will join an HPC cluster and execute workloads more rapidly due to reduced - boot-time configuration -* machines are guaranteed to boot with a static software configuration chosen - when the custom image was created. No potential for some machines to have - different software versions installed due to `apt`/`yum`/`pip` installations - executed after remote repositories have been updated. - -[hpcimage]: https://cloud.google.com/compute/docs/instances/create-hpc-vm -[pkr]: ../modules/packer/custom-image/README.md -[image-builder-v5-legacy.yaml]: ./image-builder-v5-legacy.yaml - -#### Custom Network (deployment group 1) - -A tool called [Packer](https://packer.io) builds custom VM images by creating -short-lived VMs, executing scripts on them, and saving the boot disk as an -image that can be used by future VMs. The short-lived VM typically operates in a -network that has outbound access to the internet for downloading software. - -This deployment group creates a network using [Cloud Nat][cloudnat] and -[Identity-Aware Proxy (IAP)][iap] to allow outbound traffic and inbound SSH -connections without exposing the machine to the internet on a public IP address. - -[cloudnat]: https://cloud.google.com/nat/docs/overview -[iap]: https://cloud.google.com/iap/docs/using-tcp-forwarding - -#### Toolkit Runners (deployment group 1) - -The Toolkit [startup-script](../modules/scripts/startup-script/README.md) -module supports boot-time configuration of VMs using "runners". Runners are -configured as a series of scripts uploaded to Cloud Storage. A simple, standard -[VM startup script][vmstartup] runs at boot-time, downloads the scripts from -Cloud Storage and executes them in sequence. - -The script in this example performs the trivial task of creating a file as a -simple demonstration of functionality. You can use the startup-script module -to address more complex scenarios. 
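For reference, the runner described above is the `scripts_for_image` startup-script module defined in `image-builder-v5-legacy.yaml`, which appears later in this patch; a minimal sketch of that definition (the shell runner only) is:

```yaml
  - id: scripts_for_image
    source: modules/scripts/startup-script
    settings:
      runners:
      - type: shell                      # runner type: an executable shell script
        destination: generate_hello.sh   # file name used when the script is staged
        content: |
          #!/bin/sh
          echo "Hello World" > /home/hello.txt
```

Other runner types used later in this patch (for example the `data` runners in the a3 image-building blueprint) follow the same `runners:` list structure.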
- -[vmstartup]: https://cloud.google.com/compute/docs/instances/startup-scripts/linux - -#### Packer Template (deployment group 2) - -The Packer module uses the startup-script module from the first deployment group -and executes the script to produce a custom image. - -#### Slurm Cluster Based on Custom Image (deployment group 3) - -Once the Slurm cluster has been deployed we can test that our Slurm compute -partition is using the custom image. Each compute node should contain the -`hello.txt` file added by the startup-script. - -1. SSH into the login node `slurm-image-builder-001-login0`. -2. Run a job that prints the contents of the added file: - - ```bash - $ srun -N 2 cat /home/hello.txt - Hello World - Hello World - ``` - -#### Quota Requirements for image-builder-v5-legacy.yaml - -For this example the following is needed in the selected region: - -* Compute Engine API: Images (global, not regional quota): 1 image per invocation of `packer build` -* Compute Engine API: Persistent Disk SSD (GB): **~50 GB** -* Compute Engine API: Persistent Disk Standard (GB): **~64 GB static + 32 - GB/node** up to 704 GB -* Compute Engine API: N2 CPUs: **4** (for short-lived Packer VM and Slurm login node) -* Compute Engine API: C2 CPUs: **4** for controller node and **60/node** active - in `compute` partition up to 1,204 -* Compute Engine API: Affinity Groups: **one for each job in parallel** - _only - needed for `compute` partition_ -* Compute Engine API: Resource policies: **one for each job in parallel** - - _only needed for `compute` partition_ - ### [image-builder.yaml] ![core-badge] This blueprint uses the [Packer template module][pkr] to create a custom VM @@ -1056,39 +723,6 @@ For this example the following is needed in the selected region: [pfs-parallelstore.yaml]: ./pfs-parallelstore.yaml [Parallelstore]: ../modules/file-system/parallelstore/README.md -### [cae-slurm-v5-legacy.yaml] ![core-badge] ![deprecated-badge] - -The Computer Aided Engineering (CAE) blueprint captures a reference architecture -where the right cloud components are assembled to optimally cater to the -requirements of computationally-intensive CAE workloads. Specifically, it is -architected around Google Cloud’s VM families that provide a high memory bandwidth -and a balanced memory/flop ratio, which is particularly useful for per-core licensed -CAE software. The solution caters also to large CAE use cases, requiring multiple nodes -that are tightly-coupled via MPI. Special high-memory shapes support even very -memory-demanding workloads with up to 16GB/core. For file IO, different Google managed -high performance NFS storage services are available. For very IO demanding workloads, -third party parallel file systems can be integrated. The scheduling of the workloads -is done by a workload manager. - -The CAE blueprint is intended to be a starting point for more tailored explorations -or installations of specific CAE codes, as provided by ISVs separately. - -A detailed documentation is provided in this [README](cae/README.md). 
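As an illustrative aside (not part of the original README), the compute partitions this architecture defines in `cae-slurm-v5-legacy.yaml` later in this patch are `balance` (the default, H3-based) and `highmem` (C3-highmem); a job targets them with Slurm's partition flag, for example:

```shell
# lands on the default H3-based "balance" partition
srun -N 2 hostname

# explicitly request the "highmem" partition for jobs needing up to 16GB/core
srun -p highmem -N 1 hostname
```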
-
-#### Quota Requirements for cae-slurm-v5-legacy.yaml
-
-For this example the following is needed in the selected region:
-
-* Cloud Filestore API: Basic SSD capacity (GB) per region: **5,120 GB**
-* Cloud Filestore API: High Scale SSD capacity (GB) per region: **10,240 GB**
-* Compute Engine API: H3 CPUs: **88/node** active in `balance` partition up to 880
-* Compute Engine API: C3-highmem CPUs: **176/node** active in `highmem` partition up to 1,760
-* Compute Engine API: N1 CPUs: **8/node** active in `desktop` partition up to 40
-* Compute Engine API: T4 GPUs: **1/node** active in `desktop` partition up to 5
-* Compute Engine API: N2 CPUs: **8** for login and **16** for controller
-
-[cae-slurm-v5-legacy.yaml]: ../examples/cae/cae-slurm-v5-legacy.yaml
-
 ### [cae-slurm.yaml] ![core-badge]
 
 The Computer Aided Engineering (CAE) blueprint captures a reference architecture
@@ -1141,46 +775,6 @@ The blueprint contains 3 groups:
 
 [hpc-build-slurm-image.yaml]: ../community/examples/hpc-build-slurm-image.yaml
 
-### [hpc-slurm-ubuntu2004-v5-legacy.yaml] ![community-badge] ![deprecated-badge]
-
-> **Warning**: The variables `enable_reconfigure`,
-> `enable_cleanup_compute`, and `enable_cleanup_subscriptions`, if set to
-> `true`, require additional dependencies **to be installed on the system deploying the infrastructure**.
->
-> ```shell
-> # Install Python3 and run
-> pip3 install -r https://raw.githubusercontent.com/GoogleCloudPlatform/slurm-gcp/5.12.2/scripts/requirements.txt
-> ```
-
-Similar to the [hpc-slurm-v5-legacy.yaml] example, but using Ubuntu 20.04 instead of CentOS 7.
-[Other operating systems] are supported by SchedMD for the Slurm on GCP project and images are listed [here](https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/docs/images.md#published-image-family). Only the examples listed on this page have been tested by the Cluster Toolkit team.
-
-The cluster will support 2 partitions named `debug` and `compute`.
-The `debug` partition is the default partition and runs on smaller
-`n2-standard-2` nodes. The `compute` partition is not the default and must be
-selected in the `srun` command via the `--partition` flag. The `compute`
-partition runs on compute-optimized nodes of type `c2-standard-60`. The
-`compute` partition may require additional quota before use.
-
-[Other operating systems]: https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/docs/images.md#supported-operating-systems
-[hpc-slurm-ubuntu2004-v5-legacy.yaml]: ../community/examples/hpc-slurm-ubuntu2004-v5-legacy.yaml
-
-#### Quota Requirements for hpc-slurm-ubuntu2004-v5-legacy.yaml
-
-For this example the following is needed in the selected region:
-
-* Cloud Filestore API: Basic HDD (Standard) capacity (GB): **1,024 GB**
-* Compute Engine API: Persistent Disk SSD (GB): **~50 GB**
-* Compute Engine API: Persistent Disk Standard (GB): **~50 GB static + 50
-  GB/node** up to 1,250 GB
-* Compute Engine API: N2 CPUs: **12**
-* Compute Engine API: C2 CPUs: **4** for controller node and **60/node** active
-  in `compute` partition up to 1,204
-* Compute Engine API: Affinity Groups: **one for each job in parallel** - _only
-  needed for `compute` partition_
-* Compute Engine API: Resource policies: **one for each job in parallel** -
-  _only needed for `compute` partition_
-
 ### [hpc-slurm-ubuntu2004.yaml] ![community-badge]
 
 Similar to the [hpc-slurm.yaml] example, but using Ubuntu 20.04 instead of CentOS 7.
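Deployment of this example follows the same `gcluster` workflow used throughout this document; a hypothetical invocation (the blueprint path is inferred from the section heading, and the deployment name placeholder should be replaced with the blueprint's `deployment_name` value):

```text
./gcluster create community/examples/hpc-slurm-ubuntu2004.yaml \
    --vars "project_id=${GOOGLE_CLOUD_PROJECT}"
./gcluster deploy <deployment_name>
```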
@@ -1212,18 +806,6 @@ For this example the following is needed in the selected region: * Compute Engine API: Resource policies: **one for each job in parallel** - _only needed for `compute` partition_ -### [hpc-amd-slurm-v5-legacy.yaml] ![community-badge] ![deprecated-badge] - -This example provisions a Slurm cluster using AMD VM machine types. It -automates the initial setup of Spack, including a script that can be used to -install the AMD Optimizing C/C++ Compiler ([AOCC]) and compile OpenMPI with -AOCC. It is more extensively discussed in a dedicated [README for AMD -examples][amd-examples-readme]. - -[hpc-amd-slurm-v5-legacy.yaml]: ../community/examples/AMD/hpc-amd-slurm-v5-legacy.yaml -[AOCC]: https://developer.amd.com/amd-aocc/ -[amd-examples-readme]: ../community/examples/AMD/README.md - ### [hpc-amd-slurm.yaml] ![community-badge] This example provisions a Slurm cluster using AMD VM machine types. It @@ -1380,17 +962,6 @@ the nodes are provisioned. All nodes mount a filestore instance on `/home`. [omnia-github]: https://github.com/dellhpc/omnia [omnia-cluster.yaml]: ../community/examples/omnia-cluster.yaml -### [hpc-slurm-local-ssd-v5-legacy.yaml] ![community-badge] ![experimental-badge] ![deprecated-badge] - -This blueprint demonstrates the use of Slurm and Filestore, with the definition -of a partition which deploys compute nodes that have local ssd drives deployed. -Before deploying this blueprint, one must first ensure to have an existing VPC -properly configured (allowing Internet access and allowing inter virtual -machine communications, for NFS and also for communications between the Slurm -nodes) - -[hpc-slurm-local-ssd-v5-legacy.yaml]: ../community/examples/hpc-slurm-local-ssd-v5-legacy.yaml - ### [hpc-slurm-local-ssd.yaml] ![community-badge] ![experimental-badge] This blueprint demonstrates the use of Slurm and Filestore, with compute nodes @@ -1628,18 +1199,6 @@ walks through the use of this blueprint. [htc-htcondor.yaml]: ../community/examples/htc-htcondor.yaml [hpcvmimage]: https://cloud.google.com/compute/docs/instances/create-hpc-vm -### [htc-slurm-v5-legacy.yaml] ![community-badge] ![experimental-badge] ![deprecated-badge] - -This blueprint provisions a cluster using the Slurm scheduler in a configuration -tuned for the execution of many short-duration, loosely-coupled (non-MPI) jobs. - -For more information see: - -* [Slurm on Google Cloud High Throughput documentation](https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/docs/htc.md) -* [General Slurm High Throughput documentation](https://slurm.schedmd.com/high_throughput.html) - -[htc-slurm-v5-legacy.yaml]: ../community/examples/htc-slurm-v5-legacy.yaml - ### [htc-slurm.yaml] ![community-badge] This blueprint provisions a cluster using the Slurm scheduler in a configuration @@ -1693,20 +1252,6 @@ tutorial. [tutorial-fluent.yaml]: ../community/examples/tutorial-fluent.yaml -### [hpc-slurm-chromedesktop-v5-legacy.yaml] ![community-badge] ![experimental-badge] ![deprecated-badge] - -This example shows how to use the `chrome-remote-desktop` module with a Slurm -partition to be able to `salloc` a GPU accelerated remote desktop. - -After deploying the blueprint perform the following actions: -1. SSH to the Slurm login node or controller. -1. Provision a remote desktop with the following command: `salloc -p desktop -N - 1` -1. Once you see `salloc: Nodes slurmchrom-desktop-ghpc-0 are ready for job`, - follow the [instructions to set up the remote desktop][crd-instructions]. 
- -[crd-instructions]: ../community/modules/remote-desktop/chrome-remote-desktop/README.md#setting-up-the-remote-desktop -[hpc-slurm-chromedesktop-v5-legacy.yaml]: ../community/examples/hpc-slurm-chromedesktop-v5-legacy.yaml ### [flux-cluster.yaml] ![community-badge] ![experimental-badge] The [flux-cluster.yaml] blueprint describes a flux-framework cluster where flux diff --git a/examples/cae/README.md b/examples/cae/README.md index 6107bf831e..8cc2274e52 100644 --- a/examples/cae/README.md +++ b/examples/cae/README.md @@ -1,7 +1,3 @@ -> **_NOTE:_** This document uses Slurm-GCP v5 version of CAE blueprint. You can -> also use Slurm-GCP v6 version of the CAE blueprint in this folder. it would -> require to append "-v6" suffix at the end of blueprint name/ deployment folder. - # Computer Aided Engineering (CAE) Reference Architecture The Computer Aided Engineering (CAE) [blueprint](./cae-slurm.yaml) in diff --git a/examples/cae/cae-slurm-v5-legacy.yaml b/examples/cae/cae-slurm-v5-legacy.yaml deleted file mode 100644 index 01dddbecdb..0000000000 --- a/examples/cae/cae-slurm-v5-legacy.yaml +++ /dev/null @@ -1,254 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- - -# -# **************** -####### CAE Solution Blueprint ####### -# **************** -# -# This blueprint features a reference design suited for CAE applications on GCP. -# It sets up the following infrastructure: -# * Google's H3 VMs, ideally suited for CAE workloads -# * Google's C3-highmem VM, suited for workloads with 16GB/core requirement -# * Google's Filestore NFS-based shared storage -# * Google's Chrome Remote Desktop -# * SLURM workload scheduler -# -blueprint_name: cae-slurm -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: cae-slurm - # check here for other regions with H3 deployments: https://cloud.google.com/compute/docs/regions-zones - # For example - # region: europe-west4 - # zone: europe-west4-b - region: us-central1 - zone: us-central1-a - # Visit https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/docs/images.md#published-image-family - # for a list of valid family options with Slurm; note: the image types for the compute nodes - # and the Chrome Remote Desktop (CRD) need to have the same Slurm base. 
- instance_image: - family: slurm-gcp-5-12-hpc-centos-7 - project: schedmd-slurm-public - crd_instance_image: - family: slurm-gcp-5-12-debian-11 # must be Debian for CRD - project: schedmd-slurm-public - -# Documentation for each of the modules used below can be found at -# https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md -deployment_groups: - -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -# -# -# Deployment Group: Setup -# -# Sets up VPC network, persistent NFS shares, dashboard -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -- group: setup - modules: - - ####### Virtual Private Cloud Setup ####### - # This creates a virtual private network for your cloud setup - - id: network1 - source: modules/network/vpc - settings: - network_name: cae-slurm-net - subnetwork_name: primary-subnet - - ####### User Home Storage ####### - # This block creates an NFS file share for /home - - id: homefs - source: modules/file-system/filestore - use: [network1] - settings: - filestore_tier: BASIC_SSD - size_gb: 2560 - filestore_share_name: homeshare - local_mount: /home - - ####### Shared Software Storage ####### - # This block creates NFS file share for shared software installations - - id: appsfs - source: modules/file-system/filestore - use: [network1] - settings: - filestore_tier: BASIC_SSD - size_gb: 2560 - filestore_share_name: appsshare - local_mount: /apps - - ####### Dashboard ####### - # This module activates integration with a dashboard on the Google Cloud Console - - id: hpc_dash - source: modules/monitoring/dashboard - outputs: [instructions] - -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -# -# -# Deployment Group: Software Installation -# -# This deployment group is a stub for installing software before -# bringing up the actual cluster. -# See the README.md for useful software deployment patterns. -# -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -# - group: software_installation -# modules: - -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -# -# -# Deployment Group: Cluster -# -# Provisions the actual CAE cluster with compute partitions, -# remote desktop partition and connects to the previously set up -# NFS shares. -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -- group: cluster - modules: - - ####### Scratch ####### - # This block creates an NFS file share for scratch. If you experience an IO bottleneck, - # consider to use the more performant version HIGH_SCALE_SSD with the following settings: - - id: scratchfs - source: modules/file-system/filestore - use: [network1] - settings: - filestore_tier: HIGH_SCALE_SSD - size_gb: 10240 # smallest size for HIGH_SSD_SCALE - filestore_share_name: scratchshare - local_mount: /scratch - - # If you require maximum IO performance, you can consider to bring up a dedicated parallel - # file system, e.g. DDN Exascaler Lustre, Sycomp GPFS, or Parallelstore. - # Note: Those solutions may have associated license cost. 
- # - # Please visit here for more information - # - DDN Exascaler Lustre: https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/community/modules/file-system/DDN-EXAScaler/README.md - # - Sycomp IBM Spectrum Scale: https://console.developers.google.com/marketplace/product/sycomp/sycomp-storage-fueled-by-ibm-spectrum-scale - # - Parallelstore: https://github.com/GoogleCloudPlatform/cluster-toolkit/blob/main/modules/file-system/parallelstore/README.md - - ######## Remote Desktop(s) ####### - # This block enables a partition for nodes that support Chrome Remote Desktop - # see here for use: https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/examples#hpc-slurm-chromedesktopyaml-- - - id: remotedesktop - source: community/modules/remote-desktop/chrome-remote-desktop - use: [network1] - settings: - install_nvidia_driver: true - # instance_count: 0 will create installation scripts only - # which can be used with slurm node provisioning - instance_count: 0 - - - id: remotedesktop_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - machine_type: n1-standard-8 - node_count_dynamic_max: 5 - instance_image: $(vars.crd_instance_image) - guest_accelerator: - - type: nvidia-tesla-t4-vws - count: 1 - - - id: remotedesktop_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - appsfs - - scratchfs - - remotedesktop - - remotedesktop_node_group - settings: - partition_name: desktop - enable_placement: false - partition_startup_scripts_timeout: 900 - - ####### Balanced partition ####### - # this block creates a partition uses GCP H3-standard VM for regular jobs with 4GB/core - - id: h3_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 10 - machine_type: h3-standard-88 - disk_type: 'pd-balanced' - bandwidth_tier: gvnic_enabled - - - id: h3_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - appsfs - - scratchfs - - h3_node_group - settings: - partition_name: balance - is_default: true - - ####### High-Mem partition ####### - # this block creates partition uses GCP C3-highmem VM for jobs with 16GB/core requirement - - id: c3_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 10 - machine_type: c3-highmem-176 - disk_type: 'pd-balanced' - bandwidth_tier: tier_1_enabled - - - id: c3_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - appsfs - - scratchfs - - c3_node_group - settings: - partition_name: highmem - - ####### Scheduler: SLURM ####### - # This block creates a SLURM controller - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network1 - - homefs - - appsfs - - scratchfs - - h3_partition - - c3_partition - - remotedesktop_partition - settings: - machine_type: n2-standard-16 - compute_startup_scripts_timeout: 900 - cloud_parameters: - resume_rate: 0 - resume_timeout: 900 - suspend_rate: 0 - suspend_timeout: 300 - no_comma_params: false - - ####### Scheduler: SLURM ####### - # This block creates a SLURM login node - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller - settings: - machine_type: n2-standard-8 diff --git a/examples/hpc-enterprise-slurm-v5-legacy.yaml 
b/examples/hpc-enterprise-slurm-v5-legacy.yaml deleted file mode 100644 index 99e831ca60..0000000000 --- a/examples/hpc-enterprise-slurm-v5-legacy.yaml +++ /dev/null @@ -1,326 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -blueprint_name: hpc-enterprise-slurm - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: hpc01 - region: us-central1 - zone: us-central1-a - gpu_zones: [us-central1-a, us-central1-b, us-central1-c, us-central1-f] - slurm_image: - # Visit https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/docs/images.md#published-image-family - # for a list of valid family options with Slurm - family: slurm-gcp-5-12-hpc-centos-7 - project: schedmd-slurm-public - # If image above is changed to use custom image, then setting below must be set to true - instance_image_custom: false - # Set to true for active cluster reconfiguration. - # Note that setting this option requires additional dependencies to be installed locally. - # https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/community/modules/scheduler/schedmd-slurm-gcp-v5-controller#description - enable_reconfigure: true - # When set, active compute nodes will be cleaned up on destroy. - # Note that setting this option requires additional dependencies to be installed locally. - enable_cleanup_compute: true - metadata: # Workaround for https://github.com/GoogleCloudPlatform/cluster-toolkit/discussions/3243 - VmDnsSetting: GlobalOnly - -# Recommended to use GCS backend for Terraform state -# See https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/examples#optional-setting-up-a-remote-terraform-state -# -# terraform_backend_defaults: -# type: gcs -# configuration: -# bucket: <> - -# Documentation for each of the modules used below can be found at -# https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md - -deployment_groups: -- group: primary - modules: - # Source is an embedded module, denoted by "modules/*" without ./, ../, / - # as a prefix. 
To refer to a local or community module, prefix with ./, ../ or / - - id: network1 - source: modules/network/pre-existing-vpc - - - id: controller_sa - source: community/modules/project/service-account - settings: - name: controller - project_roles: - - compute.instanceAdmin.v1 - - iam.serviceAccountUser - - logging.logWriter - - monitoring.metricWriter - - pubsub.admin - - storage.objectViewer - - - id: login_sa - source: community/modules/project/service-account - settings: - name: login - project_roles: - - logging.logWriter - - monitoring.metricWriter - - storage.objectViewer - - - id: compute_sa - source: community/modules/project/service-account - settings: - name: compute - project_roles: - - logging.logWriter - - monitoring.metricWriter - - storage.objectCreator - - - id: homefs - source: modules/file-system/filestore - use: [network1] - settings: - local_mount: /home - - - id: projectsfs - source: modules/file-system/filestore - use: [network1] - settings: - local_mount: /projects - - # This file system has an associated license cost. - # https://console.developers.google.com/marketplace/product/ddnstorage/exascaler-cloud - - id: scratchfs - source: community/modules/file-system/DDN-EXAScaler - use: [network1] - settings: - local_mount: /scratch - - - id: n2_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 4 - machine_type: n2-standard-2 - instance_image: $(vars.slurm_image) - service_account: - email: $(compute_sa.service_account_email) - scopes: - - https://www.googleapis.com/auth/cloud-platform - - - id: n2_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: [n2_node_group, network1, homefs, projectsfs, scratchfs] - settings: - partition_name: n2 - exclusive: false # allows nodes to stay up after jobs are done - enable_placement: false # the default is: true - is_default: true - partition_conf: - SuspendTime: 300 # time (in secs) the nodes in this partition stay active after their tasks have completed - - - id: c2_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 20 - machine_type: c2-standard-60 # this is the default - instance_image: $(vars.slurm_image) - bandwidth_tier: tier_1_enabled - disk_type: pd-ssd - disk_size_gb: 100 - service_account: - email: $(compute_sa.service_account_email) - scopes: - - https://www.googleapis.com/auth/cloud-platform - - # use `-p c2` to submit jobs to this partition: - # ex: `srun -p c2 -N 1 hostname` - - id: c2_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: [c2_node_group, network1, homefs, projectsfs, scratchfs] - settings: - partition_name: c2 - # the following two are true by default - exclusive: true # this must be true if enable_placement is true - enable_placement: true - - - id: c2d_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 20 - machine_type: c2d-standard-112 - instance_image: $(vars.slurm_image) - bandwidth_tier: tier_1_enabled - disk_type: pd-ssd - disk_size_gb: 100 - service_account: - email: $(compute_sa.service_account_email) - scopes: - - https://www.googleapis.com/auth/cloud-platform - - - id: c2d_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: [c2d_node_group, network1, homefs, projectsfs, scratchfs] - settings: - partition_name: c2d - - - id: c3_node_group - source: 
community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 20 - machine_type: c3-highcpu-176 - instance_image: $(vars.slurm_image) - bandwidth_tier: tier_1_enabled - disk_type: pd-ssd - disk_size_gb: 100 - service_account: - email: $(compute_sa.service_account_email) - scopes: - - https://www.googleapis.com/auth/cloud-platform - - - id: c3_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: [c3_node_group, network1, homefs, projectsfs, scratchfs] - settings: - partition_name: c3 - - - id: a2_8_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 16 - machine_type: a2-ultragpu-8g - bandwidth_tier: gvnic_enabled - instance_image: $(vars.slurm_image) - disk_type: pd-ssd - disk_size_gb: 100 - node_conf: - Sockets: 2 - CoresPerSocket: 24 - service_account: - email: $(compute_sa.service_account_email) - scopes: - - https://www.googleapis.com/auth/cloud-platform - - # use `-p a208` to submit jobs to this partition: - # ex: `srun -p a208 --gpus-per-node=8 -N 1 nvidia-smi` - - id: a2_8_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: [a2_8_node_group, network1, homefs, projectsfs, scratchfs] - settings: - partition_name: a208 - # This makes this partition look for machines in any of the following zones - # https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/develop/community/modules/compute/schedmd-slurm-gcp-v5-partition#compute-vm-zone-policies - zones: $(vars.gpu_zones) - # The following allows users to use more host memory without specifying cpus on a job - partition_conf: - DefMemPerGPU: 160000 - DefMemPerCPU: null - - - id: a2_16_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 16 - machine_type: a2-megagpu-16g - bandwidth_tier: gvnic_enabled - instance_image: $(vars.slurm_image) - disk_type: pd-ssd - disk_size_gb: 100 - node_conf: - Sockets: 2 - CoresPerSocket: 24 - service_account: - email: $(compute_sa.service_account_email) - scopes: - - https://www.googleapis.com/auth/cloud-platform - - # use `-p a216` to submit jobs to this partition: - # ex: `srun -p a216 --gpus-per-node=16 -N 1 nvidia-smi` - - id: a2_16_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: [a2_16_node_group, network1, homefs, projectsfs, scratchfs] - settings: - partition_name: a216 - # This makes this partition look for machines in any of the following zones - # https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/develop/community/modules/compute/schedmd-slurm-gcp-v5-partition#compute-vm-zone-policies - zones: $(vars.gpu_zones) - # The following allows users to use more host memory without specifying cpus on a job - partition_conf: - DefMemPerGPU: 160000 - DefMemPerCPU: null - - - id: h3_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 16 - machine_type: h3-standard-88 - bandwidth_tier: gvnic_enabled # https://cloud.google.com/compute/docs/compute-optimized-machines#h3_network - instance_image: $(vars.slurm_image) - service_account: - email: $(compute_sa.service_account_email) - scopes: - - https://www.googleapis.com/auth/cloud-platform - # H3 does not support pd-ssd and pd-standard - # https://cloud.google.com/compute/docs/compute-optimized-machines#h3_disks - disk_type: pd-balanced - disk_size_gb: 100 - - # use `-p h3` to submit jobs to this partition: - # ex: `srun -p h3 -N 
1 hostname` - - id: h3_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: [h3_node_group, network1, homefs, projectsfs, scratchfs] - settings: - partition_name: h3 - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: [network1, homefs, projectsfs, scratchfs, n2_partition, - c2_partition, c2d_partition, c3_partition, a2_8_partition, a2_16_partition, - h3_partition] - settings: - instance_image: $(vars.slurm_image) - # the following allow for longer boot time - # which is useful for large GPU nodes - cloud_parameters: - no_comma_params: false - resume_rate: 0 - resume_timeout: 600 - suspend_rate: 0 - suspend_timeout: 600 - # we recommend disabling public IPs if possible - # but that requires your network to have a NAT or - # private access configured - disable_controller_public_ips: false - service_account: - email: $(controller_sa.service_account_email) - scopes: - - https://www.googleapis.com/auth/cloud-platform - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller - settings: - instance_image: $(vars.slurm_image) - machine_type: n2-standard-4 - disable_login_public_ips: false - service_account: - email: $(login_sa.service_account_email) - scopes: - - https://www.googleapis.com/auth/cloud-platform - - - id: hpc_dashboard - source: modules/monitoring/dashboard - outputs: [instructions] diff --git a/examples/hpc-slurm-v5-legacy.yaml b/examples/hpc-slurm-v5-legacy.yaml deleted file mode 100644 index 234277208d..0000000000 --- a/examples/hpc-slurm-v5-legacy.yaml +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -blueprint_name: hpc-slurm - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: hpc-small - region: us-central1 - zone: us-central1-a - -# Documentation for each of the modules used below can be found at -# https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md - -deployment_groups: -- group: primary - modules: - # Source is an embedded module, denoted by "modules/*" without ./, ../, / - # as a prefix. 
To refer to a local module, prefix with ./, ../ or / - - id: network1 - source: modules/network/vpc - - - id: homefs - source: modules/file-system/filestore - use: [network1] - settings: - local_mount: /home - - - id: debug_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 4 - machine_type: n2-standard-2 - - - id: debug_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - debug_node_group - settings: - partition_name: debug - exclusive: false # allows nodes to stay up after jobs are done - enable_placement: false # the default is: true - is_default: true - - - id: compute_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 20 - bandwidth_tier: gvnic_enabled - - - id: compute_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - compute_node_group - settings: - partition_name: compute - - - id: h3_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 20 - machine_type: h3-standard-88 - # H3 does not support pd-ssd and pd-standard - # https://cloud.google.com/compute/docs/compute-optimized-machines#h3_disks - disk_type: pd-balanced - bandwidth_tier: gvnic_enabled - - - id: h3_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - h3_node_group - settings: - partition_name: h3 - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network1 - - debug_partition - - compute_partition - - h3_partition - - homefs - settings: - disable_controller_public_ips: false - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller - settings: - machine_type: n2-standard-4 - disable_login_public_ips: false diff --git a/examples/image-builder-v5-legacy.yaml b/examples/image-builder-v5-legacy.yaml deleted file mode 100644 index c48627f85d..0000000000 --- a/examples/image-builder-v5-legacy.yaml +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -# Deploying the various groups of this blueprint requires passing the output of -# the primary group to the packer group. 
Instructions for how to do that are -# available at the following link: -# https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/examples#image-builderyaml- - -blueprint_name: image-builder - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: image-builder-001 - region: us-central1 - zone: us-central1-c - custom_image: - family: my-slurm-image - project: $(vars.project_id) - disk_size: 32 - -# Documentation for each of the modules used below can be found at -# https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md - -deployment_groups: -- group: primary - modules: - - id: network1 - source: modules/network/vpc - - - id: scripts_for_image - source: modules/scripts/startup-script - settings: - runners: - - type: shell - destination: generate_hello.sh - content: | - #!/bin/sh - echo "Hello World" > /home/hello.txt - -- group: packer - modules: - - id: custom-image - source: modules/packer/custom-image - kind: packer - use: - - network1 - - scripts_for_image - settings: - source_image_project_id: [schedmd-slurm-public] - # see latest in https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/docs/images.md#published-image-family - source_image_family: slurm-gcp-5-12-hpc-centos-7 - # You can find size of source image by using following command - # gcloud compute images describe-from-family --project schedmd-slurm-public - disk_size: $(vars.disk_size) - image_family: $(vars.custom_image.family) - state_timeout: 15m - -- group: cluster - modules: - - id: compute_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 20 - disk_size_gb: $(vars.disk_size) - instance_image: $(vars.custom_image) - instance_image_custom: true - bandwidth_tier: gvnic_enabled - - - id: compute_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - compute_node_group - settings: - partition_name: compute - is_default: true - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network1 - - compute_partition - settings: - disable_controller_public_ips: false - disk_size_gb: $(vars.disk_size) - instance_image: $(vars.custom_image) - instance_image_custom: true - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller - settings: - disable_login_public_ips: false - disk_size_gb: $(vars.disk_size) - instance_image: $(vars.custom_image) - instance_image_custom: true diff --git a/examples/machine-learning/a3-highgpu-8g/v5-legacy/README.md b/examples/machine-learning/a3-highgpu-8g/v5-legacy/README.md deleted file mode 100644 index 96087ef64c..0000000000 --- a/examples/machine-learning/a3-highgpu-8g/v5-legacy/README.md +++ /dev/null @@ -1,342 +0,0 @@ -# Objective - -> [!CAUTION] -> This solution is built upon "legacy" blueprints using Slurm-GCP v5. The -> [solution using v6](../README.md) is recommended for all new deployments. -> The legacy solution is presented for customers with existing deployments. We -> recommend maintaining existing deployments with the version of the Toolkit -> originally used to deploy. - -This document will guide you to successfully provisioning a Slurm cluster with -a3-highgpu-8g compute nodes running NVIDIA H100 GPUs. - -## Before starting - -> [!IMPORTANT] -> Before beginning, submit a request to your Google Cloud representative for -> access to the Deep Learning VM Image for a3-highgpu-8g. 
It is currently -> available only by Private Preview request. This image contains patches that -> significantly enhance the network performance of workloads that span multiple -> a3-highgpu-8g VMs. You will use the image ID in the steps shown below. - -## Required setup - -Please follow the initial instructions for: - -- Installing Cluster Toolkit [dependencies][tkdeps] (Go, Terraform, Packer) -- Installing the Cluster [Toolkit][tkinstall] - -Verify that your release of the Cluster Toolkit is greater than 1.31.1 and less -than or equal to 1.37.0. - -```shell -gcluster --version -``` - -The solution requires several Python packages to be available. We recommend -installing them in a Python virtual environment: - -```shell -python3 -m venv toolkit-a3 -source toolkit-a3/bin/activate -pip3 install -r \ - https://raw.githubusercontent.com/GoogleCloudPlatform/slurm-gcp/5.12.2/scripts/requirements.txt -``` - -**Always** activate the environment before running any gcluster commands such as -deploy or destroy. - -```shell -source /absolute/path/to/toolkit-a3/bin/activate -``` - -## Top-Level Design of Solution - -The solution is split into 3 Cluster Toolkit blueprints: - -1. Provision 1 system network and 1 Filestore instance for mounting `/home` -across the cluster. -2. Build a custom image installing Slurm in an Ubuntu 20.04 image. The image -runs a kernel patched with performance enhancements for the a3-highgpu-8g VM. -3. Provision 4 GPU networks and a Slurm cluster using the custom image. - -The 1st and 2nd blueprints should be provisioned once and rarely need further -modification. This approach separates the lifecycle of a Filestore instance from -the lifecycle of the cluster, allowing the cluster to be deleted while retaining -access to data and home directories. The 3rd cluster blueprint may be more -frequently updated and re-provisioned as discussed below. - -## First time considerations - -> [!IMPORTANT] -> These steps do not need to be repeated when a cluster is re-provisioned. They -> are initial setup steps in a project. - -Replace the values for `PROJECT_ID`, `REGION`, and `ZONE` with the project, -region, and zone in which you have an a3-highgpu-8g allocation. The value for -`BUCKET` must be unique and will be used to create a new bucket. After replacing -the values, execute them so that they automatically populate parameters in the -commands shown below. Note that each a3-highgpu-8g VM (`N_VMS`) contains 8 NVIDIA -H100 GPUs. 
- -```shell -export PROJECT_ID=customer-project-id -export BUCKET=customer-bucket -export REGION=customer-region -export ZONE=customer-zone -export N_VMS=32 -``` - -### Saving Terraform state -Create a bucket with versioning enabled to store Terraform state: - -```shell -gcloud storage buckets create gs://${BUCKET} --project=${PROJECT_ID} \ - --default-storage-class=STANDARD --location=${REGION} \ - --uniform-bucket-level-access -gcloud storage buckets update gs://${BUCKET} --versioning -``` - -Modify all 3 blueprints to configure the new bucket to serve as a Terraform -remote backend: - -```yaml -terraform_backend_defaults: - type: gcs - configuration: - bucket: customer-bucket # modify to bucket created above -``` - -### Set default values - -Modify the the deployment variables `project_id`, `region`, `zone`, in the -`vars` block of all 3 blueprints: - -```yaml - project_id: customer-project - region: customer-region - zone: customer-zone -``` - -### Set kernel-patched OS image - -Obtain values for `source_image_project_id` and `source_image` from your Google -Cloud representative. Set them at approximately lines 33 and 34 of -`ml-slurm-a3-1-image.yaml`. - -```yaml - source_image_project_id: source-image-project-id # use value supplied by Google Cloud staff - source_image: source-image-name # use value supplied by Google Cloud staff -``` - -### Reservation created by Google - -> [!IMPORTANT] -> If you have ***not*** received a VM reservation from Google Cloud staff, then -> skip this step and proceed to [manual reservation creation](#manual-creation-of-reservation). - -Set the deployment variable `a3_reservation_name` at approximately line 38 of -`ml-slurm-a3-2-cluster.yaml` to the reservation name provided by Google. The -value for `a3_maintenance_interval` should also be set as directed by Google -staff. A common setting is `PERIODIC`, shown below, but this value must be -confirmed with Google staff. - -```yaml - # a3_reservation_name must be specified; if Google staff have provided you - # with a reservation name, use it. Otherwise supply user-created reservation. - a3_reservation_name: reservation-name-provided-by-google - # a3_maintenance_interval should be empty string by default; if Google staff - # have created a reservation, they will also provide a3_maintenance_interval - a3_maintenance_interval: PERIODIC -``` - -### Manual creation of reservation - -> [!IMPORTANT] -> If you received a VM reservation from Google Cloud staff, then skip this step -> after confirming that you followed the instructions in [reservation created by -> Google](#reservation-created-by-google). - -We recommend creating a reservation to ensure reliable access to re-create VMs -if you need to redeploy or otherwise maintain your cluster. - -```shell -gcloud compute reservations create a3-reservation-0 \ - --project=${PROJECT_ID} \ - --machine-type=a3-highgpu-8g \ - --vm-count=${N_VMS} \ - --zone=${ZONE} \ - --require-specific-reservation \ - --log-http -``` - -This reservation be must be specified when creating VMs with matching parameters -(e.g. a3-highgpu-8g VM in configured zone). If you executed the command above -without modification, you may leave `a3_reservation_name` and -`a3_maintenance_interval` at their default values in -`ml-slurm-a3-2-cluster.yaml`. Otherwise, ensure that the reservation name in the -blueprint matches the name of the user-created reservation. - -```yaml - # a3_reservation_name must be specified; if Google staff have provided you - # with a reservation name, use it. 
Otherwise supply user-created reservation. - a3_reservation_name: a3-reservation-0 - # a3_maintenance_interval should be empty string by default; if Google staff - # have created a reservation, they will also provide a3_maintenance_interval - a3_maintenance_interval: "" -``` - -### Set cluster size - -At approximately line 37 of `ml-slurm-a3-2-cluster.yaml`, set the static cluster -size. Recall that there are 8 NVIDIA H100 GPUs per a3-highgpu-8g VM. - -```yaml - a3_static_cluster_size: 32 -``` - -## Cluster creation - -The blueprint `ml-slurm-a3-0-base.yaml` will create 1 system network and a -Filestore `/home` filesystem. Run the standard Toolkit workflow at the command -line (approx. 5 minutes): - -```shell -gcluster deploy ml-slurm-a3-0-base.yaml --auto-approve -``` - -Several values will be output to the screen. The output will be similar to: - -```hcl -network_name_sysnet = "sys-net" -network_storage_homefs = { - "client_install_runner" = { - "destination" = "install-nfs_home.sh" - "source" = "modules/embedded/modules/file-system/filestore/scripts/install-nfs-client.sh" - "type" = "shell" - } - "fs_type" = "nfs" - "local_mount" = "/home" - "mount_options" = "defaults,_netdev" - "mount_runner" = { - "args" = "\"10.224.153.226\" \"/nfsshare\" \"/home\" \"nfs\" \"defaults,_netdev\"" - "destination" = "mount_home.sh" - "source" = "modules/embedded/modules/file-system/filestore/scripts/mount.sh" - "type" = "shell" - } - "remote_mount" = "/nfsshare" - "server_ip" = "10.224.153.226" -} -subnetwork_name_sysnet = "sys-subnet" -``` - -Build the custom image using ml-slurm-a3-1-image.yaml and the same workflow -as above. Run at the command line: - -```shell -gcluster deploy ml-slurm-a3-1-image.yaml --auto-approve -``` - -The image will take approximately 30 minutes to build. - -> [!IMPORTANT] -> You must modify `ml-slurm-a3-2-cluster.yaml` to update the IP address of the -> Filestore instance for `/home`. Your IP address will differ from that shown -> below and must match the output from deploying the base blueprint above: -> -> ```yaml -> server_ip_homefs: 10.224.153.226 -> ``` - -Provision the cluster blueprint (approximately 5-10 minutes): - -```shell -gcluster deploy ml-slurm-a3-2-cluster.yaml --auto-approve -``` - -## Receive Data Path Manager (RxDM) - -To achieve optimal application performance, an additional service called the -"Receive Data Path Manager" (RxDM) must run with the same lifetime as the job. -Additionally, a NCCL plugin must be installed into the execution environment of -the workload. Both the RxDM and plugin are distributed by Docker container -images. - -This blueprint includes a Slurm "Prolog" and "Epilog" script that will run -before and after every job running on more than 1 a3-highgpu-8g compute node. -The Prolog will perform the following actions: - -- Install the NCCL plugin into /var/lib of the host -- Run the RxDM service - - This is a long-lived service that runs alongside the job - - Mounts `/var/lib/nvidia/lib64` into `/usr/lib/nvidia/lib64` of the container - - Mount `/opt/tcpdirect_benchmark/` from the host into the container so that a - textproto file defining the mapping from GPU to NIC is available. This file - is present in the Deep Learning VM (DLVM) images that contain TCPDirect - patches. - - Mount `/run/tcpx-${SLURM_JOB_ID}` from the container into the host. This is - set to the environment variables `${UDS_PATH}` in the script. 
This directory - contains Unix socket files that implement a TCPx interface available to the - user workload at `${UDS_PATH}`. The job must be configured to be aware of this - path using `NCCL_GPUDIRECTTCPX_UNIX_CLIENT_PREFIX` environment variable! - -The Epilog will - -- Stop the RxDM service -- Prune any stopped containers (freeing up disk space) -- Remove the directory at `${UDS_PATH}` - -## Jobs using the RxDM / TCPx - -Jobs that are running across multiple a3-highgpu-8g VMs will benefit from using -the RxDM and the NCCL plugin. An example containerized job is located at -`/opt/apps/scripts/run-nccl-tests.sh`. In addition to setting standard NCCL -configuration values, a job must: - -- Set `NCCL_GPUDIRECTTCPX_UNIX_CLIENT_PREFIX` to `${UDS_PATH}` -- Set the `LD_LIBRARY_PATH` to include `/var/lib/tcpx/lib64` and `/usr/local/nvidia/lib64` - -If job is containerized - -- Mount `${UDS_PATH}` into the container at the same path -- Mount `/var/lib/tcpx/lib64` to `/var/lib/tcpx/lib64` in the container (to make the - NCCL plugin available) -- Paths can be modified if `LD_LIBRARY_PATH` is likewise modified - -## Example workload (NCCL benchmark) - -The example workload below demonstrates the pattern recommended in Activating -the Receive Data Path Manager during jobs while running the standard nccl-tests -benchmark. It assumes the availability of a GPU/NIC topology file at -`/opt/tcpdirect_benchmark/gpu_rxq_configuration.textproto`. This file is built -into the DLVM images used by this solution, but may need to be provided if -using an alternative image. - -### Clone the Cluster Toolkit repository containing the NCCL benchmark - -```shell -git clone https://github.com/GoogleCloudPlatform/cluster-toolkit -cd cluster-toolkit/examples/machine-learning/a3-highgpu-8g/nccl-tests -``` - -### Import the PyTorch image from the NVIDIA Container Registry - -```shell -bash import_pytorch_container.sh -``` - -### Build NCCL - -```shell -sbatch build-nccl-tests.sh -``` - -### Run NCCL tests - -```shell -sbatch run-nccl-tests.sh -``` - -[consume]: https://cloud.google.com/compute/docs/instances/reservations-consume#consuming_instances_from_any_matching_reservation -[tkdeps]: https://cloud.google.com/cluster-toolkit/docs/setup/install-dependencies -[tkinstall]: https://github.com/GoogleCloudPlatform/cluster-toolkit/#quickstart diff --git a/examples/machine-learning/a3-highgpu-8g/v5-legacy/ml-slurm-a3-0-base-v5-legacy.yaml b/examples/machine-learning/a3-highgpu-8g/v5-legacy/ml-slurm-a3-0-base-v5-legacy.yaml deleted file mode 100644 index 79c06980d3..0000000000 --- a/examples/machine-learning/a3-highgpu-8g/v5-legacy/ml-slurm-a3-0-base-v5-legacy.yaml +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
---- -blueprint_name: slurm-a3-base - -terraform_backend_defaults: - type: gcs - configuration: - bucket: customer-tf-state-bucket - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: slurm-a3-base - region: customer-region - zone: customer-zone - sys_net_range: 172.16.0.0/16 - filestore_ip_range: 192.168.0.0/29 - -deployment_groups: -- group: primary - modules: - - id: sysnet - source: modules/network/vpc - settings: - network_name: $(vars.deployment_name)-sysnet - network_address_range: $(vars.sys_net_range) - mtu: 8244 - # using explicit var.subnetworks to allow for easier addition - # of multiple system subnetworks in other regions - subnetworks: - - subnet_name: $(vars.deployment_name)-sysnet-subnet - subnet_region: $(vars.region) - new_bits: 4 - subnet_private_access: true - description: primary subnetwork in $(vars.deployment_name)-sysnet - outputs: - - network_name - - subnetwork_name - - - id: homefs - source: modules/file-system/filestore - use: - - sysnet - settings: - filestore_tier: BASIC_SSD - size_gb: 2560 - local_mount: /home - reserved_ip_range: $(vars.filestore_ip_range) - outputs: - - network_storage diff --git a/examples/machine-learning/a3-highgpu-8g/v5-legacy/ml-slurm-a3-1-image-v5-legacy.yaml b/examples/machine-learning/a3-highgpu-8g/v5-legacy/ml-slurm-a3-1-image-v5-legacy.yaml deleted file mode 100644 index 08060286b6..0000000000 --- a/examples/machine-learning/a3-highgpu-8g/v5-legacy/ml-slurm-a3-1-image-v5-legacy.yaml +++ /dev/null @@ -1,283 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- -blueprint_name: slurm-a3-image - -terraform_backend_defaults: - type: gcs - configuration: - bucket: customer-tf-state-bucket - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: slurm-a3-image - region: customer-region - zone: customer-zone - disk_size: 200 - final_image_family: slurm-dlvm - network_name_system: slurm-a3-base-sysnet - subnetwork_name_system: slurm-a3-base-sysnet-subnet - slurm_cluster_name: slurm0 - source_image_project_id: source-image-project-id # use value supplied by Google Cloud staff - source_image: source-image-name # use value supplied by Google Cloud staff - -deployment_groups: -- group: build_script - modules: - - id: sysnet - source: modules/network/pre-existing-vpc - settings: - network_name: $(vars.network_name_system) - subnetwork_name: $(vars.subnetwork_name_system) - - - id: image_build_script - source: modules/scripts/startup-script - settings: - install_ansible: true - docker: - enabled: true - world_writable: true - configure_ssh_host_patterns: - - 10.0.0.* - - 10.1.0.* - - 10.2.0.* - - 10.3.0.* - - $(vars.slurm_cluster_name)* - runners: - - type: shell - destination: workaround_apt_change.sh - content: | - #!/bin/bash - set -e -o pipefail - rm -f /etc/apt/sources.list.d/kubernetes.list - apt-get update --allow-releaseinfo-change - - type: shell - destination: disable_dlvm_builtin_services.sh - content: | - #!/bin/bash - # many extra services are being started via /etc/rc.local; disable - # them on future boots of image - echo -e '#!/bin/bash\n/usr/bin/nvidia-persistenced --user root\nexit 0' > /etc/rc.local - # disable jupyter and notebooks-collection-agent services - systemctl stop jupyter.service notebooks-collection-agent.service - systemctl disable jupyter.service notebooks-collection-agent.service - - type: data - destination: /var/tmp/slurm_vars.json - content: | - { - "reboot": false, - "slurm_version": "23.02.7", - "install_cuda": false, - "nvidia_version": "latest", - "install_ompi": true, - "install_lustre": false, - "install_gcsfuse": true, - "monitoring_agent": "cloud-ops" - } - - type: shell - destination: install_slurm.sh - content: | - #!/bin/bash - set -e -o pipefail - ansible-galaxy role install googlecloudplatform.google_cloud_ops_agents - ansible-pull \ - -U https://github.com/GoogleCloudPlatform/slurm-gcp -C 5.12.2 \ - -i localhost, --limit localhost --connection=local \ - -e @/var/tmp/slurm_vars.json \ - ansible/playbook.yml - # this duplicates the ulimits configuration of the HPC VM Image - - type: data - destination: /etc/security/limits.d/99-unlimited.conf - content: | - * - memlock unlimited - * - nproc unlimited - * - stack unlimited - * - nofile 1048576 - * - cpu unlimited - * - rtprio unlimited - - type: data - destination: /etc/systemd/system/slurmd.service.d/file_ulimit.conf - content: | - [Service] - LimitNOFILE=infinity - - type: data - destination: /etc/systemd/system/delay-a3.service - content: | - [Unit] - Description=Delay A3 boot until all network interfaces are routable - After=network-online.target - Wants=network-online.target - Before=google-startup-scripts.service - - [Service] - ExecCondition=/bin/bash -c '/usr/bin/curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/instance/machine-type | grep -q "/a3-highgpu-8g$"' - ExecStart=/usr/lib/systemd/systemd-networkd-wait-online -i enp6s0 -i enp12s0 -i enp134s0 -i enp140s0 -o routable --timeout=120 - ExecStartPost=/bin/sleep 60 - - [Install] - WantedBy=multi-user.target - - type: shell - destination: 
install_enroot_pyxis.sh - content: | - #!/bin/bash - set -e -o pipefail - ### Setting up Enroot - if ! dpkg -l enroot &>/dev/null; then - arch=\$(dpkg --print-architecture) - curl -fSsL -O https://github.com/NVIDIA/enroot/releases/download/v3.4.1/enroot_3.4.1-1_${arch}.deb - curl -fSsL -O https://github.com/NVIDIA/enroot/releases/download/v3.4.1/enroot+caps_3.4.1-1_${arch}.deb # optional - apt-get update - apt-get install --assume-yes ./*.deb - rm enroot*.deb - fi - # configure enroot - # use single quotes around EOT to avoid shell interpolation - cat <<'EOT' > /etc/enroot/enroot.conf - ENROOT_RUNTIME_PATH /mnt/localssd/${UID}/enroot/runtime - ENROOT_CACHE_PATH /mnt/localssd/${UID}/enroot/cache - ENROOT_DATA_PATH /mnt/localssd/${UID}/enroot/data - ENROOT_TEMP_PATH /mnt/localssd/${UID}/enroot - EOT - ### Install Pyxis - if [ ! -f "/usr/local/lib/slurm/spank_pyxis.so" ]; then - git clone --depth 1 https://github.com/NVIDIA/pyxis.git - cd pyxis && make install && cd - - rm -rf pyxis - echo "required /usr/local/lib/slurm/spank_pyxis.so" > /etc/slurm/plugstack.conf - fi - - type: shell - destination: install_mdadm.sh - content: | - #!/bin/bash - apt-get update - apt-get install mdadm --no-install-recommends --assume-yes - - type: data - destination: /usr/local/ghpc/mount_localssd.sh - content: | - #!/bin/bash - set -e -o pipefail - - RAID_DEVICE=/dev/md0 - DST_MNT=/mnt/localssd - DISK_LABEL=LOCALSSD - OPTIONS=discard,defaults - - # if mount is successful, do nothing - if mount --source LABEL="$DISK_LABEL" --target="$DST_MNT" -o "$OPTIONS"; then - exit 0 - fi - - # Create new RAID, format ext4 and mount - # TODO: handle case of zero or 1 local SSD disk - # TODO: handle case when /dev/md0 exists but was not mountable for - # some reason - DEVICES=`nvme list | grep nvme_ | grep -v nvme_card-pd | awk '{print $1}' | paste -sd ' '` - NB_DEVICES=`nvme list | grep nvme_ | grep -v nvme_card-pd | awk '{print $1}' | wc -l` - mdadm --create "$RAID_DEVICE" --level=0 --raid-devices=$NB_DEVICES $DEVICES - mkfs.ext4 -F "$RAID_DEVICE" - tune2fs "$RAID_DEVICE" -r 131072 - e2label "$RAID_DEVICE" "$DISK_LABEL" - mkdir -p "$DST_MNT" - mount --source LABEL="$DISK_LABEL" --target="$DST_MNT" -o "$OPTIONS" - chmod 1777 "$DST_MNT" - - type: data - destination: /etc/systemd/system/mount-local-ssd.service - content: | - [Unit] - Description=Assemble local SSDs as software RAID; then format and mount - - [Service] - ExecCondition=bash -c '/usr/bin/curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/instance/machine-type | grep -q "/a3-highgpu-8g$"' - ExecStart=/bin/bash /usr/local/ghpc/mount_localssd.sh - - [Install] - WantedBy=local-fs.target - - type: shell - destination: install_dcgm.sh - content: | - #!/bin/bash - set -e -o pipefail - apt-key del 7fa2af80 - distribution=\$(. 
/etc/os-release;echo $ID$VERSION_ID | sed -e 's/\.//g') - wget https://developer.download.nvidia.com/compute/cuda/repos/$distribution/x86_64/cuda-keyring_1.0-1_all.deb - dpkg -i cuda-keyring_1.0-1_all.deb - apt-get update - apt-get install -y datacenter-gpu-manager - # libnvidia-nscq needed for A100/A800 and H100/H800 systems - apt-get install -y libnvidia-nscq-550 - - type: shell - destination: add_dcgm_to_op_config.sh - content: | - #!/bin/bash - tee -a /etc/google-cloud-ops-agent/config.yaml > /dev/null << EOF - metrics: - receivers: - dcgm: - type: dcgm - service: - pipelines: - dcgm: - receivers: - - dcgm - EOF - - type: shell - destination: systemctl_services.sh - content: | - #!/bin/bash - set -e -o pipefail - # workaround b/309016676 (systemd-resolved restarts 4 times causing DNS - # resolution failures during google-startup-scripts.service) - systemctl daemon-reload - systemctl enable delay-a3.service - systemctl enable mount-local-ssd.service - systemctl enable nvidia-dcgm - - type: shell - destination: remove_snap_gcloud.sh - content: | - #!/bin/bash - # THIS RUNNER MUST BE THE LAST RUNNER BECAUSE IT WILL BREAK GSUTIL IN - # PARENT SCRIPT OF STARTUP-SCRIPT MODULE - set -e -o pipefail - # Remove original DLVM gcloud, lxds install due to conflict with snapd and NFS - snap remove google-cloud-cli lxd - # Install key and google-cloud-cli from apt repo - GCLOUD_APT_SOURCE="/etc/apt/sources.list.d/google-cloud-sdk.list" - if [ ! -f "${GCLOUD_APT_SOURCE}" ]; then - # indentation matters in EOT below; do not blindly edit! - cat < "${GCLOUD_APT_SOURCE}" - deb [signed-by=/usr/share/keyrings/cloud.google.asc] https://packages.cloud.google.com/apt cloud-sdk main - EOT - fi - curl -o /usr/share/keyrings/cloud.google.asc https://packages.cloud.google.com/apt/doc/apt-key.gpg - apt-get update - apt-get install --assume-yes google-cloud-cli - # Clean up the bash executable hash for subsequent steps using gsutil - hash -r - -- group: slurm-build - modules: - - id: slurm-image - source: modules/packer/custom-image - kind: packer - use: - - image_build_script - - sysnet - settings: - # building this image does not require a GPU-enabled VM but must *not* be - # run on a N-series VM otherwise, the "open" drivers will not install - machine_type: c2d-standard-32 - source_image_project_id: [$(vars.source_image_project_id)] - source_image: $(vars.source_image) - image_family: $(vars.final_image_family) diff --git a/examples/machine-learning/a3-highgpu-8g/v5-legacy/ml-slurm-a3-2-cluster-v5-legacy.yaml b/examples/machine-learning/a3-highgpu-8g/v5-legacy/ml-slurm-a3-2-cluster-v5-legacy.yaml deleted file mode 100644 index b504650c7f..0000000000 --- a/examples/machine-learning/a3-highgpu-8g/v5-legacy/ml-slurm-a3-2-cluster-v5-legacy.yaml +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- -blueprint_name: slurm-a3-cluster - -terraform_backend_defaults: - type: gcs - configuration: - bucket: customer-tf-state-bucket # modify to be a bucket owned and writable by customer - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: slurm-a3-cluster - region: customer-region - zone: customer-zone - server_ip_homefs: 0.0.0.0 ## MUST set to IP address of Filestore instance from base deployment! - remote_mount_homefs: /nfsshare - local_mount_homefs: /home - zones: - - $(vars.zone) - disk_size_gb: 200 - final_image_family: slurm-dlvm - slurm_cluster_name: slurm0 - enable_reconfigure: true - enable_cleanup_compute: true - enable_cleanup_subscriptions: true - a3_partition_name: a3 - a3_static_cluster_size: 32 - # a3_reservation_name must be specified; if Google staff have provided you - # with a reservation name, use it. Otherwise supply user-created reservation. - a3_reservation_name: a3-reservation-0 - # a3_maintenance_interval should be empty string by default; if Google staff - # have created a reservation, they will also provide a3_maintenance_interval - a3_maintenance_interval: "" - # network parameters must match base blueprint deployment_name! - # these values are accurate if deployment_name was not modified from example - network_name_system: slurm-a3-base-sysnet - subnetwork_name_system: slurm-a3-base-sysnet-subnet - -deployment_groups: -- group: cluster - modules: - - id: sysnet - source: modules/network/pre-existing-vpc - settings: - network_name: $(vars.network_name_system) - subnetwork_name: $(vars.subnetwork_name_system) - - - id: gpunets - source: modules/network/multivpc - settings: - global_ip_address_range: 10.0.0.0/9 - network_name_prefix: $(vars.deployment_name)-gpunet - network_count: 4 - subnetwork_cidr_suffix: 20 - - - id: homefs - source: modules/file-system/pre-existing-network-storage - settings: - server_ip: $(vars.server_ip_homefs) - remote_mount: $(vars.remote_mount_homefs) - local_mount: $(vars.local_mount_homefs) - - - id: compute_sa - source: community/modules/project/service-account - settings: - name: compute - project_roles: - - logging.logWriter - - monitoring.metricWriter - - pubsub.subscriber - - storage.objectAdmin - - - id: debug_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_static: 0 - node_count_dynamic_max: 4 - machine_type: n2-standard-2 - instance_image_custom: true - instance_image: - family: $(vars.final_image_family) - project: $(vars.project_id) - - - id: debug_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - debug_node_group - - sysnet - - homefs - settings: - partition_name: debug - exclusive: false - enable_placement: false - - - id: a3_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - use: - - gpunets - settings: - reservation_name: $(vars.a3_reservation_name) - maintenance_interval: $(vars.a3_maintenance_interval) - node_count_static: $(vars.a3_static_cluster_size) - node_count_dynamic_max: 0 - disk_type: pd-ssd - machine_type: a3-highgpu-8g - instance_image_custom: true - disable_public_ips: true - enable_smt: true - instance_image: - family: $(vars.final_image_family) - project: $(vars.project_id) - node_conf: - CoresPerSocket: 52 - ThreadsPerCore: 2 - on_host_maintenance: TERMINATE - service_account: - email: $(compute_sa.service_account_email) - scopes: - - cloud-platform - bandwidth_tier: gvnic_enabled - - - id: a3_partition - source: 
community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - a3_node_group - - sysnet - - homefs - settings: - partition_name: $(vars.a3_partition_name) - enable_placement: false - exclusive: false - is_default: true - partition_conf: - OverSubscribe: EXCLUSIVE - - - id: controller_startup - source: modules/scripts/startup-script - settings: - runners: - - type: shell - destination: stage_scripts.sh - content: | - #!/bin/bash - # use script from master branch which is actively maintained - curl -s --create-dirs -o /opt/apps/adm/slurm/scripts/receive-data-path-manager \ - https://raw.githubusercontent.com/GoogleCloudPlatform/slurm-gcp/master/tools/prologs-epilogs/receive-data-path-manager - chmod 0755 /opt/apps/adm/slurm/scripts/receive-data-path-manager - mkdir -p /opt/apps/adm/slurm/partition-$(vars.a3_partition_name)-prolog_slurmd.d - mkdir -p /opt/apps/adm/slurm/partition-$(vars.a3_partition_name)-epilog_slurmd.d - ln -s /opt/apps/adm/slurm/scripts/receive-data-path-manager /opt/apps/adm/slurm/partition-$(vars.a3_partition_name)-prolog_slurmd.d/start-rxdm.prolog_slurmd - ln -s /opt/apps/adm/slurm/scripts/receive-data-path-manager /opt/apps/adm/slurm/partition-$(vars.a3_partition_name)-epilog_slurmd.d/stop-rxdm.epilog_slurmd - - type: shell - destination: reset_enroot.sh - content: | - #!/bin/bash - # reset enroot to defaults of files under /home and running under /run - # allows basic enroot testing on login/controller nodes (reduced I/O) - rm -f /etc/enroot/enroot.conf - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - sysnet - - a3_partition - - debug_partition - - homefs - settings: - machine_type: c2-standard-8 - cloud_parameters: - resume_rate: 0 - resume_timeout: 900 - suspend_rate: 0 - suspend_timeout: 600 - no_comma_params: false - tree_width: $(vars.a3_static_cluster_size) - instance_image_custom: true - instance_image: - family: $(vars.final_image_family) - project: $(vars.project_id) - slurm_conf_tpl: modules/embedded/community/modules/scheduler/schedmd-slurm-gcp-v5-controller/etc/long-prolog-slurm.conf.tpl - controller_startup_script: $(controller_startup.startup_script) - enable_external_prolog_epilog: true - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - sysnet - - slurm_controller - settings: - disk_type: pd-balanced - instance_image_custom: true - instance_image: - family: $(vars.final_image_family) - project: $(vars.project_id) - machine_type: c2-standard-4 - startup_script: | - #!/bin/bash - # reset enroot to defaults of files under /home and running under /run - # allows basic enroot testing on login node (reduced I/O) - rm -f /etc/enroot/enroot.conf diff --git a/examples/ml-slurm-v5-legacy.yaml b/examples/ml-slurm-v5-legacy.yaml deleted file mode 100644 index 113c052405..0000000000 --- a/examples/ml-slurm-v5-legacy.yaml +++ /dev/null @@ -1,266 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- -blueprint_name: ml-slurm - -vars: - project_id: ## Set project id here - deployment_name: ml-example - region: asia-southeast1 - zone: asia-southeast1-b - zones: - - asia-southeast1-a - - asia-southeast1-b - - asia-southeast1-c - new_image: - family: ml-slurm - project: $(vars.project_id) - disk_size_gb: 200 - metadata: # Workaround for https://github.com/GoogleCloudPlatform/cluster-toolkit/discussions/3243 - VmDnsSetting: GlobalOnly - -# Recommended to use GCS backend for Terraform state -# See https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/examples#optional-setting-up-a-remote-terraform-state -# -# terraform_backend_defaults: -# type: gcs -# configuration: -# bucket: <> - -deployment_groups: -- group: primary - modules: - - id: network - source: modules/network/pre-existing-vpc - - # this example anticipates that the VPC default network has internal traffic - # allowed and IAP tunneling for SSH connections - - id: firewall_rule - source: modules/network/firewall-rules - use: - - network - settings: - ingress_rules: - - name: $(vars.deployment_name)-allow-internal-traffic - description: Allow internal traffic - destination_ranges: - - $(network.subnetwork_address) - source_ranges: - - $(network.subnetwork_address) - allow: - - protocol: tcp - ports: - - 0-65535 - - protocol: udp - ports: - - 0-65535 - - protocol: icmp - - name: $(vars.deployment_name)-allow-iap-ssh - description: Allow IAP-tunneled SSH connections - destination_ranges: - - $(network.subnetwork_address) - source_ranges: - - 35.235.240.0/20 - allow: - - protocol: tcp - ports: - - 22 - - - id: homefs - source: modules/file-system/filestore - use: - - network - settings: - local_mount: /home - size_gb: 2560 - filestore_tier: BASIC_SSD - - - id: script - source: modules/scripts/startup-script - settings: - runners: - - type: shell - destination: install-ml-libraries.sh - content: | - #!/bin/bash - # this script is designed to execute on Slurm images published by SchedMD that: - # - are based on Debian distribution of Linux - # - have NVIDIA drivers pre-installed - - set -e -o pipefail - - echo "deb https://packages.cloud.google.com/apt google-fast-socket main" > /etc/apt/sources.list.d/google-fast-socket.list - apt-get update --allow-releaseinfo-change - apt-get install --assume-yes google-fast-socket - - CONDA_BASE=/opt/conda - - if [ -d $CONDA_BASE ]; then - exit 0 - fi - - DL_DIR=\$(mktemp -d) - cd $DL_DIR - curl -L -O https://github.com/conda-forge/miniforge/releases/download/24.7.1-2/Miniforge3-24.7.1-2-Linux-x86_64.sh - HOME=$DL_DIR bash Miniforge3-24.7.1-2-Linux-x86_64.sh -b -p $CONDA_BASE - cd - - rm -rf $DL_DIR - unset DL_DIR - - source $CONDA_BASE/bin/activate base - conda init --system - conda config --system --set auto_activate_base False - # following channel ordering is important! use strict_priority! 
- conda config --system --set channel_priority strict - conda update -n base conda --yes - - ### create a virtual environment for tensorflow - conda create -n tf python=3.11 --yes - conda activate tf - pip install tensorflow[and-cuda]==2.18.* - -- group: packer - modules: - - id: custom-image - source: modules/packer/custom-image - kind: packer - use: - - network - - script - settings: - # give VM a public IP to ensure startup script can reach public internet - # w/o new VPC - omit_external_ip: false - source_image_project_id: [schedmd-slurm-public] - # see latest in https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/docs/images.md#published-image-family - source_image_family: slurm-gcp-5-12-debian-11 - # You can find size of source image by using following command - # gcloud compute images describe-from-family --project schedmd-slurm-public - disk_size: $(vars.disk_size_gb) - disk_type: pd-ssd - image_family: $(vars.new_image.family) - # building this image does not require a GPU-enabled VM - machine_type: c2-standard-4 - state_timeout: 15m - -- group: cluster - modules: - - id: examples - source: modules/scripts/startup-script - settings: - runners: - - type: data - destination: /var/tmp/torch_test.sh - content: | - #!/bin/bash - source /etc/profile.d/conda.sh - conda activate pytorch - python3 torch_test.py - - type: data - destination: /var/tmp/torch_test.py - content: | - import torch - import torch.utils.benchmark as benchmark - - def batched_dot_mul_sum(a, b): - '''Computes batched dot by multiplying and summing''' - return a.mul(b).sum(-1) - - def batched_dot_bmm(a, b): - '''Computes batched dot by reducing to bmm''' - a = a.reshape(-1, 1, a.shape[-1]) - b = b.reshape(-1, b.shape[-1], 1) - return torch.bmm(a, b).flatten(-3) - - # use GPU if available, else CPU - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - print('Using device:', device) - if device.type == 'cuda': - print(torch.cuda.get_device_name(0)) - - # benchmarking - x = torch.randn(10000, 64) - t0 = benchmark.Timer( - stmt='batched_dot_mul_sum(x, x)', - setup='from __main__ import batched_dot_mul_sum', - globals={'x': x}) - t1 = benchmark.Timer( - stmt='batched_dot_bmm(x, x)', - setup='from __main__ import batched_dot_bmm', - globals={'x': x}) - print(t0.timeit(100)) - print(t1.timeit(100)) - - - id: a2_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 20 - bandwidth_tier: gvnic_enabled - machine_type: a2-highgpu-1g - instance_image: $(vars.new_image) - instance_image_custom: true - - - id: a2_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - a2_node_group - - homefs - - network - settings: - partition_name: a2 - is_default: true - - - id: g2_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 20 - bandwidth_tier: gvnic_enabled - machine_type: g2-standard-4 - instance_image: $(vars.new_image) - instance_image_custom: true - - - id: g2_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - g2_node_group - - homefs - - network - settings: - partition_name: g2 - enable_placement: false - exclusive: false - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network - - a2_partition - - g2_partition - - homefs - settings: - disable_controller_public_ips: false - instance_image: $(vars.new_image) - instance_image_custom: true - - - id: 
slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - examples - - network - - slurm_controller - settings: - disable_login_public_ips: false - instance_image: $(vars.new_image) - instance_image_custom: true diff --git a/tools/cloud-build/daily-tests/blueprints/lustre-slurm-v5-legacy.yaml b/tools/cloud-build/daily-tests/blueprints/lustre-slurm-v5-legacy.yaml deleted file mode 100644 index e7e9d5e09e..0000000000 --- a/tools/cloud-build/daily-tests/blueprints/lustre-slurm-v5-legacy.yaml +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -blueprint_name: test-slurm-lustre - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: test-slurm-lustre - region: us-central1 - zone: us-central1-a - machine_type: n2-standard-2 - disk_type: pd-ssd - # enable_placement: false - # on_host_maintenance: MIGRATE - num_nodes: 1 - centos_image: - family: slurm-gcp-5-12-hpc-centos-7 - project: schedmd-slurm-public - rocky_image: - family: slurm-gcp-5-12-hpc-rocky-linux-8 - project: schedmd-slurm-public - -deployment_groups: -- group: primary - modules: - - - id: network1 - source: modules/network/pre-existing-vpc - - ########### - # Storage # - ########### - - # This file system has an associated license cost. 
- # https://console.developers.google.com/marketplace/product/ddnstorage/exascaler-cloud - - id: lustre - source: community/modules/file-system/DDN-EXAScaler - use: [network1] - settings: - local_mount: /lustre - waiter: deploymentmanager - mgs: - nic_type: "GVNIC" - node_type: n2-standard-2 - node_count: 1 - node_cpu: "Intel Cascade Lake" - public_ip: true - mds: - nic_type: "GVNIC" - node_type: n2-standard-2 - node_count: 1 - node_cpu: "Intel Cascade Lake" - public_ip: true - oss: - nic_type: "GVNIC" - node_type: n2-standard-2 - node_count: 3 - node_cpu: "Intel Cascade Lake" - public_ip: true - - ############# - # Slurm VMs # - ############# - - # # Ubuntu 20.04 LTS - # - id: ubuntu_node_group - # source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - # settings: - # node_count_dynamic_max: $(vars.num_nodes) - # instance_image: - # family: slurm-gcp-5-12-ubuntu-2004-lts - # project: schedmd-slurm-public - - # - id: ubuntu_partition - # source: community/modules/compute/schedmd-slurm-gcp-v5-partition - # use: - # - network1 - # - ubuntu_node_group - # - lustre - # settings: - # partition_name: ubuntu - - # Rocky Linux 8 - - id: rocky_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: $(vars.num_nodes) - instance_image: $(vars.rocky_image) - - - id: rocky_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - rocky_node_group - - lustre - settings: - partition_name: rocky - - # CentOS 7 - - id: centos_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: $(vars.num_nodes) - instance_image: $(vars.centos_image) - - - id: centos_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - centos_node_group - - lustre - settings: - partition_name: centos - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network1 - # - ubuntu_partition - - rocky_partition - - centos_partition - - lustre - settings: - disable_controller_public_ips: false - # cloud_parameters: - # no_comma_params: false - # resume_rate: 0 - # resume_timeout: 1200 - # suspend_rate: 0 - # suspend_timeout: 1200 - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller - settings: - disable_login_public_ips: false diff --git a/tools/validate_configs/test_configs/gpu-v5-legacy.yaml b/tools/validate_configs/test_configs/gpu-v5-legacy.yaml deleted file mode 100644 index 16f4a9fde8..0000000000 --- a/tools/validate_configs/test_configs/gpu-v5-legacy.yaml +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- - -blueprint_name: gpu-vm - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: gpu-vm - region: us-central1 - zone: us-central1-c - instance_image_vm: - family: common-dl-gpu-debian-10 - project: ml-images - -# Broken into 3 groups to better manage GPU quotas -deployment_groups: -- group: high-count-auto - modules: - - id: network-hca - source: modules/network/pre-existing-vpc - - - id: auto-megagpu - source: modules/compute/vm-instance - use: - - network-hca - settings: - name_prefix: auto-megagpu - machine_type: a2-megagpu-16g - instance_image: $(vars.instance_image_vm) - -- group: high-count-manual - modules: - - id: network-hcm - source: modules/network/pre-existing-vpc - - - id: manual-megagpu - source: modules/compute/vm-instance - use: - - network-hcm - settings: - name_prefix: manual-megagpu - machine_type: a2-megagpu-16g - instance_image: $(vars.instance_image_vm) - guest_accelerator: - - type: nvidia-tesla-a100 - count: 16 - -- group: low-count - modules: - # Source is an embedded module, denoted by "modules/*" without ./, ../, / - # as a prefix. To refer to a local or community module, prefix with ./, ../ or / - - id: network1 - source: modules/network/pre-existing-vpc - - - id: nogpu-n1 - source: modules/compute/vm-instance - use: - - network1 - settings: - name_prefix: nogpu-n1 - machine_type: n1-standard-8 - instance_image: $(vars.instance_image_vm) - - - id: manual-n1 - source: modules/compute/vm-instance - use: - - network1 - settings: - name_prefix: manual-n1 - machine_type: n1-standard-32 - on_host_maintenance: TERMINATE - instance_image: $(vars.instance_image_vm) - guest_accelerator: - - type: nvidia-tesla-t4 - count: 1 - - - id: auto-highgpu - source: modules/compute/vm-instance - use: - - network1 - settings: - name_prefix: auto-highgpu - machine_type: a2-highgpu-1g - instance_image: $(vars.instance_image_vm) - - - id: manual-highgpu - source: modules/compute/vm-instance - use: - - network1 - settings: - name_prefix: manual-highgpu - machine_type: a2-highgpu-2g - instance_image: $(vars.instance_image_vm) - guest_accelerator: - - type: nvidia-tesla-a100 - count: 2 - - - id: auto-ultragpu - source: modules/compute/vm-instance - use: - - network1 - settings: - name_prefix: auto-ultragpu - machine_type: a2-ultragpu-2g - instance_image: $(vars.instance_image_vm) - - - id: manual-ultragpu - source: modules/compute/vm-instance - use: - - network1 - settings: - name_prefix: manual-ultragpu - machine_type: a2-ultragpu-2g - instance_image: $(vars.instance_image_vm) - guest_accelerator: - - type: nvidia-a100-80gb - count: 2 - -- group: slurm-gcp-v5 - modules: - - id: network_slurm - source: modules/network/pre-existing-vpc - - - id: nogpu_nodegroup - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - name: nogpu - node_count_dynamic_max: 4 - machine_type: n2-standard-2 - - - id: manual_nodegroup - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - name: man - node_count_dynamic_max: 4 - machine_type: a2-ultragpu-2g - guest_accelerator: - - type: nvidia-a100-80gb - count: 2 - - - id: auto_nodegroup - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - name: auto - node_count_dynamic_max: 4 - machine_type: a2-ultragpu-2g - - - id: partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network_slurm - - nogpu_nodegroup - - manual_nodegroup - - auto_nodegroup - settings: - partition_name: debug - enable_placement: false - is_default: true - 
- - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network_slurm - - partition - settings: - disable_controller_public_ips: false - machine_type: a2-highgpu-2g - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network_slurm - - slurm_controller - settings: - disable_login_public_ips: false - machine_type: a2-highgpu-1g diff --git a/tools/validate_configs/test_configs/node-groups-v5-legacy.yaml b/tools/validate_configs/test_configs/node-groups-v5-legacy.yaml deleted file mode 100644 index 9dcd1332bc..0000000000 --- a/tools/validate_configs/test_configs/node-groups-v5-legacy.yaml +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -blueprint_name: node-group-test-v5 - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: slurm-gcp-v5 - region: us-central1 - zone: us-central1-c - -# Documentation for each of the modules used below can be found at -# https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md - -deployment_groups: -- group: primary - modules: - - id: network1 - source: modules/network/vpc - - - id: homefs - source: community/modules/file-system/nfs-server - use: [network1] - settings: - local_mounts: [/home] - auto_delete_disk: true - - ## Single node group, use defaults where appropriate - - id: default_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - name: simple - machine_type: c2-standard-30 - - - id: one_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - default_node_group - settings: - partition_name: simple - - ## Complex partition using node groups - - id: node_group1 - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - name: c30 - machine_type: c2-standard-30 - instance_image: - family: slurm-gcp-5-12-debian-11 - project: schedmd-slurm-public - instance_image_custom: true - - - id: node_group2 - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - name: c60 - machine_type: c2-standard-60 - instance_image: - name: slurm-gcp-dev-hpc-centos-7-1684970018 - project: schedmd-slurm-public - - - id: node_group3 - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - name: cd112 - machine_type: c2d-standard-112 - instance_image: - family: slurm-gcp-5-12-hpc-centos-7 - project: schedmd-slurm-public - instance_image_custom: true - enable_smt: true - - - id: node_group4 - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - name: cd56 - machine_type: c2d-standard-56 - - - id: multiple_node_groups - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - node_group1 - - node_group2 - - node_group3 - - node_group4 - settings: - partition_name: multng - enable_reconfigure: true - - ## Explicitly set node partition with one node 
group - - id: one_node_group_explicit - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - settings: - partition_name: explng - enable_placement: false - is_default: true - node_groups: - - node_count_static: 0 - node_count_dynamic_max: 4 - group_name: expl - node_conf: {} - additional_disks: [] - additional_networks: [] - bandwidth_tier: null - can_ip_forward: false - disable_smt: false - disk_auto_delete: true - disk_labels: {} - disk_size_gb: 50 - disk_type: pd-standard - enable_confidential_vm: false - enable_oslogin: true - enable_shielded_vm: false - enable_spot_vm: false - gpu: null - instance_template: null - labels: $(vars.labels) - machine_type: n2-standard-16 - maintenance_interval: "" - metadata: {} - min_cpu_platform: null - on_host_maintenance: TERMINATE - preemptible: false - reservation_name: null # will be replaced by default value empty string - service_account: null - shielded_instance_config: null - spot_instance_config: null - source_image_family: null - source_image_project: null - source_image: null - tags: [] - access_config: [] - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network1 - - one_node_group - - multiple_node_groups - - one_node_group_explicit - - homefs - settings: - disable_controller_public_ips: false - enable_reconfigure: true - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller - settings: - machine_type: n2-standard-4 - disable_login_public_ips: false diff --git a/tools/validate_configs/test_configs/slurm-gcp-v5-startup-scripts-v5-legacy.yaml b/tools/validate_configs/test_configs/slurm-gcp-v5-startup-scripts-v5-legacy.yaml deleted file mode 100644 index f15605b90d..0000000000 --- a/tools/validate_configs/test_configs/slurm-gcp-v5-startup-scripts-v5-legacy.yaml +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -blueprint_name: hpc-cluster-slurm-v5 - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: hpc-small-v5 - region: us-west4 - zone: us-west4-c - -deployment_groups: -- group: primary - modules: - - id: network1 - source: modules/network/vpc - - - id: homefs - source: community/modules/file-system/nfs-server - use: [network1] - settings: - local_mounts: [/home] - auto_delete_disk: true - - - id: bucket - source: community/modules/file-system/cloud-storage-bucket - settings: - name_prefix: input-data - local_mount: /data - random_suffix: true - mount_options: defaults,_netdev,implicit_dirs,allow_other - - # Used by the partitions, this tests startup scripts that are partition specific - - id: startup-partition - source: modules/scripts/startup-script - settings: - runners: - - type: shell - destination: startup-test-partition.sh - content: | - #!/bin/bash - set -ex - echo "Hello partition! 
Hostname: \$(hostname)" - - - id: debug_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 4 - machine_type: n2-standard-2 - - - id: debug_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - bucket - - debug_node_group - settings: - partition_name: debug - enable_placement: false - is_default: true - - - id: compute_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 20 - - - id: compute_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - bucket - - compute_node_group - - startup-partition - settings: - partition_name: compute - - # Used by the login and controller, the controller applies it to all partitions as well. - - id: startup-all - source: modules/scripts/startup-script - settings: - runners: - - type: shell - destination: startup-test-all.sh - content: | - #!/bin/bash - set -ex - echo "Hello world! Hostname: \$(hostname)" - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network1 - - debug_partition - - compute_partition - - homefs - - bucket - - startup-all - settings: - disable_controller_public_ips: false - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller - - startup-all - settings: - disable_login_public_ips: false diff --git a/tools/validate_configs/test_configs/slurm-static-test-v5-legacy.yaml b/tools/validate_configs/test_configs/slurm-static-test-v5-legacy.yaml deleted file mode 100644 index 46e7eb37f0..0000000000 --- a/tools/validate_configs/test_configs/slurm-static-test-v5-legacy.yaml +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- - -blueprint_name: test-slurm-static-nodes-v5 - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: test-v5 - region: us-central1 - zone: us-central1-a - machine_type: n1-standard-2 - instance_image: - # Please refer to the following link for the latest images: - # https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/docs/images.md#supported-operating-systems - # family: slurm-gcp-5-12-ubuntu-2004-lts - # family: slurm-gcp-5-12-hpc-centos-7 - family: slurm-gcp-5-12-hpc-rocky-linux-8 - # family: slurm-gcp-5-12-debian-11 - project: schedmd-slurm-public - instance_image_custom: true - enable_reconfigure: true - enable_cleanup_compute: true - enable_cleanup_subscriptions: true - # num_dynamic_nodes: 2 - num_static_nodes: 3 - -deployment_groups: -- group: primary - modules: - - ########### - # Network # - ########### - - id: network1 - source: modules/network/pre-existing-vpc - - ############# - # Slurm VMs # - ############# - # - id: dynamic_node_group - # source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - # settings: - # node_count_dynamic_max: $(vars.num_dynamic_nodes) - # machine_type: n2-standard-2 - - # - id: dynamic_partition - # source: community/modules/compute/schedmd-slurm-gcp-v5-partition - # use: - # - network1 - # - dynamic_node_group - # settings: - # partition_name: dynamic - # # exclusive: false # allows nodes to stay up after jobs are done - # enable_placement: false # the default is: true - # # is_default: true - - - id: static_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 0 - node_count_static: $(vars.num_static_nodes) - - - id: static_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - static_node_group - settings: - partition_name: static - enable_placement: false - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network1 - # - dynamic_partition - - static_partition - settings: - disable_controller_public_ips: false - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller - settings: - disable_login_public_ips: false diff --git a/tools/validate_configs/test_configs/zone-policies-slurm-v5-legacy.yaml b/tools/validate_configs/test_configs/zone-policies-slurm-v5-legacy.yaml deleted file mode 100644 index 0403fb7fdd..0000000000 --- a/tools/validate_configs/test_configs/zone-policies-slurm-v5-legacy.yaml +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- - -blueprint_name: slurm-gcp-v5-hpc-centos7 - -vars: - project_id: ## Set GCP Project ID Here ## - deployment_name: slurm-gcp-v5 - region: us-central1 - zone: us-central1-c - additional_zones: - - us-central1-a - - us-central1-b - -# Documentation for each of the modules used below can be found at -# https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md - -deployment_groups: -- group: primary - modules: - - id: network1 - source: modules/network/vpc - - - id: homefs - source: modules/file-system/filestore - use: [network1] - settings: - local_mount: /home - - # Partition which permits a specific zone - - id: zonal_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 4 - machine_type: n2-standard-2 - disable_public_ips: false - - id: zonal_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - zonal_node_group - settings: - partition_name: zonal - enable_placement: false - - # Partition which allows a total of 3 zones - - id: multizonal_node_group - source: community/modules/compute/schedmd-slurm-gcp-v5-node-group - settings: - node_count_dynamic_max: 4 - machine_type: n2-standard-2 - - id: multizonal_partition - source: community/modules/compute/schedmd-slurm-gcp-v5-partition - use: - - network1 - - homefs - - multizonal_node_group - settings: - partition_name: multiz - enable_placement: false - zones: $(vars.additional_zones) - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller - use: - - network1 - - homefs - - zonal_partition - - multizonal_partition - settings: - disable_controller_public_ips: false - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v5-login - use: - - network1 - - slurm_controller - settings: - machine_type: n2-standard-4 - disable_login_public_ips: false From d5f6312c9573ae41978cdd80fbedf6545283cea1 Mon Sep 17 00:00:00 2001 From: Harsh Thakkar Date: Mon, 6 Jan 2025 13:12:57 -0800 Subject: [PATCH 086/140] Revert "Revert use of toolkit_modules_url in examples" --- community/examples/tutorial-starccm-slurm.yaml | 2 ++ docs/hybrid-slurm-cluster/blueprints/hybrid-configuration.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/community/examples/tutorial-starccm-slurm.yaml b/community/examples/tutorial-starccm-slurm.yaml index ebf52861ff..9e64014ea7 100644 --- a/community/examples/tutorial-starccm-slurm.yaml +++ b/community/examples/tutorial-starccm-slurm.yaml @@ -15,6 +15,8 @@ --- blueprint_name: starccm-on-slurm +toolkit_modules_url: github.com/GoogleCloudPlatform/cluster-toolkit +toolkit_modules_version: v1.41.0 vars: project_id: ## Set GCP Project ID Here ## diff --git a/docs/hybrid-slurm-cluster/blueprints/hybrid-configuration.yaml b/docs/hybrid-slurm-cluster/blueprints/hybrid-configuration.yaml index 45312348ed..813a90f0b6 100644 --- a/docs/hybrid-slurm-cluster/blueprints/hybrid-configuration.yaml +++ b/docs/hybrid-slurm-cluster/blueprints/hybrid-configuration.yaml @@ -15,6 +15,8 @@ --- blueprint_name: hpc-cluster-hybrid-v5 +toolkit_modules_url: github.com/GoogleCloudPlatform/cluster-toolkit +toolkit_modules_version: v1.41.0 vars: project_id: ## <> From fb53d148227ba85eac7c0e0e60417232c8f31748 Mon Sep 17 00:00:00 2001 From: Harsh Thakkar Date: Mon, 6 Jan 2025 13:25:51 -0800 Subject: [PATCH 087/140] Update tutorial-starccm-slurm.yaml --- community/examples/tutorial-starccm-slurm.yaml | 17 +++++++++++++++++ 1 file changed, 17 
insertions(+) diff --git a/community/examples/tutorial-starccm-slurm.yaml b/community/examples/tutorial-starccm-slurm.yaml index 9e64014ea7..e3e50b013d 100644 --- a/community/examples/tutorial-starccm-slurm.yaml +++ b/community/examples/tutorial-starccm-slurm.yaml @@ -24,6 +24,23 @@ vars: region: us-central1 zone: us-central1-c +terraform_providers: + google: + source: hashicorp/google + version: 5.45.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + + google-beta: + source: hashicorp/google-beta + version: 5.45.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + # Documentation for each of the modules used below can be found at # https://github.com/GoogleCloudPlatform/hpc-toolkit/blob/main/modules/README.md From 7f1583c1874cf421ee6c674280ddf2cae7e0b357 Mon Sep 17 00:00:00 2001 From: Harsh Thakkar Date: Mon, 6 Jan 2025 13:26:10 -0800 Subject: [PATCH 088/140] Update hybrid-configuration.yaml --- .../blueprints/hybrid-configuration.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/hybrid-slurm-cluster/blueprints/hybrid-configuration.yaml b/docs/hybrid-slurm-cluster/blueprints/hybrid-configuration.yaml index 813a90f0b6..0f96ec1ac5 100644 --- a/docs/hybrid-slurm-cluster/blueprints/hybrid-configuration.yaml +++ b/docs/hybrid-slurm-cluster/blueprints/hybrid-configuration.yaml @@ -28,6 +28,23 @@ vars: network_name: compute-vpc-network subnetwork_name: primary-subnet +terraform_providers: + google: + source: hashicorp/google + version: 5.45.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + + google-beta: + source: hashicorp/google-beta + version: 5.45.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + deployment_groups: # Uncomment the below section if network used for bursting has not been created # - group: create_network From 1b65000d1c2c884e11108c58a69f38d24713d68c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 22:19:17 +0000 Subject: [PATCH 089/140] Bump github.com/go-git/go-git/v5 from 5.12.0 to 5.13.1 Bumps [github.com/go-git/go-git/v5](https://github.com/go-git/go-git) from 5.12.0 to 5.13.1. - [Release notes](https://github.com/go-git/go-git/releases) - [Commits](https://github.com/go-git/go-git/compare/v5.12.0...v5.13.1) --- updated-dependencies: - dependency-name: github.com/go-git/go-git/v5 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- go.mod | 6 +++--- go.sum | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/go.mod b/go.mod index 6e9502407e..193ad2ba2d 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.22 require ( cloud.google.com/go/storage v1.41.0 // indirect - github.com/go-git/go-git/v5 v5.12.0 + github.com/go-git/go-git/v5 v5.13.1 github.com/hashicorp/go-getter v1.7.6 github.com/hashicorp/hcl v1.0.0 // indirect github.com/hashicorp/hcl/v2 v2.23.0 @@ -63,7 +63,7 @@ require ( cloud.google.com/go/compute/metadata v0.3.0 // indirect cloud.google.com/go/iam v1.1.8 // indirect github.com/Microsoft/go-winio v0.6.1 // indirect - github.com/ProtonMail/go-crypto v1.1.0-alpha.2 // indirect + github.com/ProtonMail/go-crypto v1.1.3 // indirect github.com/agext/levenshtein v1.2.3 github.com/aws/aws-sdk-go v1.44.122 // indirect github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d // indirect @@ -90,7 +90,7 @@ require ( github.com/mitchellh/go-wordwrap v1.0.1 // indirect github.com/pjbgf/sha1cd v0.3.0 // indirect github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect - github.com/skeema/knownhosts v1.2.2 // indirect + github.com/skeema/knownhosts v1.3.0 // indirect github.com/spf13/pflag v1.0.5 github.com/ulikunitz/xz v0.5.10 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect diff --git a/go.sum b/go.sum index f976fd23a8..506041e49b 100644 --- a/go.sum +++ b/go.sum @@ -195,8 +195,8 @@ github.com/Microsoft/go-winio v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= -github.com/ProtonMail/go-crypto v1.1.0-alpha.2 h1:bkyFVUP+ROOARdgCiJzNQo2V2kiB97LyUpzH9P6Hrlg= -github.com/ProtonMail/go-crypto v1.1.0-alpha.2/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE= +github.com/ProtonMail/go-crypto v1.1.3 h1:nRBOetoydLeUb4nHajyO2bKqMLfWQ/ZPwkXqXxPxCFk= +github.com/ProtonMail/go-crypto v1.1.3/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE= github.com/agext/levenshtein v1.2.3 h1:YB2fHEn0UJagG8T1rrWknE3ZQzWM06O8AMAatNn7lmo= github.com/agext/levenshtein v1.2.3/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= @@ -237,8 +237,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/elazarl/goproxy v0.0.0-20230808193330-2592e75ae04a h1:mATvB/9r/3gvcejNsXKSkQ6lcIaNec2nyfOdlTBR2lU= -github.com/elazarl/goproxy v0.0.0-20230808193330-2592e75ae04a/go.mod h1:Ro8st/ElPeALwNFlcTpWmkr6IoMFfkjXAvTHpevnDsM= +github.com/elazarl/goproxy v1.2.3 h1:xwIyKHbaP5yfT6O9KIeYJR5549MXRQkoQMRXGztz8YQ= +github.com/elazarl/goproxy v1.2.3/go.mod h1:YfEbZtqP4AetfO6d40vWchF3znWX7C7Vd6ZMfdL8z64= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods 
v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= @@ -257,16 +257,16 @@ github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/gliderlabs/ssh v0.3.7 h1:iV3Bqi942d9huXnzEF2Mt+CY9gLu8DNM4Obd+8bODRE= -github.com/gliderlabs/ssh v0.3.7/go.mod h1:zpHEXBstFnQYtGnB8k8kQLol82umzn/2/snG7alWVD8= +github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c= +github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic= github.com/go-git/go-billy/v5 v5.6.1 h1:u+dcrgaguSSkbjzHwelEjc0Yj300NUevrrPphk/SoRA= github.com/go-git/go-billy/v5 v5.6.1/go.mod h1:0AsLr1z2+Uksi4NlElmMblP5rPcDZNRCD8ujZCRR2BE= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4= github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII= -github.com/go-git/go-git/v5 v5.12.0 h1:7Md+ndsjrzZxbddRDZjF14qK+NN56sy6wkqaVrjZtys= -github.com/go-git/go-git/v5 v5.12.0/go.mod h1:FTM9VKtnI2m65hNI/TenDDDnUf2Q9FHnXYjuz9i5OEY= +github.com/go-git/go-git/v5 v5.13.1 h1:DAQ9APonnlvSWpvolXWIuV6Q6zXy2wHbN4cVlNR5Q+M= +github.com/go-git/go-git/v5 v5.13.1/go.mod h1:qryJB4cSBoq3FRoBRf5A77joojuBcmPJ0qu3XXXVixc= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= @@ -464,8 +464,8 @@ github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/skeema/knownhosts v1.2.2 h1:Iug2P4fLmDw9f41PB6thxUkNUkJzB5i+1/exaj40L3A= -github.com/skeema/knownhosts v1.2.2/go.mod h1:xYbVRSPxqBZFrdmDyMmsOs+uX1UZC3nTN3ThzgDxUwo= +github.com/skeema/knownhosts v1.3.0 h1:AM+y0rI04VksttfwjkSTNQorvGqmwATnvnAHpSgc0LY= +github.com/skeema/knownhosts v1.3.0/go.mod h1:sPINvnADmT/qYH1kfv+ePMmOBTH6Tbl7b5LvTDjFK7M= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= From 63dd3a38e987586d54267c258ce3834aed898717 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 Jan 2025 
22:19:37 +0000 Subject: [PATCH 090/140] Bump golang.org/x/sys from 0.28.0 to 0.29.0 Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.28.0 to 0.29.0. - [Commits](https://github.com/golang/sys/compare/v0.28.0...v0.29.0) --- updated-dependencies: - dependency-name: golang.org/x/sys dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 6e9502407e..f99e4240fa 100644 --- a/go.mod +++ b/go.mod @@ -98,7 +98,7 @@ require ( golang.org/x/crypto v0.31.0 // indirect golang.org/x/net v0.33.0 // indirect golang.org/x/oauth2 v0.21.0 // indirect - golang.org/x/sys v0.28.0 + golang.org/x/sys v0.29.0 golang.org/x/text v0.21.0 // indirect google.golang.org/grpc v1.64.1 // indirect google.golang.org/protobuf v1.34.2 // indirect diff --git a/go.sum b/go.sum index f976fd23a8..92d2312558 100644 --- a/go.sum +++ b/go.sum @@ -732,8 +732,8 @@ golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= -golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= +golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= From f3ffd14b42b6bd5a666eae7509d25d8a14a8248b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 00:14:29 +0000 Subject: [PATCH 091/140] Bump jinja2 from 3.1.4 to 3.1.5 in /community/front-end/ofe Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.4 to 3.1.5. - [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.1.4...3.1.5) --- updated-dependencies: - dependency-name: jinja2 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- community/front-end/ofe/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/community/front-end/ofe/requirements.txt b/community/front-end/ofe/requirements.txt index 22ed40cfac..efaabed2a5 100644 --- a/community/front-end/ofe/requirements.txt +++ b/community/front-end/ofe/requirements.txt @@ -46,7 +46,7 @@ identify==2.5.24 idna==3.7 importlib-resources==6.1.1 isort==5.12.0 -Jinja2==3.1.4 +Jinja2==3.1.5 jsonschema==4.20.0 jsonschema-specifications==2023.11.1 lazy-object-proxy==1.9.0 From c290e52cf4dfbc9bf3e5c99684a24edf73f9cbf4 Mon Sep 17 00:00:00 2001 From: Alyssa Date: Mon, 16 Dec 2024 21:16:41 +0000 Subject: [PATCH 092/140] Adding max_distance variable --- .../schedmd-slurm-gcp-v6-nodeset/README.md | 1 + .../schedmd-slurm-gcp-v6-nodeset/main.tf | 1 + .../schedmd-slurm-gcp-v6-nodeset/outputs.tf | 10 +++++++ .../schedmd-slurm-gcp-v6-nodeset/variables.tf | 12 ++++++++ .../schedmd-slurm-gcp-v6-controller/README.md | 2 +- .../modules/slurm_files/scripts/resume.py | 30 +++++++++++++++---- .../slurm_files/scripts/tests/common.py | 1 + .../slurm_files/scripts/tests/test_resume.py | 2 ++ .../partition.tf | 1 + .../variables.tf | 1 + 10 files changed, 55 insertions(+), 6 deletions(-) diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md index 297c40bb7a..f79a9307b5 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md @@ -196,6 +196,7 @@ No modules. | [node\_count\_dynamic\_max](#input\_node\_count\_dynamic\_max) | Maximum number of auto-scaling nodes allowed in this partition. | `number` | `10` | no | | [node\_count\_static](#input\_node\_count\_static) | Number of nodes to be statically created. | `number` | `0` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Instance availability Policy.

Note: Placement groups are not supported when on\_host\_maintenance is set to
"MIGRATE" and will be deactivated regardless of the value of
enable\_placement. To support enable\_placement, ensure on\_host\_maintenance is
set to "TERMINATE". | `string` | `"TERMINATE"` | no | +| [placement\_max\_distance](#input\_placement\_max\_distance) | Maximum distance between nodes in the placement group. Requires enable\_placement to be true. Values must be supported by the chosen machine type. | `number` | `null` | no | | [preemptible](#input\_preemptible) | Should use preemptibles to burst. | `bool` | `false` | no | | [project\_id](#input\_project\_id) | Project ID to create resources in. | `string` | n/a | yes | | [region](#input\_region) | The default region for Cloud resources. | `string` | n/a | yes | diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/main.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/main.tf index 84cb60457a..eca10e9d1a 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/main.tf @@ -71,6 +71,7 @@ locals { enable_confidential_vm = var.enable_confidential_vm enable_placement = var.enable_placement + placement_max_distance = var.placement_max_distance enable_oslogin = var.enable_oslogin enable_shielded_vm = var.enable_shielded_vm gpu = one(local.guest_accelerator) diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/outputs.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/outputs.tf index 5781d2415c..d618644d52 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/outputs.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/outputs.tf @@ -45,6 +45,16 @@ output "nodeset" { error_message = "Cannot use placement with static and auto-scaling nodes in the same node set." } + precondition { + condition = var.placement_max_distance == null || var.enable_placement + error_message = "placement_max_distance requires enable_placement to be set to true." + } + + precondition { + condition = !(startswith(var.machine_type, "a3-") && var.placement_max_distance == 1) + error_message = "A3 machines do not support a placement_max_distance of 1." + } + precondition { condition = var.reservation_name == "" || !var.dws_flex.enabled error_message = "Cannot use reservations with DWS Flex." diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/variables.tf index 3b7e342c32..82adca0b1b 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/variables.tf @@ -565,3 +565,15 @@ variable "dws_flex" { error_message = "Max duration must be more than 30 seconds, and cannot be more than two weeks." } } + +variable "placement_max_distance" { + type = number + description = "Maximum distance between nodes in the placement group. Requires enable_placement to be true. Values must be supported by the chosen machine type." + nullable = true + default = null + + validation { + condition = coalesce(var.placement_max_distance, 1) >= 1 && coalesce(var.placement_max_distance, 3) <= 3 + error_message = "Invalid value for placement_max_distance. Valid values are null, 1, 2, or 3." + } +} diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md index b03fbf0973..d485cb1765 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md @@ -336,7 +336,7 @@ limitations under the License. 
| [metadata](#input\_metadata) | Metadata, provided as a map. | `map(string)` | `{}` | no | | [min\_cpu\_platform](#input\_min\_cpu\_platform) | Specifies a minimum CPU platform. Applicable values are the friendly names of
CPU platforms, such as Intel Haswell or Intel Skylake. See the complete list:
https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform | `string` | `null` | no | | [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured on all instances. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
}))
| `[]` | no | -| [nodeset](#input\_nodeset) | Define nodesets, as a list. |
list(object({
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
node_conf = optional(map(string), {})
nodeset_name = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string)
enable_confidential_vm = optional(bool, false)
enable_placement = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
enable_maintenance_reservation = optional(bool, false)
enable_opportunistic_maintenance = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
dws_flex = object({
enabled = bool
max_run_duration = number
use_job_duration = bool
})
labels = optional(map(string), {})
machine_type = optional(string)
maintenance_interval = optional(string)
instance_properties_json = string
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
subnetwork_self_link = string
additional_networks = optional(list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
})))
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
spot = optional(bool, false)
tags = optional(list(string), [])
termination_action = optional(string)
reservation_name = optional(string)
future_reservation = string
startup_script = optional(list(object({
filename = string
content = string })), [])

zone_target_shape = string
zone_policy_allow = set(string)
zone_policy_deny = set(string)
}))
| `[]` | no | +| [nodeset](#input\_nodeset) | Define nodesets, as a list. |
list(object({
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
node_conf = optional(map(string), {})
nodeset_name = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string)
enable_confidential_vm = optional(bool, false)
enable_placement = optional(bool, false)
placement_max_distance = optional(number, null)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
enable_maintenance_reservation = optional(bool, false)
enable_opportunistic_maintenance = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
dws_flex = object({
enabled = bool
max_run_duration = number
use_job_duration = bool
})
labels = optional(map(string), {})
machine_type = optional(string)
maintenance_interval = optional(string)
instance_properties_json = string
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
subnetwork_self_link = string
additional_networks = optional(list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
})))
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
spot = optional(bool, false)
tags = optional(list(string), [])
termination_action = optional(string)
reservation_name = optional(string)
future_reservation = string
startup_script = optional(list(object({
filename = string
content = string })), [])

zone_target_shape = string
zone_policy_allow = set(string)
zone_policy_deny = set(string)
}))
| `[]` | no | | [nodeset\_dyn](#input\_nodeset\_dyn) | Defines dynamic nodesets, as a list. |
list(object({
nodeset_name = string
nodeset_feature = string
}))
| `[]` | no | | [nodeset\_tpu](#input\_nodeset\_tpu) | Define TPU nodesets, as a list. |
list(object({
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 5)
nodeset_name = string
enable_public_ip = optional(bool, false)
node_type = string
accelerator_config = optional(object({
topology = string
version = string
}), {
topology = ""
version = ""
})
tf_version = string
preemptible = optional(bool, false)
preserve_tpu = optional(bool, false)
zone = string
data_disks = optional(list(string), [])
docker_image = optional(string, "")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
subnetwork = string
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
project_id = string
reserved = optional(string, false)
}))
| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Instance availability Policy. | `string` | `"MIGRATE"` | no | diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py index fa5413e53c..de09c9fa12 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/resume.py @@ -444,12 +444,13 @@ def hold_job(job_id, reason): run(f"{lookup().scontrol} update jobid={job_id} comment='{reason}'") -def create_placement_request(pg_name, region): +def create_placement_request(pg_name: str, region: str, max_distance: Optional[int]): config = { "name": pg_name, "region": region, "groupPlacementPolicy": { "collocation": "COLLOCATED", + "maxDistance": max_distance }, } if lookup().cfg.enable_slurm_gcp_plugins: @@ -489,11 +490,13 @@ def _allocate_nodes_to_placements(nodes: List[str], excl_job_id:Optional[int], l if not (nodeset.enable_placement and valid_placement_node(model)): return no_pp + max_count = calculate_chunk_size(nodeset, lkp) + name_prefix = f"{lkp.cfg.slurm_cluster_name}-slurmgcp-managed-{nodeset.nodeset_name}" if excl_job_id: # simply chunk given nodes by max size of placement return [ PlacementAndNodes(placement=f"{name_prefix}-{excl_job_id}-{i}", nodes=chunk) - for i, chunk in enumerate(chunked(nodes, n=PLACEMENT_MAX_CNT)) + for i, chunk in enumerate(chunked(nodes, n=max_count)) ] # split whole nodeset (not only nodes to resume) into chunks of max size of placement @@ -503,7 +506,7 @@ def _allocate_nodes_to_placements(nodes: List[str], excl_job_id:Optional[int], l for node in nodes: try: - chunk = lkp.node_index(node) // PLACEMENT_MAX_CNT + chunk = lkp.node_index(node) // max_count chunks[chunk].append(node) except: invalid.append(node) @@ -520,18 +523,35 @@ def _allocate_nodes_to_placements(nodes: List[str], excl_job_id:Optional[int], l return placements +def calculate_chunk_size(nodeset: NSDict, lkp: util.Lookup) -> int: + # Calculates the chunk size based on max distance value received + machine_type = lkp.template_info(nodeset.instance_template).machine_type.family + max_distance = nodeset.placement_max_distance + if max_distance == 1: + return 22 + elif max_distance == 2: + if machine_type.startswith("a3"): + return 256 + else: + return 150 + elif max_distance == 3: + return 1500 + else: + return PLACEMENT_MAX_CNT + def create_nodeset_placements(nodes: List[str], excl_job_id:Optional[int], lkp: util.Lookup) -> List[PlacementAndNodes]: placements = _allocate_nodes_to_placements(nodes, excl_job_id, lkp) region = lkp.node_region(nodes[0]) + max_distance = lkp.node_nodeset(nodes[0]).get('placement_max_distance') if log.isEnabledFor(logging.DEBUG): debug_p = {p.placement: to_hostlist(p.nodes) for p in placements} log.debug( f"creating {len(placements)} placement groups: \n{yaml.safe_dump(debug_p).rstrip()}" ) - + requests = { - p.placement: create_placement_request(p.placement, region) for p in placements if p.placement + p.placement: create_placement_request(p.placement, region, max_distance) for p in placements if p.placement } if not requests: return placements diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/common.py 
b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/common.py index 643712efa7..f8434168de 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/common.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/common.py @@ -38,6 +38,7 @@ class TstNodeset: reservation_name: Optional[str] = "" zone_policy_allow: Optional[list[str]] = field(default_factory=list) enable_placement: bool = True + placement_max_distance: Optional[int] = None @dataclass class TstPartition: diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_resume.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_resume.py index 3c637bbe10..77f1229605 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_resume.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/tests/test_resume.py @@ -170,4 +170,6 @@ def test_allocate_nodes_to_placements(nodes: list[str], excl_job_id: Optional[in with unittest.mock.patch("resume.valid_placement_node") as mock_valid_placement_node: mock_valid_placement_node.return_value = True + lkp.template_info = unittest.mock.Mock(return_value=unittest.mock.Mock(machine_type=unittest.mock.Mock(family="n1"))) + assert resume._allocate_nodes_to_placements(nodes, excl_job_id, lkp) == expected diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf index 308b60d19d..f7e1c8b526 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf @@ -93,6 +93,7 @@ locals { maintenance_interval = ns.maintenance_interval instance_properties_json = ns.instance_properties_json enable_placement = ns.enable_placement + placement_max_distance = ns.placement_max_distance network_storage = ns.network_storage zone_target_shape = ns.zone_target_shape zone_policy_allow = ns.zone_policy_allow diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf index 6264576b2c..8daa202afd 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf @@ -205,6 +205,7 @@ variable "nodeset" { disk_type = optional(string) enable_confidential_vm = optional(bool, false) enable_placement = optional(bool, false) + placement_max_distance = optional(number, null) enable_oslogin = optional(bool, true) enable_shielded_vm = optional(bool, false) enable_maintenance_reservation = optional(bool, false) From 0b217d0efcc9d8f09ec27f402c2ce500fd7061e9 Mon Sep 17 00:00:00 2001 From: Parul Bajaj Date: Tue, 7 Jan 2025 05:11:24 +0000 Subject: [PATCH 093/140] Update A3U blueprint to remove MTU var --- examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml index 3037132c21..20699334df 100644 --- a/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml +++ b/examples/gke-a3-ultragpu/gke-a3-ultragpu.yaml @@ -28,7 +28,6 @@ vars: nccl_installer_path: 
$(ghpc_stage("./nccl-installer.yaml")) # Temporary fix for COS issue, will be fixed in next release mglru_disable_path: $(ghpc_stage("./mglru-disable.yaml")) - mtu_size: 8896 static_node_count: # add this system_node_pool_disk_size_gb: 200 a3ultra_node_pool_disk_size_gb: 100 @@ -81,7 +80,7 @@ deployment_groups: source: modules/network/vpc settings: network_name: $(vars.deployment_name)-net-1 - mtu: $(vars.mtu_size) + mtu: 8896 subnetworks: - subnet_name: $(vars.deployment_name)-sub-1 subnet_region: $(vars.region) @@ -100,7 +99,7 @@ deployment_groups: source: modules/network/gpu-rdma-vpc settings: network_name: $(vars.deployment_name)-rdma-net - mtu: $(vars.mtu_size) + mtu: 8896 network_profile: https://www.googleapis.com/compute/beta/projects/$(vars.project_id)/global/networkProfiles/$(vars.zone)-vpc-roce network_routing_mode: REGIONAL subnetworks_template: From d357df767f5335b0c43a540abae671f227f98657 Mon Sep 17 00:00:00 2001 From: Alyssa Date: Tue, 7 Jan 2025 09:13:12 +0000 Subject: [PATCH 094/140] Remove max_hops plugin --- .../schedmd-slurm-gcp-v6-controller/README.md | 20 +++--- .../slurm_gcp_plugins/max_hops/README.md | 38 ---------- .../slurm_gcp_plugins/max_hops/__init__.py | 72 ------------------- .../modules/slurm_files/variables.tf | 4 ++ .../variables.tf | 4 ++ 5 files changed, 17 insertions(+), 121 deletions(-) delete mode 100644 community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/max_hops/README.md delete mode 100644 community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/max_hops/__init__.py diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md index 99078dbcce..74b3534c42 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md @@ -151,23 +151,21 @@ flag which can used to control the maximum spreading allowed. Read more about [official docs](https://cloud.google.com/compute/docs/instances/use-compact-placement-policies ). -You can use the `enable_slurm_gcp_plugins.max_hops.max_hops` setting on the -controller module to control the `max-distance` behavior. See the following -example: +You can use the `placement_max_distance` setting on the nodeset module to control the `max-distance` behavior. See the following example: ```yaml - - id: controller - source: community/modules/scheduler/schedmd-slurm-gcp-v6-controller - use: [ network, partition ] + - id: nodeset + source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset + use: [ network ] settings: - enable_slurm_gcp_plugins: - max_hops: - max_hops: 1 -``` + machine_type: c2-standard-4 + node_count_dynamic_max: 30 + enable_placement: true + placement_max_distance: 1 > [!NOTE] > `schedmd-slurm-gcp-v6-nodeset.settings.enable_placement: true` must also be -> set for max-distance to take effect. +> set for placement_max_distance to take effect. In the above case using a value of 1 will restrict VM to be placed on the same rack. 
You can confirm that the `max-distance` was applied by calling the diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/max_hops/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/max_hops/README.md deleted file mode 100644 index 9e8ad4afeb..0000000000 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/max_hops/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# max_hops slurm_gcp_plugin plugin - -## Overview - -This plugin allows placement parameters to be set controlling the max number of -network hops between nodes in dynamic jobs. - -## Usage - -### Configuration - -This plugin can be enabled by adding the following to the slurm-gcp config: - -```yaml -enable_slurm_gcp_plugins: - #possibly other plugins - max_hops: - max_hops: 1 -``` - -to set the default max_hops to, in this example, 1 for _all_ jobs. - -### Per job setting - -The max hops setting can be changed on a per job basis using the --prefer -argument e.g. as follows: - -salloc --prefer=max_hops.max_hops=1 - -to allow at most one network hop. For this to work the -`ignore_prefer_validation` needs to be added to the slurm `SchedulerParameters` -configuration item. - -## Callbacks used - -### pre_placement_group_insert - -Used to change the placement group creation request. diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/max_hops/__init__.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/max_hops/__init__.py deleted file mode 100644 index 6e1f8dfae7..0000000000 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/slurm_gcp_plugins/max_hops/__init__.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright 2024 "Google LLC" -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -import sys -import slurm_gcp_plugins.utils as sgp_utils - -# Allows setting a specific max_hop for jobs -# -# To enable: -# * add this directory to the slurm-gcp plugin path (usually /slurm/scripts/slurm-gcp-plugins) -# * add the following to the slurm-gcp config (usually /slurm/scripts/config.yaml): -# -# enable_slurm_gcp_plugins: -# -# max_hops: -# max_hops: -# -# -# Where can be either of 1,2,3 (in increasing order of distance) -# If no max_hops is provided but the plugins is still enabled the default level is 3 - - -def pre_placement_group_insert(*pos_args, **keyword_args): - logging.info("Trying to enable max hop") - # Avoid circular import (util imports the plugins) - if "util" in sys.modules: - logging.info("Setting compute service version to beta") - sys.modules["util"].compute = sys.modules["util"].compute_service( - version="beta" - ) - max_distance = sgp_utils.get_plugin_setting( - plugin="max_hops", - setting="max_hops", - job=get_job_from_placement_group_name(keyword_args["pg_name"]), - lkp=keyword_args["lkp"], - default=3, - ) - logging.debug(f"Setting max hop for placement policy to {max_distance}") - keyword_args["request_body"]["groupPlacementPolicy"][ - "collocation=" - ] = "COLLOCATED" - keyword_args["request_body"]["groupPlacementPolicy"][ - "maxDistance" - ] = max_distance - else: - logging.error( - "max_hops can not be set (slurm_gcp util.py must be imported by the caller of the plugin callback)" - ) - - -__all__ = [ - "pre_placement_group_insert", -] - - -# This should be replaced if the job id becomes available in the context of this plugin hook -def get_job_from_placement_group_name(pg_name): - # f"{cfg.slurm_cluster_name}-{partition_name}-{job_id}-{i}" - - return pg_name.split("-")[2] diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/variables.tf index 308a42e639..653e7d74ca 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/variables.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/variables.tf @@ -64,6 +64,10 @@ Enables calling hooks in scripts/slurm_gcp_plugins during cluster resume and sus EOD type = any default = false + validation { + condition = !can(var.enable_slurm_gcp_plugins.max_hops) + error_message = "The 'max_hops' plugin is no longer supported. Please use the 'placement_max_distance' nodeset property instead." + } } variable "enable_bigquery_load" { diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf index 6264576b2c..8d47995d31 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf @@ -620,6 +620,10 @@ Enables calling hooks in scripts/slurm_gcp_plugins during cluster resume and sus EOD type = any default = false + validation { + condition = !can(var.enable_slurm_gcp_plugins.max_hops) + error_message = "The 'max_hops' plugin is no longer supported. Please use the 'placement_max_distance' nodeset property instead." 
+ } } variable "universe_domain" { From 6cfaeed31d2d94d7f035fb1d38e579bd1c4dccf9 Mon Sep 17 00:00:00 2001 From: Harsh Thakkar Date: Tue, 7 Jan 2025 10:58:50 +0000 Subject: [PATCH 095/140] Freeze gcluster version for blueprints which were not migrated to slurm-gcp v6 --- .../blueprints/static-cluster.yaml | 19 +++++++++++++++++++ .../test_configs/two-clusters-sql.yaml | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/docs/hybrid-slurm-cluster/blueprints/static-cluster.yaml b/docs/hybrid-slurm-cluster/blueprints/static-cluster.yaml index 5abb581eaf..ce0b68dc1b 100644 --- a/docs/hybrid-slurm-cluster/blueprints/static-cluster.yaml +++ b/docs/hybrid-slurm-cluster/blueprints/static-cluster.yaml @@ -15,6 +15,8 @@ --- blueprint_name: static-slurm-cluster +toolkit_modules_url: github.com/GoogleCloudPlatform/cluster-toolkit +toolkit_modules_version: v1.41.0 vars: project_id: ## <> @@ -22,6 +24,23 @@ vars: region: us-central1 zone: us-central1-c +terraform_providers: + google: + source: hashicorp/google + version: 5.45.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + + google-beta: + source: hashicorp/google-beta + version: 5.45.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + deployment_groups: - group: primary modules: diff --git a/tools/validate_configs/test_configs/two-clusters-sql.yaml b/tools/validate_configs/test_configs/two-clusters-sql.yaml index 56c46200d3..ab6f71c302 100644 --- a/tools/validate_configs/test_configs/two-clusters-sql.yaml +++ b/tools/validate_configs/test_configs/two-clusters-sql.yaml @@ -13,6 +13,8 @@ # limitations under the License. blueprint_name: two-clusters +toolkit_modules_url: github.com/GoogleCloudPlatform/cluster-toolkit +toolkit_modules_version: v1.41.0 vars: project_id: ## Set GCP Project ID Here ## @@ -25,6 +27,23 @@ vars: enable_bigquery_load: True instance_image_custom: True +terraform_providers: + google: + source: hashicorp/google + version: 5.45.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + + google-beta: + source: hashicorp/google-beta + version: 5.45.0 + configuration: + project: $(vars.project_id) + region: $(vars.region) + zone: $(vars.zone) + deployment_groups: - group: net modules: From 682e29737f1cd2b7c9f6e35acf159e2e487c2a7e Mon Sep 17 00:00:00 2001 From: Rohit Ramu Date: Tue, 7 Jan 2025 08:37:39 -0800 Subject: [PATCH 096/140] Update nccl-installer.yaml --- examples/gke-a3-ultragpu/nccl-installer.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/gke-a3-ultragpu/nccl-installer.yaml b/examples/gke-a3-ultragpu/nccl-installer.yaml index 7f28f673df..0227658184 100644 --- a/examples/gke-a3-ultragpu/nccl-installer.yaml +++ b/examples/gke-a3-ultragpu/nccl-installer.yaml @@ -73,6 +73,7 @@ spec: /scripts/container_entry.sh install --install-nccl cp -r /var/lib/gib/lib64/. /usr/local/home/kubernetes/bin/nvidia/lib64 cp -r /var/lib/gib/. 
/usr/local/home/kubernetes/bin/gib + ibv_devinfo || exit 1 echo "installation finishes" containers: - image: "gke.gcr.io/pause:3.8@sha256:880e63f94b145e46f1b1082bb71b85e21f16b99b180b9996407d61240ceb9830" From cd11d0291c0087e3b294504b2103f147a09ba77d Mon Sep 17 00:00:00 2001 From: Rohit Ramu Date: Tue, 7 Jan 2025 08:38:11 -0800 Subject: [PATCH 097/140] Delete examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/README.md --- .../a3u-slurm-ubuntu-gcs/README.md | 153 ------------------ 1 file changed, 153 deletions(-) delete mode 100644 examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/README.md diff --git a/examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/README.md b/examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/README.md deleted file mode 100644 index 7f0c062080..0000000000 --- a/examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/README.md +++ /dev/null @@ -1,153 +0,0 @@ -# A3-Ultra Slurm + Ubuntu + GCS - -This reference design creates a Slurm cluster with the following design: - -1. Ubuntu 22 Operating System -1. A static a3-ultragpu-8g partition that uses a reservation. -1. 3 VPCs (2x CPU, 1x for GPU RDMA networks), with a total of 9 subnetworks -1. A GCS bucket that is configured with Hierarchical Namespace enabled -1. Cloud Storage Fuse, configured to utilize Local-SSD storage - -## Deployment Instructions - -### Build the Cluster Toolkit gcluster binary - -Follow instructions -[here](https://cloud.google.com/cluster-toolkit/docs/setup/configure-environment) - -### (Optional, but recommended) Create a GCS Bucket for storing terraform state - -```bash -#!/bin/bash - -TF_STATE_BUCKET_NAME= -PROJECT_ID= -REGION= - -gcloud storage buckets create gs://${TF_STATE_BUCKET_NAME} \ - --project=${PROJECT_ID} \ - --default-storage-class=STANDARD --location=${REGION} \ - --uniform-bucket-level-access -gcloud storage buckets update gs://${TF_STATE_BUCKET_NAME} --versioning -``` - -### Create and configure a GCS Bucket - -This will be used for input data and checkpoint/restart data. This bucket should -be created with Hierarchical Namespace enabled. See -[here](https://cloud.google.com/storage/docs/hns-overview) for more details. - -```bash -#!/bin/bash -PROJECT_ID= -REGION= -HNS_BUCKET_NAME= -PROJECT_NUMER= - -gcloud storage buckets create gs://${HNS_BUCKET_NAME} \ - --location=${REGION} --uniform-bucket-level-access - --enable-hierarchical-namespace - -``` - -### Create/modify the deployment.yaml file with your preferred configuration - -For example, set the such as size, reservation to be used, etc, as well as the -name of the bucket that you just created. Below is an example - -```yaml ---- -terraform_backend_defaults: - type: gcs - configuration: - bucket: TF_STATE_BUCKET_NAME - -vars: - deployment_name: a3u-gcs - project_id: - region: - zone: - a3u_reservation_name: - a3u_cluster_size: - hns_gcs_bucket: # This bucket must have been previously created - -``` - -### Deploy the cluster - -```bash -#!/bin/bash -gcluster deploy -d deployment.yaml a3u-slurm-ubuntu-gcs.yaml -``` - -## Storage Design Components - -On the login and controller nodes, the gcs bucket is mounted at /gcs, using -fairly standard [Cloud Storage Fuse configuration](https://cloud.google.com/storage/docs/cloud-storage-fuse/config-file). On the compute nodes, there are two -mounts of the same bucket. 
First, `/gcs` is mounted with with the following -configuration: - -```yaml -file-cache: - max-size-mb: -1 - enable-parallel-downloads: true - download-chunk-size-mb: 50 - parallel-downloads-per-file: 16 -cache-dir: /mnt/localssd -file-system: - dir-mode: "777" - file-mode: "777" - rename-dir-limit: 20000 # Set to 20000 for hierarchical buckets - temp-dir: /mnt/localssd - fuse-options: allow_other -foreground: true -``` - -This uses /mnt/localssd as a cache dir (for reads) and temp-dir (for writes). -It also enables parallel downloads, which is particularly useful for -checkpoint restarts. - -Next, `/gcs-ro` is mounted in a "read-only" mode, and optimized to for -input (training) data reading. - -```yaml -file-cache: - max-size-mb: -1 -metadata-cache: - ttl-secs: 3600 # Decrease if your data changes quickly. -cache-dir: /mnt/localssd -file-system: - dir-mode: "755" # need 5 on dir to enable ls - file-mode: "644" - temp-dir: /mnt/localssd - fuse-options: allow_other - kernel-list-cache-ttl-secs: 60 -foreground: true -``` - -The local ssds will be used for a file cache, and the metadata-cache -for the data is set to 1 hour, with kernel-list-cache ttl set to 60 seconds. -This reduces the amount of requests that will be sent to GCS, and improves -data loading performance. - -We suggest using /gcs for checkpoint saving/loading. and use /gcs-ro for -data input loading. - -## Running Benchmarks with Ramble - -To run a series of NCCL test benchmarks on your cluster, you can use -the use the following script: `run-nccl-tests-via-ramble.sh`, -which will use [ramble](https://github.com/GoogleCloudPlatform/ramble) to -automate the building and running of nccl tests from 2 nodes up to 32 node -scales. - -Copy the contents of `run-nccl-tests-via-ramble.sh` to your slurm -login or controller node, for example: - -```bash -#!/bin/bash -wget -np -nd https://raw.githubusercontent.com/GoogleCloudPlatform/cluster-toolkit/refs/heads/develop/examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/run-nccl-tests-via-ramble.sh -``` - -and then launch with `bash run-nccl-tests-via-ramble.sh`. The entire process -will take ~30 minutes. From b619d76f2b4f37f902f3ba2582c3ac70cb4ef6d0 Mon Sep 17 00:00:00 2001 From: Rohit Ramu Date: Tue, 7 Jan 2025 08:38:41 -0800 Subject: [PATCH 098/140] Delete examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/a3u-slurm-ubuntu-gcs.yaml --- .../a3u-slurm-ubuntu-gcs.yaml | 615 ------------------ 1 file changed, 615 deletions(-) delete mode 100644 examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/a3u-slurm-ubuntu-gcs.yaml diff --git a/examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/a3u-slurm-ubuntu-gcs.yaml b/examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/a3u-slurm-ubuntu-gcs.yaml deleted file mode 100644 index 7be9f89a00..0000000000 --- a/examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/a3u-slurm-ubuntu-gcs.yaml +++ /dev/null @@ -1,615 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- ---- - -blueprint_name: a3u-slurm-ubuntu-gcs - -vars: - # The following are supplied through the deployment.yaml file. - deployment_name: # supply deployment name - project_id: # supply project ID - region: # supply region - zone: # supply zone - a3u_cluster_size: # supply cluster size - a3u_reservation_name: # supply reservation name - hns_gcs_bucket: # Name of HNS enabled GCS bucket - # End of variables defined by deployment.yaml. The remainder - # of this blueprint need not be modified. - - # Image settings - base_image: - project: ubuntu-os-accelerator-images - family: ubuntu-accelerator-2204-amd64-with-nvidia-550 - image_build_machine_type: n2-standard-16 - build_slurm_from_git_ref: 6.8.6 - - # Cluster env settings - # net0 and filestore ranges must not overlap - net0_range: 192.168.0.0/19 - filestore_ip_range: 192.168.32.0/24 - net1_range: 192.168.64.0/18 - rdma_net_range: 192.168.128.0/18 - - # Cluster Settings - local_ssd_mountpoint: /mnt/localssd - instance_image: - project: $(vars.project_id) - family: $(vars.deployment_name)-u22 - disk_size_gb: 200 - nccl_plugin_version: v1.0.2 - - # Here we define a set of startup script runners that are used to configure - # the controller node - controller_runners: - - type: shell - destination: stage_scripts.sh - content: | - #!/bin/bash - SLURM_ROOT=/opt/apps/adm/slurm - PARTITION_NAME=a3ultra - mkdir -m 0755 -p "${SLURM_ROOT}/scripts" - mkdir -p "${SLURM_ROOT}/partition-${PARTITION_NAME}-epilog_slurmd.d" - ln -s "/slurm/scripts/tools/gpu-test" "${SLURM_ROOT}/partition-${PARTITION_NAME}-epilog_slurmd.d/gpu-test.epilog_slurmd" - - # Shared runners between login and controller: - # Configure an enroot config path - shared_runners: - - type: data - destination: /etc/enroot/enroot.conf - content: | - ENROOT_CONFIG_PATH ${HOME}/.enroot - - # Here we define a set of startup script runners that are used to configure - # the A3-Ultra nodes - # Set up enroot, using the local ssds for runtime/cache/data/temp storage. 
- a3u_runners: - - type: data - destination: /etc/enroot/enroot.conf - content: | - ENROOT_CONFIG_PATH ${HOME}/.enroot - ENROOT_RUNTIME_PATH $(vars.local_ssd_mountpoint)/${UID}/enroot/runtime - ENROOT_CACHE_PATH $(vars.local_ssd_mountpoint)/${UID}/enroot/cache - ENROOT_DATA_PATH $(vars.local_ssd_mountpoint)/${UID}/enroot/data - ENROOT_TEMP_PATH $(vars.local_ssd_mountpoint)/${UID}/enroot - - # Install NCCL Network Plugin - - type: ansible-local - destination: nccl_plugin.yml - content: | - --- - - name: Install NCCL plugin for A3 Ultra series - hosts: all - become: true - tasks: - - name: Add SystemD unit for NCCL plugin installation - ansible.builtin.copy: - dest: /etc/systemd/system/nccl-plugin@.service - mode: 0o0644 - content: | - [Unit] - After=network-online.target - Before=slurmd.service - - [Service] - Type=oneshot - ExecStartPre=/usr/bin/rm -rf /usr/local/gib - ExecStartPre=/usr/bin/mkdir -p /usr/local/gib - ExecStartPre=/snap/bin/gcloud auth configure-docker --quiet us-docker.pkg.dev - ExecStart=/usr/bin/docker run --rm --name nccl-gib-installer --volume /usr/local/gib:/var/lib/gib \ - us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:%i install --install-nccl - - [Install] - WantedBy=slurmd.service - notify: - - Reload SystemD - handlers: - - name: Reload SystemD - ansible.builtin.systemd: - daemon_reload: true - post_tasks: - - name: Enable NCCL plugin SystemD unit - ansible.builtin.service: - name: nccl-plugin@$(vars.nccl_plugin_version).service - state: started - enabled: true - - # Configure Cloud Storage FUSE - - type: ansible-local - destination: gcsfuse.yml - content: | - --- - - name: Create LSSD optimized gcsfuse mount - hosts: all - become: true - tasks: - - name: Create gcsfuse rwx configuration - ansible.builtin.copy: - dest: /etc/gcsfuse-lssd.yml - owner: root - group: root - mode: 0o644 - content: | - file-cache: - max-size-mb: -1 - enable-parallel-downloads: true - download-chunk-size-mb: 50 - parallel-downloads-per-file: 16 - cache-dir: /mnt/localssd - file-system: - dir-mode: "777" - file-mode: "777" - rename-dir-limit: 20000 # Set to 20000 for hierarchical buckets - temp-dir: /mnt/localssd - fuse-options: allow_other - foreground: true - - - name: Create gcsfuse read-only configuration for input data - ansible.builtin.copy: - dest: /etc/gcsfuse-ro.yml - owner: root - group: root - mode: 0o644 - content: | - file-cache: - max-size-mb: -1 - metadata-cache: - ttl-secs: 3600 # Decrease if your data changes quickly. 
- cache-dir: /mnt/localssd - file-system: - dir-mode: "755" # need 5 on dir to enable ls - file-mode: "644" - temp-dir: /mnt/localssd - fuse-options: allow_other - kernel-list-cache-ttl-secs: 60 - foreground: true - - - name: Create gcsfuse systemd service - ansible.builtin.copy: - dest: /etc/systemd/system/gcsfuse-lssd.service - owner: root - group: root - mode: 0o644 - content: | - [Unit] - Description=gcsfuse mount of all buckets - After=local-fs.target - - [Service] - Type=simple - User=root - ExecStartPre=/bin/mkdir -p /gcs - ExecStart=gcsfuse --config-file /etc/gcsfuse-lssd.yml $(vars.hns_gcs_bucket) /gcs - ExecStop=fusermount3 -u /gcs - - [Install] - WantedBy=slurmd.service multi-user.target - - - name: Create read-only gcsfuse systemd service - ansible.builtin.copy: - dest: /etc/systemd/system/gcsfuse-ro.service - owner: root - group: root - mode: 0o644 - content: | - [Unit] - Description=gcsfuse-ro mount - After=local-fs.target - - [Service] - Type=simple - User=root - ExecStartPre=/bin/mkdir -p /gcs-ro - ExecStart=gcsfuse --config-file /etc/gcsfuse-ro.yml $(vars.hns_gcs_bucket) /gcs-ro - ExecStop=fusermount3 -u /gcs-ro - - [Install] - WantedBy=slurmd.service multi-user.target - - post_tasks: - - name: Enable and restart gcsfuse - ansible.builtin.service: - name: gcsfuse-lssd.service - state: restarted - enabled: true - - - name: Enable and restart gcsfuse-ro - ansible.builtin.service: - name: gcsfuse-ro.service - state: restarted - enabled: true - - # Configure Cloud Storage FUSE for login/controller nodes - gcsfuse_runners: - - type: ansible-local - destination: gcsfuse.yml - content: | - --- - - name: Create Standard RWX gcsfuse mount - hosts: localhost - become: true - tasks: - - name: Create gcsfuse configuration - ansible.builtin.copy: - dest: /etc/gcsfuse.yml - owner: root - group: root - mode: 0o644 - content: | - file-system: - dir-mode: "777" - file-mode: "777" - rename-dir-limit: 20000 - fuse-options: allow_other - foreground: true - - - name: Create gcsfuse systemd service - ansible.builtin.copy: - dest: /etc/systemd/system/gcsfuse.service - owner: root - group: root - mode: 0o644 - content: | - [Unit] - Description=gcsfuse mount of all buckets - After=local-fs.target - - [Service] - Type=simple - User=root - ExecStartPre=/bin/mkdir -p /gcs - ExecStart=gcsfuse --config-file /etc/gcsfuse.yml $(vars.hns_gcs_bucket) /gcs - ExecStop=fusermount3 -u /gcs - - [Install] - WantedBy=slurmd.service multi-user.target - - post_tasks: - - name: Enable and restart gcsfuse - ansible.builtin.service: - name: gcsfuse.service - state: restarted - enabled: true - -deployment_groups: -- group: image-env - modules: - - id: slurm-image-network - source: modules/network/vpc - - - id: slurm-build-script - source: modules/scripts/startup-script - settings: - install_ansible: true - docker: - enabled: true - runners: - - type: data - destination: /etc/cluster_toolkit/a3ultra-prod-slurm-image.yaml - source: ../.ghpc/artifacts/expanded_blueprint.yaml - - type: data - destination: /var/tmp/slurm_vars.json - content: | - { - "reboot": false, - "install_cuda": false, - "install_gcsfuse": true, - "install_lustre": false, - "install_ompi": true, - "update_kernel": false, - "monitoring_agent": "cloud-ops", - } - - type: shell - destination: install_slurm.sh - content: | - #!/bin/bash - set -e -o pipefail - ansible-pull \ - -U https://github.com/GoogleCloudPlatform/slurm-gcp -C $(vars.build_slurm_from_git_ref) \ - -i localhost, --limit localhost --connection=local \ - -e @/var/tmp/slurm_vars.json \ - 
ansible/playbook.yml - # this duplicates the ulimits configuration of the HPC VM Image - - type: data - destination: /etc/security/limits.d/99-unlimited.conf - content: | - * - memlock unlimited - * - nproc unlimited - * - stack unlimited - * - nofile 1048576 - * - cpu unlimited - * - rtprio unlimited - - type: data - destination: /etc/systemd/system/slurmd.service.d/file_ulimit.conf - content: | - [Service] - LimitNOFILE=infinity - - type: data - destination: /etc/netplan/60-cloud-mrdma-init.yaml - content: | - network: - ethernets: - primary: - match: - name: enp0s* - driver: gve - dhcp4: true - dhcp4-overrides: - use-domains: true - dhcp6: true - dhcp6-overrides: - use-domains: true - optional: true - secondary: - match: - driver: gve - dhcp4: true - dhcp4-overrides: - use-domains: false - use-dns: false - use-ntp: false - dhcp6: true - dhcp6-overrides: - use-domains: false - use-dns: false - use-ntp: false - optional: true - mrdma_devices: - match: - driver: mlx5_core - dhcp-identifier: mac - dhcp4: true - dhcp4-overrides: - use-domains: true - use-dns: false - use-ntp: false - optional: true - version: 2 - - type: ansible-local - destination: configure_gpu.yml - content: | - --- - - name: Install NVIDIA packages - hosts: all - become: true - vars: - distribution: "{{ ansible_distribution | lower }}{{ ansible_distribution_version | replace('.','') }}" - cuda_repo_url: https://developer.download.nvidia.com/compute/cuda/repos/{{ distribution }}/x86_64/cuda-keyring_1.1-1_all.deb - cuda_repo_filename: /tmp/{{ cuda_repo_url | basename }} - enable_nvidia_dcgm: false - nvidia_packages: - - cuda-toolkit-12-4 - - datacenter-gpu-manager - - libnvidia-nscq-550 - tasks: - - name: Download NVIDIA repository package - ansible.builtin.get_url: - url: "{{ cuda_repo_url }}" - dest: "{{ cuda_repo_filename }}" - - name: Install NVIDIA repository package - ansible.builtin.apt: - deb: "{{ cuda_repo_filename }}" - state: present - - name: Reduce NVIDIA repository priority - ansible.builtin.copy: - dest: /etc/apt/preferences.d/cuda-repository-pin-600 - mode: 0o0644 - owner: root - group: root - content: | - Package: nsight-compute - Pin: origin *ubuntu.com* - Pin-Priority: -1 - - Package: nsight-systems - Pin: origin *ubuntu.com* - Pin-Priority: -1 - - Package: * - Pin: release l=NVIDIA CUDA - Pin-Priority: 400 - - name: Install NVIDIA fabric and CUDA - ansible.builtin.apt: - name: "{{ item }}" - update_cache: true - loop: "{{ nvidia_packages }}" - - name: Freeze NVIDIA fabric and CUDA - ansible.builtin.dpkg_selections: - name: "{{ item }}" - selection: hold - loop: "{{ nvidia_packages }}" - post_tasks: - - name: Disable NVIDIA DCGM by default (enable during boot on GPU nodes) - ansible.builtin.service: - name: nvidia-dcgm.service - state: stopped - enabled: false - - type: ansible-local - destination: install_mellanox_drivers.yml - content: | - --- - - name: Update Netplan and Install Network Utils - hosts: all - become: true - tasks: - - name: Install Linux Modules Extra - ansible.builtin.package: - name: - - ibverbs-utils - state: present - - name: Apply netplan - ansible.builtin.command: netplan apply - -- group: image - modules: - - id: slurm-a3ultra-image - source: modules/packer/custom-image - kind: packer - settings: - disk_size: $(vars.disk_size_gb) - machine_type: $(vars.image_build_machine_type) - source_image_family: $(vars.base_image.family) - source_image_project_id: [$(vars.base_image.project)] - image_family: $(vars.instance_image.family) - omit_external_ip: false - use: - - 
slurm-image-network - - slurm-build-script - -- group: cluster-env - modules: - - id: a3ultra-slurm-net-0 - source: modules/network/vpc - settings: - network_name: $(vars.deployment_name)-net-0 - mtu: 8896 - subnetworks: - - subnet_name: $(vars.deployment_name)-sub-0 - subnet_region: $(vars.region) - subnet_ip: $(vars.net0_range) - - - id: a3ultra-slurm-net-1 - source: modules/network/vpc - settings: - network_name: $(vars.deployment_name)-net-1 - mtu: 8896 - subnetworks: - - subnet_name: $(vars.deployment_name)-sub-1 - subnet_region: $(vars.region) - subnet_ip: $(vars.net1_range) - - - id: a3ultra-slurm-rdma-net - source: modules/network/gpu-rdma-vpc - settings: - network_name: $(vars.deployment_name)-rdma-net - network_profile: https://www.googleapis.com/compute/beta/projects/$(vars.project_id)/global/networkProfiles/$(vars.zone)-vpc-roce - network_routing_mode: REGIONAL - nic_type: MRDMA - subnetworks_template: - name_prefix: $(vars.deployment_name)-mrdma-sub - count: 8 - ip_range: $(vars.rdma_net_range) - region: $(vars.region) - - - id: homefs - source: modules/file-system/filestore - use: - - a3ultra-slurm-net-0 - settings: - filestore_tier: HIGH_SCALE_SSD - size_gb: 10240 - local_mount: /home - reserved_ip_range: $(vars.filestore_ip_range) - deletion_protection: - enabled: true - reason: Avoid data loss - outputs: - - network_storage - -- group: cluster - modules: - - id: a3ultra_startup - source: modules/scripts/startup-script - settings: - local_ssd_filesystem: - mountpoint: $(vars.local_ssd_mountpoint) - permissions: "1777" # must quote numeric filesystem permissions! - docker: - enabled: true - world_writable: true - daemon_config: | - { - "data-root": "$(vars.local_ssd_mountpoint)/docker" - } - runners: $(flatten([vars.a3u_runners])) - - - id: a3_ultra_nodeset - source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset - use: [a3ultra-slurm-net-0, a3ultra_startup] - settings: - bandwidth_tier: gvnic_enabled - machine_type: a3-ultragpu-8g - instance_image_custom: true - enable_public_ips: true - node_count_static: $(vars.a3u_cluster_size) - node_count_dynamic_max: 0 - enable_placement: false - disk_type: hyperdisk-balanced - on_host_maintenance: TERMINATE - reservation_name: $(vars.a3u_reservation_name) - additional_networks: - $(concat( - [{ - network=null, - subnetwork=a3ultra-slurm-net-1.subnetwork_self_link, - subnetwork_project=vars.project_id, - nic_type="GVNIC", - queue_count=null, - network_ip="", - stack_type=null, - access_config=[], - ipv6_access_config=[], - alias_ip_range=[] - }], - a3ultra-slurm-rdma-net.subnetwork_interfaces - )) - - - id: a3_ultra_partition - source: community/modules/compute/schedmd-slurm-gcp-v6-partition - use: - - a3_ultra_nodeset - settings: - exclusive: false - partition_name: a3ultra - is_default: true - partition_conf: - ResumeTimeout: 900 - SuspendTimeout: 600 - OverSubscribe: EXCLUSIVE - - - id: controller_startup - source: modules/scripts/startup-script - settings: - runners: $(flatten([vars.shared_runners, vars.controller_runners, vars.gcsfuse_runners])) - - - id: login_startup - source: modules/scripts/startup-script - settings: - runners: $(flatten([vars.shared_runners, vars.gcsfuse_runners])) - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v6-login - use: [a3ultra-slurm-net-0] - settings: - instance_image_custom: true - disk_size_gb: 300 - enable_login_public_ips: true - machine_type: n2-standard-8 - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v6-controller - 
use: - - a3ultra-slurm-net-0 - - a3_ultra_partition - - slurm_login - - homefs - settings: - enable_controller_public_ips: true - instance_image_custom: true - disk_type: pd-extreme - disk_size_gb: 300 - machine_type: n2-standard-80 - controller_startup_script: $(controller_startup.startup_script) - login_startup_script: $(login_startup.startup_script) - enable_external_prolog_epilog: true From 57fd7af299d45636394e944e655b68c54faef398 Mon Sep 17 00:00:00 2001 From: Rohit Ramu Date: Tue, 7 Jan 2025 08:38:57 -0800 Subject: [PATCH 099/140] Delete examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/deployment.yaml --- .../a3u-slurm-ubuntu-gcs/deployment.yaml | 31 ------------------- 1 file changed, 31 deletions(-) delete mode 100644 examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/deployment.yaml diff --git a/examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/deployment.yaml b/examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/deployment.yaml deleted file mode 100644 index d955eda1f4..0000000000 --- a/examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/deployment.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -# If using GCS as a terraform backend (suggested), add the following. If not, -# comment out or remove. -terraform_backend_defaults: - type: gcs - configuration: - bucket: # Name of terraform state bucket. -# End of optional section - -vars: - deployment_name: # Unique name of this Cluster Toolkit Deployment, e.g. a3u-gcs - project_id: # Your GCP project name - region: # e.g. europe-west1 - zone: # e.g. europe-west1-b - a3u_reservation_name: # reservation name, e.g. a3u-reservation-00 - a3u_cluster_size: # Number of A3-Ultra nodes in the cluster - hns_gcs_bucket: # This bucket must have been previously created From 0a3b379a0d6bad93103f8a32276b7a5d8d369b3d Mon Sep 17 00:00:00 2001 From: Rohit Ramu Date: Tue, 7 Jan 2025 08:39:22 -0800 Subject: [PATCH 100/140] Delete examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/run-nccl-tests-via-ramble.sh --- .../run-nccl-tests-via-ramble.sh | 224 ------------------ 1 file changed, 224 deletions(-) delete mode 100644 examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/run-nccl-tests-via-ramble.sh diff --git a/examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/run-nccl-tests-via-ramble.sh b/examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/run-nccl-tests-via-ramble.sh deleted file mode 100644 index 62061533f3..0000000000 --- a/examples/hypercompute_clusters/a3u-slurm-ubuntu-gcs/run-nccl-tests-via-ramble.sh +++ /dev/null @@ -1,224 +0,0 @@ -#!/bin/bash -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
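Note that the `hns_gcs_bucket` referenced in the deployment file above must already exist before deployment. As an illustration only (the bucket name and location are placeholders, and the flags should be verified against current gcloud documentation), a hierarchical-namespace bucket can be created along these lines:

```text
# Hypothetical bucket name and location; verify flags for your gcloud version.
gcloud storage buckets create gs://my-a3u-hns-bucket \
  --location=europe-west1 \
  --uniform-bucket-level-access \
  --enable-hierarchical-namespace
```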
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -eu - -trap "printf '\nCaught Ctrl+c. Exiting...\n'; exit" INT - -# Use current unix timestamp as a unique tag -# for jobs submitted -TAG=$(date +%s) -TEST_DIR=nccl-tests-"${TAG}" -SOFTWARE_INSTALL=/opt/apps - -cat <"${TEST_DIR}"/configs/ramble.yaml -# Ramble Configuration for NCCL Tests -ramble: - env_vars: - set: - OMPI_MCA_pml: "^ucx" - OMPI_MCA_btl: "^openib" - OMPI_MCA_btl_tcp_if_include: enp0s19 - - CUDA_VISIBLE_DEVICES: 0,1,2,3,4,5,6,7 - NCCL_NET: gIB - NCCL_SOCKET_IFNAME: enp0s19,enp192s20 - NCCL_CROSS_NIC: 0 - NCCL_NET_GDR_LEVEL: PIX - NCCL_P2P_NET_CHUNKSIZE: 131072 - NCCL_P2P_PCI_CHUNKSIZE: 131072 - NCCL_P2P_NVL_CHUNKSIZE: 524288 - NCCL_NVLS_CHUNKSIZE: 524288 - NCCL_IB_GID_INDEX: 3 - NCCL_IB_ADAPTIVE_ROUTING: 1 - NCCL_IB_QPS_PER_CONNECTION: 4 - NCCL_IB_TC: 52 - NCCL_IB_FIFO_TC: 84 - NCCL_SHIMNET_GUEST_CONFIG_CHECKER_CONFIG_FILE: /usr/local/gib/configs/guest_config.txtpb - NCCL_TUNER_CONFIG_PATH: /usr/local/gib/configs/tuner_config.txtpb - prepend: - - paths: - LD_LIBRARY_PATH: /usr/local/gib/lib64 - - variables: - mpi_command: srun --mpi=pmix - batch_submit: 'sbatch {execute_experiment}' - processes_per_node: '{gpus_per_node}' - gpus_per_node: '8' - applications: - nccl-tests: - workloads: - '{workload}': - experiments: - '{workload}-{n_nodes}': - variants: - package_manager: spack - variables: - workload: [all-gather, all-reduce, reduce-scatter] - n_nodes: [2, 4, 8, 16, 32] - matrix: - - n_nodes - - workload - - software: - packages: - pmix: - pkg_spec: pmix - mpi: - pkg_spec: openmpi +cuda cuda_arch=90 - cuda: - pkg_spec: cuda@12.4.0 - nccl: - pkg_spec: nccl@2.23.4-1 cuda_arch=90 - nccl-tests: - pkg_spec: nccl-tests cuda_arch=90 - environments: - nccl-tests: - packages: [cuda, mpi, nccl, nccl-tests, pmix] - -EOF - -# Populate slurm sbatch script -cat <"${TEST_DIR}"/configs/execute_experiment.tpl -#!/bin/bash -#SBATCH -J {experiment_name}-"${TAG}" -#SBATCH --output={experiment_run_dir}/slurm-%j.out -#SBATCH -N {n_nodes} -#SBATCH --gpus-per-node=8 -#SBATCH --exclusive -#SBATCH --ntasks-per-node={processes_per_node} - -cd "{experiment_run_dir}" -{command} -EOF - -# Get number of nodes available -N_NODES=$(sinfo -h -o %D) - -# Print available benchmarks -printf "\n--------- Setting up Benchmarks ----------\n" -ramble workspace info --where '{n_nodes} <= '"$N_NODES" - -printf "\n------- About to run the following: ------\n\n" -printf "source %s/ramble/env/bin/activate\n" "${SOFTWARE_INSTALL}" -printf ". %s/ramble/share/ramble/setup-env.sh\n" "${SOFTWARE_INSTALL}" -printf ". 
%s/spack/share/spack/setup-env.sh\n" "${SOFTWARE_INSTALL}" -printf "ramble workspace activate %s\n" "${TEST_DIR}" -printf "ramble workspace setup --where '{n_nodes} <= %s'\n" "${N_NODES}" -printf "ramble on --where '{n_nodes} <= %s' \n" "${N_NODES}" - -# Set up experiments -printf "\n--------- Setting up Benchmarks -------\n" -printf " This may take 20-30 minutes \n" -ramble workspace setup --where '{n_nodes} <= '"${N_NODES}" - -# Submit Experiments to Slurm -printf "\n----------- Running Benchmarks --------\n" -ramble on --where '{n_nodes} <= '"${N_NODES}" - -# Wait for all to be done -# Use the TAG in the slurm jobs -until [[ $(squeue -h -o %j | grep -c "${TAG}") -eq 0 ]]; do - clear - echo "waiting for $(squeue -h -o %j | grep -c "${TAG}") jobs to finish" - squeue - sleep 5 -done - -# Analyze -ramble workspace analyze -f json --where '{n_nodes} <= '"${N_NODES}" - -# Summarize all results in summary.tsv -cd "${TEST_DIR}" -jq -r '["workload","n_nodes","msg_size","busbw"], (.experiments[] as $exp | $exp.CONTEXTS[] as $context | -{ - experiment_name: $exp.name, - workload: $exp.workload_name, - n_nodes: $exp.n_nodes, - Context: $context.name -} + -($context.foms | from_entries ) -| [.workload, .n_nodes, .Size, ."Out of Place Bus Bandwidth"]) -| @tsv' results.latest.json >summary.tsv - -# Print just the 8GB message sizes -printf "\n--- SUMMARY for 8GB Message Sizes --\n" -jq -r '["workload","n_nodes","msg_size","busbw"], (.experiments[] as $exp | $exp.CONTEXTS[] as $context | -{ - experiment_name: $exp.name, - workload: $exp.workload_name, - n_nodes: $exp.n_nodes, - Context: $context.name -} + -($context.foms | from_entries ) -| select(.Size | tonumber > 8000000000) -| [.workload, .n_nodes, .Size, ."Out of Place Bus Bandwidth"]) -| @tsv' results.latest.json -printf "\nFor full results, see \"summary.tsv\"\n" - -printf "\n- To reactivate this ramble workspace, run -\n\n" -printf "source %s/ramble/env/bin/activate\n" "${SOFTWARE_INSTALL}" -printf ". %s/ramble/share/ramble/setup-env.sh\n" "${SOFTWARE_INSTALL}" -printf ". %s/spack/share/spack/setup-env.sh\n" "${SOFTWARE_INSTALL}" -printf "ramble workspace activate %s\n" "${TEST_DIR}" From bd4e4ceb310cc82be4ec7a8617f1416a19412928 Mon Sep 17 00:00:00 2001 From: Rohit Ramu Date: Tue, 7 Jan 2025 08:39:42 -0800 Subject: [PATCH 101/140] Delete examples/machine-learning/a3-ultragpu-8g/README.md --- .../machine-learning/a3-ultragpu-8g/README.md | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 examples/machine-learning/a3-ultragpu-8g/README.md diff --git a/examples/machine-learning/a3-ultragpu-8g/README.md b/examples/machine-learning/a3-ultragpu-8g/README.md deleted file mode 100644 index dfa3bb17c5..0000000000 --- a/examples/machine-learning/a3-ultragpu-8g/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# A3 Ultra Blueprints - -For further information on deploying an A3 Ultra cluster with Slurm, please -see: - -[Create A3 Ultra Slurm Cluster](https://cloud.google.com/ai-hypercomputer/docs/create/create-slurm-cluster) - -If you are unable to access these documents, please contact your -[Technical Account Manager (TAM)](https://cloud.google.com/tam). - -## Deploy A3 Ultra compute VM with custom startup-scripts - -Customers can deploy [a3ultra-vm.yaml] blueprint to deploy 2 A3 Ultra VMs. You -can also specify custom startup-scripts to run in the blueprint. 
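A custom startup script referenced from the blueprint is an ordinary shell script executed on each VM at boot. The snippet below is only a sketch (it is not part of this repository) and assumes `nvidia-smi` and the `ibverbs-utils` tools are available on the image:

```text
#!/bin/bash
# Hypothetical sanity-check startup script.
set -e
# List the GPUs visible to the driver.
nvidia-smi -L
# List the RDMA devices visible to the verbs stack.
ibv_devices
```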
- -[a3ultra-vm.yaml]: ./a3ultra-vm.yaml From a897c6a35e828d08e0a88776f4a89af442ec31ab Mon Sep 17 00:00:00 2001 From: Rohit Ramu Date: Tue, 7 Jan 2025 08:40:00 -0800 Subject: [PATCH 102/140] Delete examples/machine-learning/a3-ultragpu-8g/a3ultra-slurm-blueprint.yaml --- .../a3ultra-slurm-blueprint.yaml | 451 ------------------ 1 file changed, 451 deletions(-) delete mode 100644 examples/machine-learning/a3-ultragpu-8g/a3ultra-slurm-blueprint.yaml diff --git a/examples/machine-learning/a3-ultragpu-8g/a3ultra-slurm-blueprint.yaml b/examples/machine-learning/a3-ultragpu-8g/a3ultra-slurm-blueprint.yaml deleted file mode 100644 index 29b08add88..0000000000 --- a/examples/machine-learning/a3-ultragpu-8g/a3ultra-slurm-blueprint.yaml +++ /dev/null @@ -1,451 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -# This blueprint uses private preview functionality in limited availability, -# see README.md for further information - -# This blueprint requires a Cluster Toolkit binary built from a -# release >= 1.44.0 - -blueprint_name: a3ultra-slurm - -vars: - deployment_name: # supply deployment name - project_id: # supply project ID - region: # supply region - zone: # supply zone - a3u_cluster_size: # supply cluster size - a3u_reservation_name: # supply reservation name - # Image settings - base_image: - project: ubuntu-os-accelerator-images - family: ubuntu-accelerator-2204-amd64-with-nvidia-550 - image_build_machine_type: n2-standard-16 - build_slurm_from_git_ref: 6.8.7 - # Cluster env settings - # net0 and filestore ranges must not overlap - net0_range: 192.168.0.0/19 - filestore_ip_range: 192.168.32.0/24 - net1_range: 192.168.64.0/18 - rdma_net_range: 192.168.128.0/18 - # Cluster Settings - local_ssd_mountpoint: /mnt/localssd - instance_image: - project: $(vars.project_id) - family: $(vars.deployment_name)-u22 - disk_size_gb: 200 - nccl_plugin_version: v1.0.2 - -deployment_groups: -- group: image-env - modules: - - id: slurm-image-network - source: modules/network/vpc - - - id: slurm-build-script - source: modules/scripts/startup-script - settings: - install_ansible: true - docker: - enabled: true - runners: - - type: data - destination: /etc/cluster_toolkit/a3ultra-prod-slurm-image.yaml - source: ../.ghpc/artifacts/expanded_blueprint.yaml - - type: data - destination: /var/tmp/slurm_vars.json - content: | - { - "reboot": false, - "install_cuda": false, - "install_gcsfuse": true, - "install_lustre": false, - "install_ompi": true, - "update_kernel": false, - "monitoring_agent": "cloud-ops", - } - - type: shell - destination: install_slurm.sh - content: | - #!/bin/bash - set -e -o pipefail - ansible-pull \ - -U https://github.com/GoogleCloudPlatform/slurm-gcp -C $(vars.build_slurm_from_git_ref) \ - -i localhost, --limit localhost --connection=local \ - -e @/var/tmp/slurm_vars.json \ - ansible/playbook.yml - # this duplicates the ulimits configuration of the HPC VM Image - - type: data - destination: /etc/security/limits.d/99-unlimited.conf - content: | - * - 
memlock unlimited - * - nproc unlimited - * - stack unlimited - * - nofile 1048576 - * - cpu unlimited - * - rtprio unlimited - - type: data - destination: /etc/systemd/system/slurmd.service.d/file_ulimit.conf - content: | - [Service] - LimitNOFILE=infinity - - type: data - destination: /etc/netplan/60-cloud-mrdma-init.yaml - content: | - network: - ethernets: - primary: - match: - name: enp0s* - driver: gve - dhcp4: true - dhcp4-overrides: - use-domains: true - dhcp6: true - dhcp6-overrides: - use-domains: true - optional: true - secondary: - match: - driver: gve - dhcp4: true - dhcp4-overrides: - use-domains: false - use-dns: false - use-ntp: false - dhcp6: true - dhcp6-overrides: - use-domains: false - use-dns: false - use-ntp: false - optional: true - mrdma_devices: - match: - driver: mlx5_core - dhcp-identifier: mac - dhcp4: true - dhcp4-overrides: - use-domains: true - use-dns: false - use-ntp: false - optional: true - version: 2 - - type: ansible-local - destination: configure_gpu.yml - content: | - --- - - name: Install NVIDIA packages - hosts: all - become: true - vars: - distribution: "{{ ansible_distribution | lower }}{{ ansible_distribution_version | replace('.','') }}" - cuda_repo_url: https://developer.download.nvidia.com/compute/cuda/repos/{{ distribution }}/x86_64/cuda-keyring_1.1-1_all.deb - cuda_repo_filename: /tmp/{{ cuda_repo_url | basename }} - enable_nvidia_dcgm: false - nvidia_packages: - - cuda-toolkit-12-4 - - datacenter-gpu-manager - - libnvidia-nscq-550 - tasks: - - name: Download NVIDIA repository package - ansible.builtin.get_url: - url: "{{ cuda_repo_url }}" - dest: "{{ cuda_repo_filename }}" - - name: Install NVIDIA repository package - ansible.builtin.apt: - deb: "{{ cuda_repo_filename }}" - state: present - - name: Reduce NVIDIA repository priority - ansible.builtin.copy: - dest: /etc/apt/preferences.d/cuda-repository-pin-600 - mode: 0o0644 - owner: root - group: root - content: | - Package: nsight-compute - Pin: origin *ubuntu.com* - Pin-Priority: -1 - - Package: nsight-systems - Pin: origin *ubuntu.com* - Pin-Priority: -1 - - Package: * - Pin: release l=NVIDIA CUDA - Pin-Priority: 400 - - name: Install NVIDIA fabric and CUDA - ansible.builtin.apt: - name: "{{ item }}" - update_cache: true - loop: "{{ nvidia_packages }}" - - name: Freeze NVIDIA fabric and CUDA - ansible.builtin.dpkg_selections: - name: "{{ item }}" - selection: hold - loop: "{{ nvidia_packages }}" - post_tasks: - - name: Disable NVIDIA DCGM by default (enable during boot on GPU nodes) - ansible.builtin.service: - name: nvidia-dcgm.service - state: stopped - enabled: false - - type: ansible-local - destination: install_mellanox_drivers.yml - content: | - --- - - name: Update Netplan and Install Network Utils - hosts: all - become: true - tasks: - - name: Install Linux Modules Extra - ansible.builtin.package: - name: - - ibverbs-utils - state: present - - name: Apply netplan - ansible.builtin.command: netplan apply - -- group: image - modules: - - id: slurm-a3ultra-image - source: modules/packer/custom-image - kind: packer - settings: - disk_size: $(vars.disk_size_gb) - machine_type: $(vars.image_build_machine_type) - source_image_family: $(vars.base_image.family) - source_image_project_id: [$(vars.base_image.project)] - image_family: $(vars.instance_image.family) - omit_external_ip: false - use: - - slurm-image-network - - slurm-build-script - -- group: cluster-env - modules: - - id: a3ultra-slurm-net-0 - source: modules/network/vpc - settings: - network_name: $(vars.deployment_name)-net-0 
- mtu: 8896 - enable_internal_traffic: false # Setting firewall below instead - subnetworks: - - subnet_name: $(vars.deployment_name)-sub-0 - subnet_region: $(vars.region) - subnet_ip: $(vars.net0_range) - firewall_rules: - - name: $(vars.deployment_name)-internal-0 - ranges: [$(vars.net0_range)] - allow: - - protocol: tcp - - protocol: udp - - protocol: icmp - - - id: a3ultra-slurm-net-1 - source: modules/network/vpc - settings: - network_name: $(vars.deployment_name)-net-1 - mtu: 8896 - enable_internal_traffic: false # Setting firewall below instead - subnetworks: - - subnet_name: $(vars.deployment_name)-sub-1 - subnet_region: $(vars.region) - subnet_ip: $(vars.net1_range) - firewall_rules: - - name: $(vars.deployment_name)-internal-1 - ranges: [$(vars.net1_range)] - allow: - - protocol: tcp - - protocol: udp - - protocol: icmp - - - id: a3ultra-slurm-rdma-net - source: modules/network/gpu-rdma-vpc - settings: - network_name: $(vars.deployment_name)-rdma-net - network_profile: https://www.googleapis.com/compute/beta/projects/$(vars.project_id)/global/networkProfiles/$(vars.zone)-vpc-roce - network_routing_mode: REGIONAL - subnetworks_template: - name_prefix: $(vars.deployment_name)-mrdma-sub - count: 8 - ip_range: $(vars.rdma_net_range) - region: $(vars.region) - firewall_rules: - - name: $(vars.deployment_name)-internal-rdma - ranges: [$(vars.rdma_net_range)] - allow: - - protocol: tcp - - protocol: udp - - protocol: icmp - - - id: homefs - source: modules/file-system/filestore - use: - - a3ultra-slurm-net-0 - settings: - filestore_tier: HIGH_SCALE_SSD - size_gb: 10240 - local_mount: /home - reserved_ip_range: $(vars.filestore_ip_range) - deletion_protection: - enabled: true - reason: Avoid data loss - outputs: - - network_storage - -- group: cluster - modules: - - id: a3ultra_startup - source: modules/scripts/startup-script - settings: - local_ssd_filesystem: - mountpoint: $(vars.local_ssd_mountpoint) - permissions: "1777" # must quote numeric filesystem permissions! 
- docker: - enabled: true - world_writable: true - daemon_config: | - { - "data-root": "$(vars.local_ssd_mountpoint)/docker" - } - runners: - - type: data - destination: /etc/enroot/enroot.conf - content: | - ENROOT_RUNTIME_PATH $(vars.local_ssd_mountpoint)/${UID}/enroot/runtime - ENROOT_CACHE_PATH $(vars.local_ssd_mountpoint)/${UID}/enroot/cache - ENROOT_DATA_PATH $(vars.local_ssd_mountpoint)/${UID}/enroot/data - ENROOT_TEMP_PATH $(vars.local_ssd_mountpoint)/${UID}/enroot - - type: ansible-local - destination: nccl_plugin.yml - content: | - --- - - name: Install NCCL plugin for A3 Ultra series - hosts: all - become: true - tasks: - - name: Add SystemD unit for NCCL plugin installation - ansible.builtin.copy: - dest: /etc/systemd/system/nccl-plugin@.service - mode: 0o0644 - content: | - [Unit] - After=network-online.target - Before=slurmd.service - - [Service] - Type=oneshot - ExecStartPre=/usr/bin/rm -rf /usr/local/gib - ExecStartPre=/usr/bin/mkdir -p /usr/local/gib - ExecStartPre=/snap/bin/gcloud auth configure-docker --quiet us-docker.pkg.dev - ExecStart=/usr/bin/docker run --rm --name nccl-gib-installer --volume /usr/local/gib:/var/lib/gib \ - us-docker.pkg.dev/gce-ai-infra/gpudirect-gib/nccl-plugin-gib:%i install --install-nccl - - [Install] - WantedBy=slurmd.service - notify: - - Reload SystemD - handlers: - - name: Reload SystemD - ansible.builtin.systemd: - daemon_reload: true - post_tasks: - - name: Enable NCCL plugin SystemD unit - ansible.builtin.service: - name: nccl-plugin@$(vars.nccl_plugin_version).service - state: started - enabled: true - - - id: a3_ultra_nodeset - source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset - use: [a3ultra-slurm-net-0, a3ultra_startup] - settings: - bandwidth_tier: gvnic_enabled - machine_type: a3-ultragpu-8g - instance_image_custom: true - enable_public_ips: true - node_count_static: $(vars.a3u_cluster_size) - node_count_dynamic_max: 0 - enable_placement: false - disk_type: hyperdisk-balanced - on_host_maintenance: TERMINATE - reservation_name: $(vars.a3u_reservation_name) - additional_networks: - $(concat( - [{ - network=null, - subnetwork=a3ultra-slurm-net-1.subnetwork_self_link, - subnetwork_project=vars.project_id, - nic_type="GVNIC", - queue_count=null, - network_ip="", - stack_type=null, - access_config=[], - ipv6_access_config=[], - alias_ip_range=[] - }], - a3ultra-slurm-rdma-net.subnetwork_interfaces - )) - - - id: a3_ultra_partition - source: community/modules/compute/schedmd-slurm-gcp-v6-partition - use: - - a3_ultra_nodeset - settings: - exclusive: false - partition_name: a3ultra - is_default: true - partition_conf: - ResumeTimeout: 900 - SuspendTimeout: 600 - - - id: slurm_login - source: community/modules/scheduler/schedmd-slurm-gcp-v6-login - use: [a3ultra-slurm-net-0] - settings: - instance_image_custom: true - disk_size_gb: 300 - enable_login_public_ips: true - machine_type: n2-standard-8 - - - id: controller_startup - source: modules/scripts/startup-script - settings: - runners: - - type: shell - destination: stage_scripts.sh - content: | - #!/bin/bash - SLURM_ROOT=/opt/apps/adm/slurm - PARTITION_NAME=$(a3_ultra_partition.partitions[0].partition_name) - mkdir -m 0755 -p "${SLURM_ROOT}/scripts" - mkdir -p "${SLURM_ROOT}/partition-${PARTITION_NAME}-epilog_slurmd.d" - ln -s "/slurm/scripts/tools/gpu-test" "${SLURM_ROOT}/partition-${PARTITION_NAME}-epilog_slurmd.d/gpu-test.epilog_slurmd" - - - id: slurm_controller - source: community/modules/scheduler/schedmd-slurm-gcp-v6-controller - use: - - a3ultra-slurm-net-0 - - 
a3_ultra_partition - - slurm_login - - homefs - settings: - enable_controller_public_ips: true - instance_image_custom: true - disk_type: pd-extreme - disk_size_gb: 300 - machine_type: n2-standard-80 - controller_startup_script: $(controller_startup.startup_script) - enable_external_prolog_epilog: true From 5cad6083080ee85b4286a0691a3069808512554f Mon Sep 17 00:00:00 2001 From: Rohit Ramu Date: Tue, 7 Jan 2025 08:42:51 -0800 Subject: [PATCH 103/140] Delete examples/machine-learning/a3-ultragpu-8g/a3ultra-slurm-deployment.yaml --- .../a3ultra-slurm-deployment.yaml | 26 ------------------- 1 file changed, 26 deletions(-) delete mode 100644 examples/machine-learning/a3-ultragpu-8g/a3ultra-slurm-deployment.yaml diff --git a/examples/machine-learning/a3-ultragpu-8g/a3ultra-slurm-deployment.yaml b/examples/machine-learning/a3-ultragpu-8g/a3ultra-slurm-deployment.yaml deleted file mode 100644 index 6fa29af09e..0000000000 --- a/examples/machine-learning/a3-ultragpu-8g/a3ultra-slurm-deployment.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ---- -terraform_backend_defaults: - type: gcs - configuration: - bucket: # supply existing bucket to store Terraform state - -vars: - deployment_name: # supply unique deployment name - project_id: # supply existing project id - region: # supply region with a3-ultragpu-8g capacity in reservation - zone: # supply zone with a3-ultragpu-8g capacity in reservation - a3u_reservation_name: # supply a3-ultragpu-8g reservation name - a3u_cluster_size: # supply a3-ultragpu-8g reservation size From ec6d723b95f57e40ab85787bec3de873170086fb Mon Sep 17 00:00:00 2001 From: Rohit Ramu Date: Tue, 7 Jan 2025 08:51:31 -0800 Subject: [PATCH 104/140] Delete examples/machine-learning/a3-ultragpu-8g/a3ultra-vm.yaml --- .../a3-ultragpu-8g/a3ultra-vm.yaml | 151 ------------------ 1 file changed, 151 deletions(-) delete mode 100644 examples/machine-learning/a3-ultragpu-8g/a3ultra-vm.yaml diff --git a/examples/machine-learning/a3-ultragpu-8g/a3ultra-vm.yaml b/examples/machine-learning/a3-ultragpu-8g/a3ultra-vm.yaml deleted file mode 100644 index 25d7fd83bf..0000000000 --- a/examples/machine-learning/a3-ultragpu-8g/a3ultra-vm.yaml +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
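For context, a deployment file such as the a3ultra-slurm-deployment.yaml above is normally passed to the Cluster Toolkit binary together with its blueprint. The command below is a sketch based on general Toolkit usage rather than on anything in this patch series; the binary name, flags, and file names are assumptions:

```text
# Hypothetical invocation; adjust file names to match your checkout.
./gcluster deploy -d a3ultra-slurm-deployment.yaml a3ultra-slurm-blueprint.yaml --auto-approve
```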
- ---- - -blueprint_name: a3ultra-vm-instance - -vars: - project_id: # supply project ID - deployment_name: a3ultra-vm-instance - region: europe-west1 - zone: europe-west1-b - instance_image: - project: ubuntu-os-accelerator-images - family: ubuntu-accelerator-2204-amd64-with-nvidia-550 - net0_range: 192.168.0.0/19 - net1_range: 192.168.64.0/18 - filestore_ip_range: 192.168.32.0/24 - rdma_net_range: 192.168.128.0/18 - hostname_prefix: $(vars.deployment_name)-beowulf - -deployment_groups: -- group: primary - modules: - - - id: a3ultra-net-0 - source: modules/network/vpc - settings: - network_name: $(vars.deployment_name)-net-0 - mtu: 8896 - subnetworks: - - subnet_name: $(vars.deployment_name)-sub-0 - subnet_region: $(vars.region) - subnet_ip: $(vars.net0_range) - firewall_rules: - - name: $(vars.deployment_name)-internal-0 - ranges: [$(vars.net0_range)] - allow: - - protocol: tcp - - protocol: udp - - protocol: icmp - - - id: a3ultra-net-1 - source: modules/network/vpc - settings: - network_name: $(vars.deployment_name)-net-1 - mtu: 8896 - subnetworks: - - subnet_name: $(vars.deployment_name)-sub-1 - subnet_region: $(vars.region) - subnet_ip: $(vars.net1_range) - firewall_rules: - - name: $(vars.deployment_name)-internal-1 - ranges: [$(vars.net1_range)] - allow: - - protocol: tcp - - protocol: udp - - protocol: icmp - - - id: a3ultra-rdma-net - source: modules/network/gpu-rdma-vpc - settings: - network_name: $(vars.deployment_name)-rdma-net - network_profile: https://www.googleapis.com/compute/beta/projects/$(vars.project_id)/global/networkProfiles/$(vars.zone)-vpc-roce - network_routing_mode: REGIONAL - subnetworks_template: - name_prefix: $(vars.deployment_name)-mrdma-sub - count: 8 - ip_range: $(vars.rdma_net_range) - region: $(vars.region) - firewall_rules: - - name: $(vars.deployment_name)-internal-rdma - ranges: [$(vars.rdma_net_range)] - allow: - - protocol: tcp - - protocol: udp - - protocol: icmp - - - id: homefs - source: modules/file-system/filestore - use: [a3ultra-net-0] - settings: - filestore_tier: HIGH_SCALE_SSD - size_gb: 10240 - local_mount: /home - reserved_ip_range: $(vars.filestore_ip_range) - outputs: - - network_storage - - - id: startup-script - source: modules/scripts/startup-script - settings: - configure_ssh_host_patterns: - - $(vars.hostname_prefix)-* - - - id: a3ultra-vms - source: modules/compute/vm-instance - use: [startup-script, homefs] - settings: - machine_type: a3-ultragpu-8g - instance_count: 2 - name_prefix: $(vars.hostname_prefix) - disk_type: hyperdisk-balanced - automatic_restart: true - on_host_maintenance: TERMINATE - reservation_name: # supply reservation name - network_interfaces: - $(concat( - [{ - network=null, - subnetwork=a3ultra-net-0.subnetwork_self_link, - subnetwork_project=vars.project_id, - nic_type="GVNIC", - queue_count=null, - network_ip=null, - stack_type=null, - access_config=[{nat_ip=null, public_ptr_domain_name=null, network_tier=null}], - ipv6_access_config=[], - alias_ip_range=[] - }, - { - network=null, - subnetwork=a3ultra-net-1.subnetwork_self_link, - subnetwork_project=vars.project_id, - nic_type="GVNIC", - queue_count=null, - network_ip=null, - stack_type=null, - access_config=[{nat_ip=null, public_ptr_domain_name=null, network_tier=null}], - ipv6_access_config=[], - alias_ip_range=[] - }], - a3ultra-rdma-net.subnetwork_interfaces, - )) - - - id: wait-for-vms - source: community/modules/scripts/wait-for-startup - settings: - instance_names: $(a3ultra-vms.name) - timeout: 7200 From 9e13c07fcedf29a502f68c6c5001873095947b95 
Mon Sep 17 00:00:00 2001 From: Rohit Ramu Date: Tue, 7 Jan 2025 08:51:46 -0800 Subject: [PATCH 105/140] Delete examples/machine-learning/a3-ultragpu-8g/nccl-tests/README.md --- .../a3-ultragpu-8g/nccl-tests/README.md | 89 ------------------- 1 file changed, 89 deletions(-) delete mode 100644 examples/machine-learning/a3-ultragpu-8g/nccl-tests/README.md diff --git a/examples/machine-learning/a3-ultragpu-8g/nccl-tests/README.md b/examples/machine-learning/a3-ultragpu-8g/nccl-tests/README.md deleted file mode 100644 index 3f6dfab5c9..0000000000 --- a/examples/machine-learning/a3-ultragpu-8g/nccl-tests/README.md +++ /dev/null @@ -1,89 +0,0 @@ -The examples in this directory are used to show how enroot + pyxis can be used -to launch containerized workloads via Slurm. - -Contents: - -* `build-nccl-tests.sh`: A Slurm batch script for building the nccl-tests. -* `run-nccl-tests.sh`: A Slurm batch script for running the nccl-tests - `all_reduce_perf` benchmark. -* `import_container.sh`: Uses enroot to create a squashfs container image. Added - for reference only. enroot import happens within the `build-nccl-tests.sh`. - -# Running NCCL-Tests via Enroot/Pyxis - -In general, the workflow to deploy GPUDirect-RDMA-enabled workloads via enroot-pyxis is -the following: - -1. Convert your container into a squashfs based container image -2. Set required environment variables -3. Run your application workload - -## TLDR - -For an end-to-end example, copy the `build-nccl-tests.sh` and -`run-nccl-tests.sh` to your login node. - -And run the following: - -```text -BUILD_JOB=$(sbatch --parsable build-nccl-tests.sh) # takes ~4 minutes -sbatch -d afterok:${BUILD_JOB} run-nccl-tests.sh # takes ~3 minutes -``` - -The latter should result in a slurm-XX.out file that contains the result of the nccl -`all_gather_perf` benchmark: - -```text -# -# out-of-place in-place -# size count type redop root time algbw busbw #wrong time algbw busbw #wrong -# (B) (elements) (us) (GB/s) (GB/s) (us) (GB/s) (GB/s) - 268435456 4194304 float none -1 XXXXX XXX.XX XXX.XX N/A XXXXXX XXX.XX XXX.XX 0 - 536870912 8388608 float none -1 XXXXX XXX.XX XXX.XX N/A XXXXXX XXX.XX XXX.XX 0 - 1073741824 16777216 float none -1 XXXXX XXX.XX XXX.XX N/A XXXXXX XXX.XX XXX.XX 0 - 2147483648 33554432 float none -1 XXXXX XXX.XX XXX.XX N/A XXXXXX XXX.XX XXX.XX 0 - 4294967296 67108864 float none -1 XXXXX XXX.XX XXX.XX N/A XXXXXX XXX.XX XXX.XX 0 - 8589934592 134217728 float none -1 XXXXX XXX.XX XXX.XX N/A XXXXXX XXX.XX XXX.XX 0 -# Out of bounds values : 0 OK -# Avg bus bandwidth : XXX.XX -# -``` - -For more details, follow the remainder of this README. - -## Detailed Instructions - -All of the following should be done on the login node of your slurm cluster, -and from somewhere on the shared Filestore filesystem (typically the user's -home directory). - -### Building NCCL-tests - -See build-nccl-tests.sh for an example. Within it, you will see that first we'll -create a squashfs version of the container we want to launch using `enroot -import`. We do this because otherwise we'd be pulling the (typically more than -10GB) image multiple times from the source on each node, converting to sqsh each -time, etc, which would make the job launch longer. - -For building the nccl-tests binaries, we use `pyxis` to run the enroot container -and build the nccl-tests within that container to ensure the resulting binaries -are compatible with the container environment.
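To make the import-then-build flow concrete, here is a rough sketch of the two steps described above. The container URI, squashfs path, partition name, and build command are placeholders rather than values taken from `build-nccl-tests.sh`:

```text
# Hypothetical sketch of the flow implemented by build-nccl-tests.sh.
# 1. Import the container once into a shared squashfs image.
enroot import -o ./nccl-build.sqsh docker://nvcr.io#nvidia/pytorch:24.04-py3

# 2. Build nccl-tests inside that container via pyxis so the binaries match
#    the container's CUDA and NCCL environment.
srun --partition=a3ultra --ntasks=1 \
  --container-image=./nccl-build.sqsh \
  --container-mounts="${PWD}:/workspace" \
  bash -c "cd /workspace && git clone https://github.com/NVIDIA/nccl-tests.git && cd nccl-tests && make -j"
```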
- -Both of the above (importing and building) are accomplished by running: - -```text -sbatch build-nccl-tests.sh -``` - -### Running your application on a3-ultra instances - -For a complete example, run: - -```text -sbatch run-nccl-tests.sh -``` - -The output will appear in a `slurm-.log` file. If the name of your a3-ultragpu -partition is different from "a3ultra", you will need to modify the `build-nccl-tests.sh` -and `run-nccl-tests.sh` scripts' `#SBATCH --partition` setting. Alternatively, you -can run `sbatch -p