Skip to content

Commit

Permalink
Merge pull request GoogleCloudPlatform#3466 from chengcongdu/develop
Browse the repository at this point in the history
update a3mega nccl plugin to 1.0.7 and rxdm to 1.0.13_1
  • Loading branch information
chengcongdu authored Dec 26, 2024
2 parents 517adbc + 4969446 commit e17bb15
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions modules/compute/gke-node-pool/gpu_direct.tf
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ locals {
"a3-megagpu-8g" = {
# Manifest to be installed for enabling TCPXO on a3-megagpu-8g machines
gpu_direct_manifests = [
- "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/fee883360a660f71ba07478db95d5c1325322f77/gpudirect-tcpxo/nccl-tcpxo-installer.yaml", # nccl_plugin v1.0.4 for tcpxo
- "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/fee883360a660f71ba07478db95d5c1325322f77/nri_device_injector/nri-device-injector.yaml", # nri_plugin
+ "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/b324ec8994aa98ca320438dd2d01ff6d7f9165bb/gpudirect-tcpxo/nccl-tcpxo-installer.yaml", # nccl_plugin v1.0.7 for tcpxo
+ "https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/b324ec8994aa98ca320438dd2d01ff6d7f9165bb/nri_device_injector/nri-device-injector.yaml", # nri_plugin
]
updated_workload_path = replace(local.workload_path_tcpxo, ".yaml", "-tcpxo.yaml")
- rxdm_version = "v1.0.10" # matching nccl-tcpxo-installer version v1.0.4
+ rxdm_version = "v1.0.13_1" # matching nccl-tcpxo-installer version v1.0.7
min_additional_networks = 8
major_minor_version_acceptable_map = {
"1.28" = "1.28.9-gke.1250000"
Expand Down

0 comments on commit e17bb15

Please sign in to comment.