From 8869d789690fcf9a2f33a3214cb275e5a8addaae Mon Sep 17 00:00:00 2001
From: Roman Fitzjalen
Date: Wed, 12 Feb 2025 19:17:23 +0100
Subject: [PATCH] fix hostname -I for macOS #6497 (#6990)

BUGFIX for Apple Silicon hostname
https://github.com/microsoft/DeepSpeed/issues/6497

---------

Signed-off-by: Fabien Dupont
Signed-off-by: Olatunji Ruwase
Signed-off-by: Logan Adams
Signed-off-by: inkcherry
Signed-off-by: Roman Fitzjalen
Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
Co-authored-by: Fabien Dupont
Co-authored-by: Olatunji Ruwase
Co-authored-by: Liangliang Ma <1906710196@qq.com>
Co-authored-by: inkcherry
Signed-off-by: gyou2021
---
 deepspeed/comm/comm.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/deepspeed/comm/comm.py b/deepspeed/comm/comm.py
index 1aa9b135115b..800bc6078c3a 100755
--- a/deepspeed/comm/comm.py
+++ b/deepspeed/comm/comm.py
@@ -704,9 +704,13 @@ def mpi_discovery(distributed_port=TORCH_DISTRIBUTED_DEFAULT_PORT, verbose=True)
     master_addr = None
     if rank == 0:
         import shlex
-        hostname_cmd = shlex.split("hostname -I")
-        result = subprocess.check_output(hostname_cmd)
-        master_addr = result.decode('utf-8').split()[0]
+        try:
+            hostname_cmd = shlex.split("hostname -I")
+            result = subprocess.check_output(hostname_cmd)
+            master_addr = result.decode('utf-8').split()[0]
+        except subprocess.CalledProcessError: # hostname -I not available (e.g. on macOS)
+            import socket
+            master_addr = socket.gethostbyname(socket.gethostname())
     master_addr = comm.bcast(master_addr, root=0)
 
     # Determine local rank by assuming hostnames are unique