From 0edcf73831567da242aac491821a96d61089e76f Mon Sep 17 00:00:00 2001 From: Oliver Elbert Date: Wed, 28 Aug 2024 16:50:00 -0400 Subject: [PATCH 1/7] adding block merging to netcdf conversion --- external/dace | 2 +- external/gt4py | 2 +- ndsl/stencils/testing/serialbox_to_netcdf.py | 57 +++++++++++++++++--- 3 files changed, 53 insertions(+), 8 deletions(-) diff --git a/external/dace b/external/dace index ee5a6dfe..22982afe 160000 --- a/external/dace +++ b/external/dace @@ -1 +1 @@ -Subproject commit ee5a6dfe695f329c3882105b087f3563a0c80b81 +Subproject commit 22982afe133bccd906d5eeee448092f5f065ff6a diff --git a/external/gt4py b/external/gt4py index 32dde792..d6dfd6ff 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit 32dde792bde505807a5729261e4f1d12a1451bdb +Subproject commit d6dfd6ff46cc1d50b0fb6d05fb0b6271e4a1f5cc diff --git a/ndsl/stencils/testing/serialbox_to_netcdf.py b/ndsl/stencils/testing/serialbox_to_netcdf.py index 11814fff..c34b8fc3 100644 --- a/ndsl/stencils/testing/serialbox_to_netcdf.py +++ b/ndsl/stencils/testing/serialbox_to_netcdf.py @@ -32,6 +32,12 @@ def get_parser(): type=str, help="[Optional] Give the name of the data, will default to Generator_rankX", ) + parser.add_argument( + "-m", "--merge", + action='store_true', + default=False, + help="merges datastreams blocked into separate savepoints" + ) return parser @@ -58,7 +64,7 @@ def get_serializer(data_path: str, rank: int, data_name: Optional[str] = None): return serialbox.Serializer(serialbox.OpenModeKind.Read, data_path, name) -def main(data_path: str, output_path: str, data_name: Optional[str] = None): +def main(data_path: str, output_path: str, merge_blocks: bool, data_name: Optional[str] = None): os.makedirs(output_path, exist_ok=True) namelist_filename_in = os.path.join(data_path, "input.nml") @@ -69,9 +75,20 @@ def main(data_path: str, output_path: str, data_name: Optional[str] = None): if namelist_filename_out != namelist_filename_in: shutil.copyfile(os.path.join(data_path, "input.nml"), namelist_filename_out) namelist = f90nml.read(namelist_filename_out) - total_ranks = ( - 6 * namelist["fv_core_nml"]["layout"][0] * namelist["fv_core_nml"]["layout"][1] - ) + if namelist["fv_core_nml"]["grid_type"] <= 3: + total_ranks = ( + 6 * namelist["fv_core_nml"]["layout"][0] * namelist["fv_core_nml"]["layout"][1] + ) + else: + total_ranks = ( + namelist["fv_core_nml"]["layout"][0] * namelist["fv_core_nml"]["layout"][1] + ) + nx = int((namelist["fv_core_nml"]['npx'] - 1) / ( + namelist["fv_core_nml"]['layout'][0] + )) + ny = int((namelist["fv_core_nml"]['npy'] - 1) / ( + namelist["fv_core_nml"]['layout'][1] + )) # all ranks have the same names, just look at first one serializer_0 = get_serializer(data_path, rank=0, data_name=data_name) @@ -96,8 +113,33 @@ def main(data_path: str, output_path: str, data_name: Optional[str] = None): rank_data[name].append( read_serialized_data(serializer, savepoint, name) ) + if merge_blocks and len(rank_data[name] > 1): + full_data = np.array(rank_data[name]) + if len(full_data.shape) > 1: + if (nx * ny == full_data.shape[0] * full_data.shape[1]): + # If we have an (i, x) array from each block reshape it + new_shape = (nx, ny) + full_data.shape[2:] + full_data = full_data.reshape(new_shape) + elif full_data.shape[1] == namelist["fv_core_nml"]['npz']: + # If it's a k-array from each block just take one + full_data = full_data[0] + else: + return IndexError( + "Shape mismatch in block merging: " + f"{full_data.shape[0]} by {full_data.shape[1]} " + f"is not compatible with {nx} by {ny}" + ) + elif len(full_data.shape) == 1: + # if it's a scalar from each block then just take one + full_data = full_data[0] + else: + raise IndexError(f"{name} data appears to be empty") + rank_data[name] = [full_data] rank_list.append(rank_data) - n_savepoints = len(savepoints) # checking from last rank is fine + if merge_blocks: + n_savepoints = 1 + else: + n_savepoints = len(savepoints) # checking from last rank is fine data_vars = {} if n_savepoints > 0: encoding = {} @@ -166,7 +208,10 @@ def entry_point(): parser = get_parser() args = parser.parse_args() main( - data_path=args.data_path, output_path=args.output_path, data_name=args.data_name + data_path=args.data_path, + output_path=args.output_path, + merge_blocks=args.merge, + data_name=args.data_name, ) From 82df2292d6a844505d25f6a4a7d2185f6227f5fb Mon Sep 17 00:00:00 2001 From: Oliver Elbert Date: Thu, 29 Aug 2024 16:58:50 -0400 Subject: [PATCH 2/7] bigfix --- ndsl/stencils/testing/serialbox_to_netcdf.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ndsl/stencils/testing/serialbox_to_netcdf.py b/ndsl/stencils/testing/serialbox_to_netcdf.py index c34b8fc3..f812fd43 100644 --- a/ndsl/stencils/testing/serialbox_to_netcdf.py +++ b/ndsl/stencils/testing/serialbox_to_netcdf.py @@ -113,6 +113,7 @@ def main(data_path: str, output_path: str, merge_blocks: bool, data_name: Option rank_data[name].append( read_serialized_data(serializer, savepoint, name) ) + nblocks = len(rank_data.name) if merge_blocks and len(rank_data[name] > 1): full_data = np.array(rank_data[name]) if len(full_data.shape) > 1: @@ -120,11 +121,12 @@ def main(data_path: str, output_path: str, merge_blocks: bool, data_name: Option # If we have an (i, x) array from each block reshape it new_shape = (nx, ny) + full_data.shape[2:] full_data = full_data.reshape(new_shape) - elif full_data.shape[1] == namelist["fv_core_nml"]['npz']: - # If it's a k-array from each block just take one + elif full_data.shape[0] == nblocks: + # We have one array for all blocks + # could be a k-array or something else, so we take one copy full_data = full_data[0] else: - return IndexError( + raise IndexError( "Shape mismatch in block merging: " f"{full_data.shape[0]} by {full_data.shape[1]} " f"is not compatible with {nx} by {ny}" From 50a1f93a013d132a312ee1e720bb19afd65c70ba Mon Sep 17 00:00:00 2001 From: oelbert Date: Fri, 30 Aug 2024 12:35:02 -0400 Subject: [PATCH 3/7] bug --- ndsl/stencils/testing/serialbox_to_netcdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ndsl/stencils/testing/serialbox_to_netcdf.py b/ndsl/stencils/testing/serialbox_to_netcdf.py index f812fd43..07df7ca7 100644 --- a/ndsl/stencils/testing/serialbox_to_netcdf.py +++ b/ndsl/stencils/testing/serialbox_to_netcdf.py @@ -113,7 +113,7 @@ def main(data_path: str, output_path: str, merge_blocks: bool, data_name: Option rank_data[name].append( read_serialized_data(serializer, savepoint, name) ) - nblocks = len(rank_data.name) + nblocks = len(rank_data[name]) if merge_blocks and len(rank_data[name] > 1): full_data = np.array(rank_data[name]) if len(full_data.shape) > 1: From 50607ca96f938fef777ef81cfc56a16a009a50a3 Mon Sep 17 00:00:00 2001 From: oelbert Date: Fri, 30 Aug 2024 13:44:11 -0400 Subject: [PATCH 4/7] uninspiring revelation --- ndsl/stencils/testing/serialbox_to_netcdf.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/ndsl/stencils/testing/serialbox_to_netcdf.py b/ndsl/stencils/testing/serialbox_to_netcdf.py index 07df7ca7..a6c9db34 100644 --- a/ndsl/stencils/testing/serialbox_to_netcdf.py +++ b/ndsl/stencils/testing/serialbox_to_netcdf.py @@ -114,23 +114,24 @@ def main(data_path: str, output_path: str, merge_blocks: bool, data_name: Option read_serialized_data(serializer, savepoint, name) ) nblocks = len(rank_data[name]) - if merge_blocks and len(rank_data[name] > 1): + if merge_blocks and len(rank_data[name]) > 1: full_data = np.array(rank_data[name]) if len(full_data.shape) > 1: if (nx * ny == full_data.shape[0] * full_data.shape[1]): # If we have an (i, x) array from each block reshape it new_shape = (nx, ny) + full_data.shape[2:] full_data = full_data.reshape(new_shape) - elif full_data.shape[0] == nblocks: + else: # We have one array for all blocks # could be a k-array or something else, so we take one copy + # TODO: is there a decent check for this? full_data = full_data[0] - else: - raise IndexError( - "Shape mismatch in block merging: " - f"{full_data.shape[0]} by {full_data.shape[1]} " - f"is not compatible with {nx} by {ny}" - ) + #else: + # raise IndexError( + # "Shape mismatch in block merging: " + # f"{full_data.shape[0]} by {full_data.shape[1]} " + # f"is not compatible with {nx} by {ny}" + # ) elif len(full_data.shape) == 1: # if it's a scalar from each block then just take one full_data = full_data[0] From 24ddc5d65e762d410a3f6b914a7547399ae37db9 Mon Sep 17 00:00:00 2001 From: Oliver Elbert Date: Tue, 3 Sep 2024 10:25:48 -0400 Subject: [PATCH 5/7] lint --- ndsl/stencils/testing/serialbox_to_netcdf.py | 32 ++++++++++++-------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/ndsl/stencils/testing/serialbox_to_netcdf.py b/ndsl/stencils/testing/serialbox_to_netcdf.py index 07df7ca7..71010f8c 100644 --- a/ndsl/stencils/testing/serialbox_to_netcdf.py +++ b/ndsl/stencils/testing/serialbox_to_netcdf.py @@ -33,10 +33,11 @@ def get_parser(): help="[Optional] Give the name of the data, will default to Generator_rankX", ) parser.add_argument( - "-m", "--merge", - action='store_true', + "-m", + "--merge", + action="store_true", default=False, - help="merges datastreams blocked into separate savepoints" + help="merges datastreams blocked into separate savepoints", ) return parser @@ -64,7 +65,12 @@ def get_serializer(data_path: str, rank: int, data_name: Optional[str] = None): return serialbox.Serializer(serialbox.OpenModeKind.Read, data_path, name) -def main(data_path: str, output_path: str, merge_blocks: bool, data_name: Optional[str] = None): +def main( + data_path: str, + output_path: str, + merge_blocks: bool, + data_name: Optional[str] = None, +): os.makedirs(output_path, exist_ok=True) namelist_filename_in = os.path.join(data_path, "input.nml") @@ -77,18 +83,20 @@ def main(data_path: str, output_path: str, merge_blocks: bool, data_name: Option namelist = f90nml.read(namelist_filename_out) if namelist["fv_core_nml"]["grid_type"] <= 3: total_ranks = ( - 6 * namelist["fv_core_nml"]["layout"][0] * namelist["fv_core_nml"]["layout"][1] + 6 + * namelist["fv_core_nml"]["layout"][0] + * namelist["fv_core_nml"]["layout"][1] ) else: total_ranks = ( namelist["fv_core_nml"]["layout"][0] * namelist["fv_core_nml"]["layout"][1] ) - nx = int((namelist["fv_core_nml"]['npx'] - 1) / ( - namelist["fv_core_nml"]['layout'][0] - )) - ny = int((namelist["fv_core_nml"]['npy'] - 1) / ( - namelist["fv_core_nml"]['layout'][1] - )) + nx = int( + (namelist["fv_core_nml"]["npx"] - 1) / (namelist["fv_core_nml"]["layout"][0]) + ) + ny = int( + (namelist["fv_core_nml"]["npy"] - 1) / (namelist["fv_core_nml"]["layout"][1]) + ) # all ranks have the same names, just look at first one serializer_0 = get_serializer(data_path, rank=0, data_name=data_name) @@ -117,7 +125,7 @@ def main(data_path: str, output_path: str, merge_blocks: bool, data_name: Option if merge_blocks and len(rank_data[name] > 1): full_data = np.array(rank_data[name]) if len(full_data.shape) > 1: - if (nx * ny == full_data.shape[0] * full_data.shape[1]): + if nx * ny == full_data.shape[0] * full_data.shape[1]: # If we have an (i, x) array from each block reshape it new_shape = (nx, ny) + full_data.shape[2:] full_data = full_data.reshape(new_shape) From 6588154a61cb29c0ed5ecc2b2b211faf26f5f70c Mon Sep 17 00:00:00 2001 From: Oliver Elbert Date: Tue, 3 Sep 2024 10:38:02 -0400 Subject: [PATCH 6/7] removing dead code --- ndsl/stencils/testing/serialbox_to_netcdf.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ndsl/stencils/testing/serialbox_to_netcdf.py b/ndsl/stencils/testing/serialbox_to_netcdf.py index 66954a3a..a29139c5 100644 --- a/ndsl/stencils/testing/serialbox_to_netcdf.py +++ b/ndsl/stencils/testing/serialbox_to_netcdf.py @@ -134,12 +134,6 @@ def main( # could be a k-array or something else, so we take one copy # TODO: is there a decent check for this? full_data = full_data[0] - #else: - # raise IndexError( - # "Shape mismatch in block merging: " - # f"{full_data.shape[0]} by {full_data.shape[1]} " - # f"is not compatible with {nx} by {ny}" - # ) elif len(full_data.shape) == 1: # if it's a scalar from each block then just take one full_data = full_data[0] From cb4ce981e9cd8ab0ae43360a988251ade6321e8c Mon Sep 17 00:00:00 2001 From: Oliver Elbert Date: Tue, 3 Sep 2024 10:40:44 -0400 Subject: [PATCH 7/7] revert externals --- external/dace | 2 +- external/gt4py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/dace b/external/dace index 22982afe..ee5a6dfe 160000 --- a/external/dace +++ b/external/dace @@ -1 +1 @@ -Subproject commit 22982afe133bccd906d5eeee448092f5f065ff6a +Subproject commit ee5a6dfe695f329c3882105b087f3563a0c80b81 diff --git a/external/gt4py b/external/gt4py index d6dfd6ff..32dde792 160000 --- a/external/gt4py +++ b/external/gt4py @@ -1 +1 @@ -Subproject commit d6dfd6ff46cc1d50b0fb6d05fb0b6271e4a1f5cc +Subproject commit 32dde792bde505807a5729261e4f1d12a1451bdb