From 7e05db365d0039b60a0bff323bcfea3196195423 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Mon, 6 Feb 2023 14:34:57 -0500 Subject: [PATCH 1/8] Add zero-diff column in print_totals gcpy/benchmark.py - Now use f-strings to simplify print statements - Use numpy.array_equal to test if the Ref and Dev numpy arrays (after masking, if necessary) are equal. - Print a column for zero-diff results Signed-off-by: Bob Yantosca --- gcpy/util.py | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/gcpy/util.py b/gcpy/util.py index 26319f0e..53c3325a 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -227,34 +227,34 @@ def print_totals( # ================================================================== # Sum the Ref array (or set to NaN if missing) # ================================================================== + refarr = ref.values if ref_is_all_nan: total_ref = np.nan else: - if masks is None: - total_ref = np.sum(ref.values) - else: - arr = np.ma.masked_array(ref.values, masks["Ref_TropMask"]) - total_ref = np.sum(arr) + if masks is not None: + refarr = np.ma.masked_array(refarr, masks["Ref_TropMask"]) + total_ref = np.sum(refarr) # ================================================================== # Sum the Dev array (or set to NaN if missing) # ================================================================== + devarr = dev.values if dev_is_all_nan: total_dev = np.nan else: - if masks is None: - total_dev = np.sum(dev.values) - else: - arr = np.ma.masked_array(dev.values, masks["Dev_TropMask"]) - total_dev = np.sum(arr) + if masks is not None: + devarr = np.ma.masked_array(devarr, masks["Dev_TropMask"]) + total_dev = np.sum(devarr) # ================================================================== # Compute differences (or set to NaN if missing) # ================================================================== if ref_is_all_nan or dev_is_all_nan: diff = np.nan + zero_diff = False else: diff = total_dev - total_ref + zero_diff = np.array_equal(refarr, devarr) # ================================================================== # Compute % differences (or set to NaN if missing) @@ -270,10 +270,7 @@ def print_totals( # ================================================================== # Write output to file # ================================================================== - print( - f"{display_name.ljust(18)} : {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f}", file=f - ) - + print(f"{display_name.ljust(14)} : {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {zero_diff}", file=f) def get_species_categories( benchmark_type="FullChemBenchmark" @@ -2119,3 +2116,20 @@ def read_config_file(config_file, quiet=False): raise Exception(msg) from err return config + + +def is_zero_diff(refdr, devdr): + """ + Returns True if two DataArray objects contain identical data, + or False otherwise + + Args: + ----- + refdr (xarray DataArray) + The "Ref" DataArray object to be tested. + devdr (xarray DataArray) + The "Dev" DataArray object to be tested + """ + if not np.array_equal(refdr.values, devdr.values): + return False + return True From d1aa8bf28d70d1e94434797578ae23dfa00d1e5a Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Mon, 6 Feb 2023 15:15:01 -0500 Subject: [PATCH 2/8] Now print if Dev and Ref are identical at top of mass table file gcpy/benchmark.py - Update try/catch statement for file open - Use xr.Dataset.equals to test if the Ref and Dev datasets are identical to each other or not. (NOTE: This will likely fail when testing GCC vs GCHP, since the coordinates and data variables are different). This was the best way to proceed because in routine print_totals, the test for zero-diff is done per variable. - Now print out identicality or difference of Dev vs Ref in the table header gcpy/util.py - Restored the display name to a width of 19, as this is also used by the emissions tables. Signed-off-by: Bob Yantosca --- gcpy/benchmark.py | 39 ++++++++++++++++++--------------------- gcpy/util.py | 3 ++- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index 851ebc1a..d1f7597b 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -433,32 +433,29 @@ def create_global_mass_table( # Create file try: f = open(outfilename, "w") - except FileNotFoundError: - msg = "Could not open {} for writing!".format(outfilename) + except (IOError, OSError, FileNotFoundError) as e: + msg = f"Could not open {outfilename} for writing!" raise FileNotFoundError(msg) + # Determine if the two data sets are identical + diff_str="### Dev differs from Ref" + if xr.Dataset.equals(refdata, devdata): + diff_str="### Dev is identical to Ref" + # Title strings + title1 = f"### Global mass (Gg) {label} (Trop + Strat)" if trop_only: - title1 = "### Global mass (Gg) {} (Trop only)".format(label) - else: - title1 = "### Global mass (Gg) {} (Trop + Strat)".format(label) - title2 = "### Ref = {}; Dev = {}".format(refstr, devstr) + title1 = f"### Global mass (Gg) {label} (Trop only)" + title2 = f"### Ref = {refstr}; Dev = {devstr}" # Print header to file print("#" * 83, file=f) - print("{}{}".format(title1.ljust(80), "###"), file=f) - print("{}{}".format(title2.ljust(80), "###"), file=f) + print(f"{title1 : <80}{'###'}", file=f) + print(f"{title2 : <80}{'###'}", file=f) + print(f"{'###' : <80}{'###'}", file=f) + print(f"{diff_str : <80}{'###'}", file=f) print("#" * 83, file=f) - print( - "{}{}{}{}{}".format( - " ".ljust(19), - "Ref".rjust(20), - "Dev".rjust(20), - "Dev - Ref".rjust(14), - "% diff".rjust(10), - ), - file=f, - ) + print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f) # ================================================================== # Print global masses for all species @@ -521,7 +518,7 @@ def create_global_mass_table( delta_p=met_and_masks["Dev_Delta_P"], box_height=met_and_masks["Dev_BxHeight"], ) - + # ============================================================== # Print global masses for Ref and Dev # (we will mask out tropospheric boxes in util.print_totals) @@ -531,13 +528,13 @@ def create_global_mass_table( refarray, devarray, f, - masks=met_and_masks, + masks=met_and_masks ) else: util.print_totals( refarray, devarray, - f, + f ) # ================================================================== diff --git a/gcpy/util.py b/gcpy/util.py index 53c3325a..aa46b313 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -270,7 +270,8 @@ def print_totals( # ================================================================== # Write output to file # ================================================================== - print(f"{display_name.ljust(14)} : {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {zero_diff}", file=f) + print(f"{display_name.ljust(19)} : {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {zero_diff}", file=f) + def get_species_categories( benchmark_type="FullChemBenchmark" From d3d5568b17d4cd4f975f93e27144dbcaa6e6204b Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Mon, 6 Feb 2023 16:40:40 -0500 Subject: [PATCH 3/8] Display if Dev and Ref are identical in emissions & inventory tables gcpy/benchmark.py - In create_benchmark_emission_tables: - Make sure columns and headers line up - Now compute if each diagnostic for an emissions species or inventory is identical, and display that information in the header - Improve error trapping when opening the file for output - In create_benchmark_mass_tables: - Make sure columns and headers line up gcpy/util.py - In routine print_totals, make sure the header line with Ref & Dev matches up with the output of create_benchmark_emissions_tables and create_benchmark_mass_tables - Removed is_zero_diff function, we will add another function like it later. Signed-off-by: Bob Yantosca --- gcpy/benchmark.py | 63 ++++++++++++++++++++++++++--------------------- gcpy/util.py | 21 ++-------------- 2 files changed, 37 insertions(+), 47 deletions(-) diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index d1f7597b..532caac3 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -176,10 +176,11 @@ def create_total_emissions_table( # ================================================================= # Open the file for output # ================================================================= + # Create file try: f = open(outfilename, "w") - except FileNotFoundError: - msg = "Could not open {} for writing!".format(outfilename) + except (IOError, OSError, FileNotFoundError) as e: + msg = f"Could not open {outfilename} for writing!" raise FileNotFoundError(msg) # ================================================================= @@ -220,29 +221,35 @@ def create_total_emissions_table( # Title strings if "Inv" in template: - print("Computing inventory totals for {}".format(species_name)) - title1 = "### Emissions totals for inventory {} [Tg]".format( - species_name) + print(f"Computing inventory totals for {species_name}") + title0 = f"for inventory {species_name}" + title1 = f"### Emissions totals {title0} [Tg]" else: - print("Computing emissions totals for {}".format(species_name)) - title1 = "### Emissions totals for species {} [Tg]".format(species_name) - - title2 = "### Ref = {}; Dev = {}".format(refstr, devstr) + print(f"Computing emissions totals for {species_name}") + title0 = f"for species {species_name}" + title1 = f"### Emissions totals {title0} [Tg]" + + title2 = f"### Ref = {refstr}; Dev = {devstr}" + + # Determine if the all DataArrays for a given species or + # have identical data, and define a display string. + diagnames = [v for v in varnames if species_name in v] + diff_ct = 0 + for v in diagnames: + if np.array_equal(refdata[v].values, devdata[v].values): + diff_ct += 1 + diff_str = f"### Dev differs from Ref {title0}" + if diff_ct == len(diagnames): + diff_str = f"### Dev is identical to Ref {title0}" # Print header to file - print("#" * 83, file=f) - print("{}{}".format(title1.ljust(80), "###"), file=f) - print("{}{}".format(title2.ljust(80), "###"), file=f) - print("#" * 83, file=f) - print( - "{}{}{}{}{}".format( - " ".ljust(19), - "Ref".rjust(20), - "Dev".rjust(20), - "Dev - Ref".rjust(14), - "% diff".rjust(10), - ), - file=f) + print("#" * 91, file=f) + print(f"{title1 : <88}{'###'}", file=f) + print(f"{title2 : <88}{'###'}", file=f) + print(f"{'###' : <88}{'###'}", file=f) + print(f"{diff_str : <88}{'###'}", file=f) + print("#" * 91, file=f) + print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f) # ============================================================= # Loop over all emissions variables corresponding to this @@ -449,12 +456,12 @@ def create_global_mass_table( title2 = f"### Ref = {refstr}; Dev = {devstr}" # Print header to file - print("#" * 83, file=f) - print(f"{title1 : <80}{'###'}", file=f) - print(f"{title2 : <80}{'###'}", file=f) - print(f"{'###' : <80}{'###'}", file=f) - print(f"{diff_str : <80}{'###'}", file=f) - print("#" * 83, file=f) + print("#" * 91, file=f) + print(f"{title1 : <88}{'###'}", file=f) + print(f"{title2 : <88}{'###'}", file=f) + print(f"{'###' : <88}{'###'}", file=f) + print(f"{diff_str : <88}{'###'}", file=f) + print("#" * 91, file=f) print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f) # ================================================================== diff --git a/gcpy/util.py b/gcpy/util.py index aa46b313..0232722b 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -222,7 +222,7 @@ def print_totals( # Special handling for totals if "_TOTAL" in diagnostic_name.upper(): - print("-"*83, file=f) + print("-"*90, file=f) # ================================================================== # Sum the Ref array (or set to NaN if missing) @@ -270,7 +270,7 @@ def print_totals( # ================================================================== # Write output to file # ================================================================== - print(f"{display_name.ljust(19)} : {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {zero_diff}", file=f) + print(f"{display_name.ljust(19)}: {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {zero_diff}", file=f) def get_species_categories( @@ -2117,20 +2117,3 @@ def read_config_file(config_file, quiet=False): raise Exception(msg) from err return config - - -def is_zero_diff(refdr, devdr): - """ - Returns True if two DataArray objects contain identical data, - or False otherwise - - Args: - ----- - refdr (xarray DataArray) - The "Ref" DataArray object to be tested. - devdr (xarray DataArray) - The "Dev" DataArray object to be tested - """ - if not np.array_equal(refdr.values, devdr.values): - return False - return True From 53b75ad17b61ee907bd56145551aa6901d43f02d Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Tue, 7 Feb 2023 15:53:42 -0500 Subject: [PATCH 4/8] Add list of species that differ to top of emissions, mass tables gcpy/benchmark.py - In routines create_benchmark_mass_tables and create_benchmark_emission_tables - Add placeholder text for the list of species diffs - Define "diff_list" list to hold the list of species names for species w/ nonzero diffs - Remove references to np.array_equal and xr.dataset.equal - Header lines are now 89 chars wide - Now pass diff_list to util.print_totals - Trim trailing whitespace - Comment out code in create_benchmark_summary_table for now, we will add this functionality in soon. gcpy/util.py - Now import textwrap.wrap function - In function "print_totals" - Now accepts and returns "diff_list" - Add more error checks - Compute total_ref and total_dev in 64-bit floating point - Added function "unique_values" - Added function "insert_text_into_file" Signed-off-by: Bob Yantosca --- gcpy/benchmark.py | 206 +++++++++++++++++++++++++++++++++++++--------- gcpy/util.py | 131 ++++++++++++++++++++++++++--- 2 files changed, 288 insertions(+), 49 deletions(-) diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index 532caac3..71ca17d2 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -1,4 +1,4 @@ -""" +6""" Specific utilities for creating plots from GEOS-Chem benchmark simulations. """ @@ -180,8 +180,16 @@ def create_total_emissions_table( try: f = open(outfilename, "w") except (IOError, OSError, FileNotFoundError) as e: - msg = f"Could not open {outfilename} for writing!" - raise FileNotFoundError(msg) + raise e(f"Could not open {outfilename} for writing!") + + # Write a placeholder to the file that denotes where + # the list of species with differences will be written + placeholder = "@%% insert diff_list here %%@" + print(f"Species that differ between {refstr} and {devstr}", file=f) + print(f"{placeholder}\n\n", file=f) + + # Define a list for differences + diff_list = [] # ================================================================= # Loop through all of the species are in species_dict @@ -231,25 +239,12 @@ def create_total_emissions_table( title2 = f"### Ref = {refstr}; Dev = {devstr}" - # Determine if the all DataArrays for a given species or - # have identical data, and define a display string. - diagnames = [v for v in varnames if species_name in v] - diff_ct = 0 - for v in diagnames: - if np.array_equal(refdata[v].values, devdata[v].values): - diff_ct += 1 - diff_str = f"### Dev differs from Ref {title0}" - if diff_ct == len(diagnames): - diff_str = f"### Dev is identical to Ref {title0}" - # Print header to file - print("#" * 91, file=f) - print(f"{title1 : <88}{'###'}", file=f) - print(f"{title2 : <88}{'###'}", file=f) - print(f"{'###' : <88}{'###'}", file=f) - print(f"{diff_str : <88}{'###'}", file=f) - print("#" * 91, file=f) - print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f) + print("#" * 89, file=f) + print(f"{title1 : <86}{'###'}", file=f) + print(f"{title2 : <86}{'###'}", file=f) + print("#" * 89, file=f) + print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'diffs'}", file=f) # ============================================================= # Loop over all emissions variables corresponding to this @@ -335,17 +330,30 @@ def create_total_emissions_table( # ========================================================== # Print emission totals for Ref and Dev # ========================================================== - util.print_totals(refarray, devarray, f) + util.print_totals( + refarray, + devarray, + f, + diff_list + ) # Add newlines before going to the next species print(file=f) print(file=f) # ================================================================= - # Close file + # Cleanup and quit # ================================================================= + + # Close file f.close() + # Reopen file and insert list of species with nonzero diffs + util.insert_text_into_file( + filename=outfilename, + search_text=placeholder, + replace_text=util.unique_values(diff_list, drop=[None]) + ) def create_global_mass_table( refdata, @@ -444,10 +452,8 @@ def create_global_mass_table( msg = f"Could not open {outfilename} for writing!" raise FileNotFoundError(msg) - # Determine if the two data sets are identical - diff_str="### Dev differs from Ref" - if xr.Dataset.equals(refdata, devdata): - diff_str="### Dev is identical to Ref" + # Define a list for differences + diff_list = [] # Title strings title1 = f"### Global mass (Gg) {label} (Trop + Strat)" @@ -456,18 +462,25 @@ def create_global_mass_table( title2 = f"### Ref = {refstr}; Dev = {devstr}" # Print header to file - print("#" * 91, file=f) - print(f"{title1 : <88}{'###'}", file=f) - print(f"{title2 : <88}{'###'}", file=f) - print(f"{'###' : <88}{'###'}", file=f) - print(f"{diff_str : <88}{'###'}", file=f) - print("#" * 91, file=f) - print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f) + print("#" * 89, file=f) + print(f"{title1 : <86}{'###'}", file=f) + print(f"{title2 : <86}{'###'}", file=f) + print("#" * 89, file=f) + + # Write a placeholder to the file that denotes where + # the list of species with differences will be written + placeholder = "@%% insert diff_list here %%@" + print(f"\nSpecies that differ between {refstr} and {devstr}", file=f) + print(f"{placeholder}\n\n", file=f) + + # Column headers + print(f"{title2}", file=f) + print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'diffs'}", file=f) # ================================================================== # Print global masses for all species # - # NOTE: By this point, all species will be in both Ref and Dev' + # NOTE: By this point, all secies will be in both Ref and Dev' # because we have added them in the calling routine # ================================================================== for v in varlist: @@ -525,7 +538,7 @@ def create_global_mass_table( delta_p=met_and_masks["Dev_Delta_P"], box_height=met_and_masks["Dev_BxHeight"], ) - + # ============================================================== # Print global masses for Ref and Dev # (we will mask out tropospheric boxes in util.print_totals) @@ -535,20 +548,31 @@ def create_global_mass_table( refarray, devarray, f, + diff_list, masks=met_and_masks ) else: util.print_totals( refarray, devarray, - f + f, + diff_list ) # ================================================================== - # Close files + # Cleanup and quit # ================================================================== + + # Close file f.close() + # Reopen file and insert list of species with nonzero diffs + util.insert_text_into_file( + filename=outfilename, + search_text=placeholder, + replace_text=util.unique_values(diff_list, drop=[None]) + ) + def make_benchmark_conc_plots( ref, @@ -4367,3 +4391,109 @@ def get_species_database_dir(config): else: msg = f"Could not find the {spcdb_dir}/species_database.yml file!" raise FileNotFoundError(msg) + + +def create_benchmark_summary_table( + refpath, + refstr, + devpath, + devstr, + dst="./benchmark", + overwrite=False, + outfilename="Summary.txt", + verbose=False, + spcdb_dir=os.path.dirname(__file__) +): + """ + Creates a table of global masses for a list of species in contained in + two data sets. The data sets, which typically represent output from two + different model versions, are usually contained in netCDF data files. + + Args: + refdata: xarray Dataset + The first data set to be compared (aka "Reference"). + refstr: str + A string that can be used to identify refdata + (e.g. a model v2ersion number or other identifier). + devdata: xarray Dataset + The second data set to be compared (aka "Development"). + devstr: str + A string that can be used to identify the data set specified + by devfile (e.g. a model version number or other identifier). + varlist: list of strings + List of species concentation variable names to include + in the list of global totals. + met_and_masks: dict of xarray DataArray + Dictionary containing the meterological variables and + masks for the Ref and Dev datasets. + label: str + Label to go in the header string. Can be used to + pass the month & year. + + Keyword Args (optional): + trop_only: bool + Set this switch to True if you wish to print totals + only for the troposphere. + Default value: False (i.e. print whole-atmosphere totals). + outfilename: str + Name of the text file which will contain the table of + emissions totals. + Default value: "GlobalMass_TropStrat.txt" + verbose: bool + Set this switch to True if you wish to print out extra + informational messages. + Default value: False + spcdb_dir: str + Directory of species_datbase.yml file + Default value: Directory of GCPy code repository + + Remarks: + This method is mainly intended for model benchmarking purposes, + rather than as a general-purpose tool. + + Species properties (such as molecular weights) are read from a + YAML file called "species_database.yml". + """ + + # ================================================================== + # Initialization and data read + # ================================================================== + if os.path.isdir(dst) and not overwrite: + msg = "Directory {} exists. Pass overwrite=True to overwrite " \ + + "files in that directory, if any." + msg = msg.format(dst) + raise ValueError(msg) + if not os.path.isdir(dst): + os.mkdir(dst) + +# # ================================================================== +# # Close files +# # ================================================================== +# +# # Get a list of files in the Ref path +# ref_files = [] +# for (path, names, files) in os.walk(refpath): +# for rf in files: +# ref_files.append(os.path.join(path, rf)) +# +# # Get a list of files in the Ref path +# dev_files = [] +# for (path, names, files) in os.walk(devpath): +# for df in files: +# dev_files.append(os.path.join(path, df) +# +# # ================================================================== +# # Open file for output +# # ================================================================== +# +# # Create file +# try: +# f = open(os.path.join(dst, outfilename), "w") +# except (IOError, OSError, FileNotFoundError) as e: +# msg = f"Could not open {outfilename} for writing!" +# raise e(msg) +# +# # ================================================================== +# # Close files +# # ================================================================== +# f.close() diff --git a/gcpy/util.py b/gcpy/util.py index 0232722b..ea1ebba9 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -10,6 +10,7 @@ import numpy as np import xarray as xr from PyPDF2 import PdfFileWriter, PdfFileReader +from textwrap import wrap def convert_lon( data, @@ -161,7 +162,8 @@ def print_totals( ref, dev, f, - masks=None + diff_list, + masks=None, ): """ Computes and prints Ref and Dev totals (as well as the difference @@ -194,9 +196,11 @@ def print_totals( # Make sure that both Ref and Dev are xarray DataArray objects if not isinstance(ref, xr.DataArray): - raise TypeError("The ref argument must be an xarray DataArray!") + raise TypeError("The 'ref' argument must be an xarray DataArray!") if not isinstance(dev, xr.DataArray): - raise TypeError("The dev argument must be an xarray DataArray!") + raise TypeError("The 'dev' argument must be an xarray DataArray!") + if not isinstance(diff_list, list): + raise TypeError("The 'diff_list' argument must be a list!") # Determine if either Ref or Dev have all NaN values: ref_is_all_nan = np.isnan(ref.values).all() @@ -220,6 +224,12 @@ def print_totals( # Create the display name by editing the diagnostic name display_name = create_display_name(diagnostic_name) + # Get the species name from the display name + species_name = display_name + c = species_name.find(" ") + if c > 0: + species_name = display_name[0:c] + # Special handling for totals if "_TOTAL" in diagnostic_name.upper(): print("-"*90, file=f) @@ -233,7 +243,7 @@ def print_totals( else: if masks is not None: refarr = np.ma.masked_array(refarr, masks["Ref_TropMask"]) - total_ref = np.sum(refarr) + total_ref = np.sum(refarr, dtype=np.float64) # ================================================================== # Sum the Dev array (or set to NaN if missing) @@ -244,17 +254,25 @@ def print_totals( else: if masks is not None: devarr = np.ma.masked_array(devarr, masks["Dev_TropMask"]) - total_dev = np.sum(devarr) + total_dev = np.sum(devarr, dtype=np.float64) # ================================================================== # Compute differences (or set to NaN if missing) # ================================================================== if ref_is_all_nan or dev_is_all_nan: diff = np.nan - zero_diff = False else: diff = total_dev - total_ref - zero_diff = np.array_equal(refarr, devarr) + has_diffs = abs(diff) > np.float64(0.0) + + # Append to the list of differences. If no differences then append + # None. Duplicates can be stripped out in the calling routine. + if has_diffs: + diff_str = " * " + diff_list.append(species_name) + else: + diff_str = "" + diff_list.append(None) # ================================================================== # Compute % differences (or set to NaN if missing) @@ -268,9 +286,11 @@ def print_totals( pctdiff = np.nan # ================================================================== - # Write output to file + # Write output to file and return # ================================================================== - print(f"{display_name.ljust(19)}: {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {zero_diff}", file=f) + print(f"{display_name.ljust(19)}: {total_ref:18.6f} {total_dev:18.6f} {diff:12.6f} {pctdiff:8.3f} {diff_str}", file=f) + + return diff_list def get_species_categories( @@ -706,9 +726,9 @@ def slice_by_lev_and_time( raise ValueError(msg) # NOTE: isel no longer seems to work on a Dataset, so - # first createthe DataArray object, then use isel on it. + # first createthe DataArray object, then use isel on it. # -- Bob Yantosca (19 Jan 2023) - dr = ds[varname] + dr = ds[varname] vdims = dr.dims if ("time" in vdims and dr.time.size > 0) and "lev" in vdims: if flip: @@ -2117,3 +2137,92 @@ def read_config_file(config_file, quiet=False): raise Exception(msg) from err return config + + +def unique_values( + this_list, + drop=None, +): + """ + Given a list, returns a sorted list of unique values. + + Args: + ----- + this_list : list + Input list (may contain duplicate values) + + drop: list of str + List of variable names to exclude + + Returns: + -------- + unique: list + List of unique values from this_list + """ + if not isinstance(this_list, list): + raise ValueError("Argument 'this_list' must be a list object!") + if not isinstance(drop, list): + raise ValueError("Argument 'drop' must be a list object!") + + unique = list(set(this_list)) + + if drop is not None: + for d in drop: + unique.remove(d) + + unique.sort() + + return unique + + +def insert_text_into_file( + filename, + search_text, + replace_text, + width=80 +): + """ + Convenience routine to insert text into a file. The best way + to do this is to read the contents of the file, manipulate the + text, and then overwrite the file. + + Args: + ----- + filename: str + The file with text to be replaced. + + search_text: str + Text string in the file that will be replaced. + + replace_text: str or list of str + Text that will replace 'search_text' + + width: int + Will "word-wrap" the text in 'replace_text' to this width + """ + if not isinstance(search_text, str): + raise ValueError("Argument 'search_text' needs to be a string!") + if not isinstance(replace_text, str): + if isinstance(replace_text, list): + replace_text = ' '.join(replace_text) + else: + raise ValueError( + "Argument 'replace_text' needs to be a list or a string" + ) + + # Word-wrap the replacement text + replace_text = wrap(replace_text, width=width) + replace_text = '\n'.join(replace_text) + + with open(filename, "r") as f: + filedata = f.read() + f.close() + + filedata = filedata.replace( + search_text, + replace_text + ) + + with open(filename, "w") as f: + f.write(filedata) + f.close() From c27c7d28484ea41257468b8fcbced3e762dc6b3a Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Wed, 8 Feb 2023 16:36:34 -0500 Subject: [PATCH 5/8] Further updates for printing information about diffs btw Dev & Ref benchmark/run_benchmark.py - Now call "make_benchmark_summary_table" for GCC vs GCC, GCHP vs GCC, and GCHP vs GCHP benchmarks. benchmark/1mo_benchmark.yml - Added "summary_table" switch under "plot_options" gcpy/benchmark.py - Removed typo at top of script - In create_benchmark_emissions_table and create_global_mass_table: - Write an alternate message if the string with species differences is longer than ~80 chars - Added new function "diff_list_to_text" - Added new function "make_directory" - Updated "create_benchmrk_summary_table" to call get_filepaths for either GCHP or GCC Ref and/or Dev data gcpy/util.py - In routine print_totals: - Now keep track of which species have differences btw Dev & Ref in "diff_list", which is passed to the calling routine - Added new function "wrap_text" - Added new function "array_equals" CHANGELOG.md - Updated accordingly Signed-off-by: Bob Yantosca --- CHANGELOG.md | 5 + benchmark/1mo_benchmark.yml | 1 + benchmark/run_benchmark.py | 131 ++++++++++++--- gcpy/benchmark.py | 311 +++++++++++++++++++++++++++--------- gcpy/util.py | 150 +++++++++++++---- 5 files changed, 465 insertions(+), 133 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d414db8..37327b9c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## Unreleased +### Added +- Benchmark summary table output (intended for 1hr & 1mo benchmarks) +- Species/emissions/inventories that differ between Dev & Ref versions are now printed at the top of the benchmark emissions, inventory, and global mass tables. if there are too many species with diffs, an alternate message is printed. +- New functions in `benchmark.py` and `util.py` to facilitate printing of the species/emissions/inventories that differ between Dev & Ref versions. + ## [1.3.2] -- 2022-10-25 ### Fixes diff --git a/benchmark/1mo_benchmark.yml b/benchmark/1mo_benchmark.yml index 4f5f2f67..3a15f591 100644 --- a/benchmark/1mo_benchmark.yml +++ b/benchmark/1mo_benchmark.yml @@ -108,6 +108,7 @@ options: ops_budget_table: False OH_metrics: True ste_table: True # GCC only + summary_table: True plot_options: # Plot concentrations and emissions by category? by_spc_cat: True by_hco_cat: True diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py index db5b2be3..e7c14e92 100755 --- a/benchmark/run_benchmark.py +++ b/benchmark/run_benchmark.py @@ -642,29 +642,6 @@ def run_benchmark_default(config): if config["options"]["outputs"]["OH_metrics"]: print("\n%%% Creating GCC vs. GCC OH metrics table %%%") - # Use this for benchmarks prior to GEOS-Chem 13.0.0 - # # Diagnostic collection files to read - # col = "ConcAfterChem" - # ref = get_filepath(gcc_vs_gcc_refdir, col, gcc_ref_date) - # dev = get_filepath(gcc_vs_gcc_devdir, col, gcc_dev_date) - # - # # Meteorology data needed for calculations - # col = "StateMet" - # refmet = get_filepath(gcc_vs_gcc_refdir, col, gcc_ref_date) - # devmet = get_filepath(gcc_vs_gcc_devdir, col, gcc_dev_date) - # - # # Print OH metrics - # bmk.make_benchmark_oh_metrics( - # ref, - # refmet, - # config["data"]["ref"]["gcc"]["version"], - # dev, - # devmet, - # config["data"]["dev"]["gcc"]["version"], - # dst=gcc_vs_gcc_tablesdir, - # overwrite=True - # ) - # Filepaths ref = get_filepath(gcc_vs_gcc_refdir, "Metrics", gcc_ref_date) dev = get_filepath(gcc_vs_gcc_devdir, "Metrics", gcc_dev_date) @@ -703,6 +680,48 @@ def run_benchmark_default(config): month=gcc_dev_date.astype(datetime).month, ) + # ================================================================== + # GCC vs. GCC summary table + # ================================================================== + if config["options"]["outputs"]["summary_table"]: + print("\n%%% Creating GCC vs. GCC summary table %%%") + + # Diagnostic collections to check + collections = [ + 'AerosolMass', + 'Aerosols', + 'Emissions', + 'JValues', + 'Metrics', + 'SpeciesConc', + 'StateMet', + ] + + # Print summary of which collections are identical + # between Ref & Dev, and which are not identical. + bmk.create_benchmark_summary_table( + gcc_vs_gcc_refdir, + config["data"]["ref"]["gcc"]["version"], + gcc_ref_date, + gcc_vs_gcc_devdir, + config["data"]["dev"]["gcc"]["version"], + gcc_dev_date, + collections = [ + 'AerosolMass', + 'Aerosols', + 'Emissions', + 'JValues', + 'Metrics', + 'SpeciesConc', + 'StateMet' + ], + dst=gcc_vs_gcc_tablesdir, + outfilename="Summary.txt", + overwrite=True, + verbose=False, + ) + + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCC benchmark plots and tables # @@ -1014,6 +1033,38 @@ def run_benchmark_default(config): title = "\n%%% Skipping GCHP vs. GCC Strat-Trop Exchange table %%%" print(title) + + # ================================================================== + # GCHP vs. GCC summary table + # ================================================================== + if config["options"]["outputs"]["summary_table"]: + print("\n%%% Creating GCHP vs. GCC summary table %%%") + + # Print summary of which collections are identical + # between Ref & Dev, and which are not identical. + bmk.create_benchmark_summary_table( + gchp_vs_gcc_refdir, + config["data"]["dev"]["gcc"]["version"], + gcc_dev_date, + gchp_vs_gcc_devdir, + config["data"]["dev"]["gchp"]["version"], + gchp_dev_date, + collections=[ + 'AerosolMass', + 'Aerosols', + 'Emissions', + 'JValues', + 'Metrics', + 'SpeciesConc', + 'StateMet', + ], + dst=gchp_vs_gcc_tablesdir, + outfilename="Summary.txt", + overwrite=True, + verbose=False, + dev_gchp=True + ) + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCHP benchmark plots and tables # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1376,6 +1427,38 @@ def run_benchmark_default(config): if config["options"]["outputs"]["ste_table"]: print("\n%%% Skipping GCHP vs. GCHP Strat-Trop Exchange table %%%") + # ================================================================== + # GCHP vs. GCHP summary table + # ================================================================== + if config["options"]["outputs"]["summary_table"]: + print("\n%%% Creating GCHP vs. GCHP summary table %%%") + + # Print summary of which collections are identical + # between Ref & Dev, and which are not identical. + bmk.create_benchmark_summary_table( + gchp_vs_gchp_refdir, + config["data"]["ref"]["gchp"]["version"], + gchp_ref_date, + gchp_vs_gchp_devdir, + config["data"]["dev"]["gchp"]["version"], + gchp_dev_date, + collections=[ + 'AerosolMass', + 'Aerosols', + 'Emissions', + 'JValues', + 'Metrics', + 'SpeciesConc', + 'StateMet', + ], + dst=gchp_vs_gchp_tablesdir, + outfilename="Summary.txt", + overwrite=True, + verbose=False, + ref_gchp=True, + dev_gchp=True, + ) + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Create GCHP vs GCC difference of differences benchmark plots # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1423,7 +1506,7 @@ def run_benchmark_default(config): # ================================================================== # Print a message indicating that the benchmarks finished # ================================================================== - print("\n %%%% All requested benchmark plots/tables created! %%%%") + print("\n%%%% All requested benchmark plots/tables created! %%%%") def main(): diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index 71ca17d2..c92bc03b 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -1,4 +1,4 @@ -6""" +""" Specific utilities for creating plots from GEOS-Chem benchmark simulations. """ @@ -185,7 +185,10 @@ def create_total_emissions_table( # Write a placeholder to the file that denotes where # the list of species with differences will be written placeholder = "@%% insert diff_list here %%@" - print(f"Species that differ between {refstr} and {devstr}", file=f) + if "Inv" in template: + print(f"Inventories that differ btw {refstr} and {devstr}:", file=f) + else: + print(f"Species that differ btw {refstr} and {devstr}:", file=f) print(f"{placeholder}\n\n", file=f) # Define a list for differences @@ -262,7 +265,7 @@ def create_total_emissions_table( # If no properties are found, then skip to next species if species_properties is None: - print("No properties found for {} ... skippping".format(spc_name)) + print(f"No properties found for {spc_name} ... skippping") continue # Convert units of Ref and Dev and save to numpy ndarray objects @@ -348,11 +351,12 @@ def create_total_emissions_table( # Close file f.close() - # Reopen file and insert list of species with nonzero diffs + # Reopen file and replace placeholder with list of diffs util.insert_text_into_file( filename=outfilename, search_text=placeholder, - replace_text=util.unique_values(diff_list, drop=[None]) + replace_text=diff_list_to_text(diff_list), + width=90 ) def create_global_mass_table( @@ -460,21 +464,23 @@ def create_global_mass_table( if trop_only: title1 = f"### Global mass (Gg) {label} (Trop only)" title2 = f"### Ref = {refstr}; Dev = {devstr}" + title3 = f"### Species that differ btw {refstr} and {devstr}:" + + # Write a placeholder to the file that denotes where + # the list of species with differences will be written + placeholder = "@%% insert diff_list here %%@" + title4 = f"{placeholder}" # Print header to file print("#" * 89, file=f) print(f"{title1 : <86}{'###'}", file=f) print(f"{title2 : <86}{'###'}", file=f) + print(f"{'###' : <86}{'###'}", file=f) + print(f"{title3 : <86}{'###'}", file=f) + print(f"{placeholder}", file=f) print("#" * 89, file=f) - # Write a placeholder to the file that denotes where - # the list of species with differences will be written - placeholder = "@%% insert diff_list here %%@" - print(f"\nSpecies that differ between {refstr} and {devstr}", file=f) - print(f"{placeholder}\n\n", file=f) - # Column headers - print(f"{title2}", file=f) print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'diffs'}", file=f) # ================================================================== @@ -566,11 +572,15 @@ def create_global_mass_table( # Close file f.close() - # Reopen file and insert list of species with nonzero diffs + # Reopen file and replace placeholder text by diff_text util.insert_text_into_file( filename=outfilename, search_text=placeholder, - replace_text=util.unique_values(diff_list, drop=[None]) + replace_text=diff_list_to_text( + diff_list, + fancy_format=True + ), + width=100 # Force it not to wrap ) @@ -4396,49 +4406,61 @@ def get_species_database_dir(config): def create_benchmark_summary_table( refpath, refstr, + refdate, devpath, devstr, + devdate, + collections, dst="./benchmark", overwrite=False, outfilename="Summary.txt", verbose=False, - spcdb_dir=os.path.dirname(__file__) + spcdb_dir=os.path.dirname(__file__), + ref_gchp=False, + dev_gchp=False ): """ - Creates a table of global masses for a list of species in contained in - two data sets. The data sets, which typically represent output from two - different model versions, are usually contained in netCDF data files. + Creates a benchmark summary table that shows which data collections + have difference. Useful for scanning the 1-hr and 1-month benchmark + outputs. Args: - refdata: xarray Dataset - The first data set to be compared (aka "Reference"). + refpath: str + Path to the first data set to be compared (aka "Ref"). refstr: str A string that can be used to identify refdata - (e.g. a model v2ersion number or other identifier). - devdata: xarray Dataset - The second data set to be compared (aka "Development"). + (e.g. a model version number or other identifier). + refdate: np.datetime64 + Date/time stamp used by the "Ref" data files. + ref_gchp: bool + Set to True if the "Ref" data comes from a GCHP run. + Default value: False + devpath: str + Path to the second data set to be compared (aka "Dev"). devstr: str A string that can be used to identify the data set specified by devfile (e.g. a model version number or other identifier). - varlist: list of strings - List of species concentation variable names to include - in the list of global totals. - met_and_masks: dict of xarray DataArray - Dictionary containing the meterological variables and - masks for the Ref and Dev datasets. - label: str - Label to go in the header string. Can be used to - pass the month & year. + dev_gchp: bool + Set to True if the "Ref" data comes from a GCHP run. + Default value: False + devdate: np.datetime64 + Date/time stamp used by the "Dev" data files. + collections: list of strings + List of diagnostic collections to examine. Keyword Args (optional): - trop_only: bool - Set this switch to True if you wish to print totals - only for the troposphere. - Default value: False (i.e. print whole-atmosphere totals). + dst: str + A string denoting the destination folder where the file + containing emissions totals will be written. + Default value: "./benchmark" + overwrite: bool + Set this flag to True to overwrite files in the + destination folder (specified by the dst argument). + Default value: False outfilename: str Name of the text file which will contain the table of emissions totals. - Default value: "GlobalMass_TropStrat.txt" + Default value: "Summary.txt" verbose: bool Set this switch to True if you wish to print out extra informational messages. @@ -4456,44 +4478,183 @@ def create_benchmark_summary_table( """ # ================================================================== - # Initialization and data read + # Open file for output # ================================================================== - if os.path.isdir(dst) and not overwrite: - msg = "Directory {} exists. Pass overwrite=True to overwrite " \ - + "files in that directory, if any." - msg = msg.format(dst) + + # Create the directory for output + make_directory(dst, overwrite) + + # Create file + try: + f = open(os.path.join(dst, outfilename), "w") + except (IOError, OSError, FileNotFoundError) as e: + msg = f"Could not open {outfilename} for writing!" + raise e(msg) + + # Title strings + title1 = f"### Benchmark summary table" + title2 = f"### Ref = {refstr}; Dev = {devstr}" + + # Write a placeholder to the file that denotes where + # the list of species with differences will be written + placeholder = "@%% insert diff_list here %%@" + title4 = f"{placeholder}" + + # Print header to file + print("#" * 80, file=f) + print(f"{title1 : <77}{'###'}", file=f) + print(f"{title2 : <77}{'###'}", file=f) + print("#" * 80, file=f) + print(file=f) + + # ================================================================== + # Read data and look differences btw Ref & Dev versions + # ================================================================== + + # Variables to skip + skip_vars = gcon.skip_these_vars + skip_vars.append("corner_lats") + skip_vars.append("corner_lons") + + # Pick the proper function to read the data + reader = util.dataset_reader( + multi_files=False, + verbose=verbose + ) + + # Make a directory to store the list of species that differ + diff_dict = {} + + # Loop over diagnostic files + for col in collections: + + # Read Ref data + refdata = reader( + util.get_filepath( + refpath, + col, + refdate, + is_gchp=ref_gchp + ), + drop_variables=skip_vars + ).load() + + # Get Dev data + devdata = reader( + util.get_filepath( + devpath, + col, + devdate, + is_gchp=dev_gchp + ), + drop_variables=skip_vars + ).load() + + # Make sure that Ref and Dev datasets have the same variables. + # Variables that are in Ref but not in Dev will be added to Dev + # with all missing values (NaNs). And vice-versa. + [refdata, devdata] = util.add_missing_variables( + refdata, + devdata + ) + + # Find all common variables between the two datasets + vardict = util.compare_varnames( + refdata, + devdata, + quiet=True + ) + + # List of differences for this collection + diff_list = [] + + # Keep track of which variables are different + # Loop over the common variables + for v in vardict["commonvarsData"]: + if not util.array_equals(refdata[v], devdata[v]): + diff_list.append(v) + + # Drop duplicate values from diff_list + diff_list = util.unique_values(diff_list, drop=[None]) + + if len(diff_list) == 0: + print("-" * 79, file=f) + print(f"{col}: {devstr} is identical to {refstr}", file=f) + print(file=f) + else: + c = 0 + print("-" * 79, file=f) + print(f"{col}: {devstr} differs from {refstr}", file=f) + print("\n Diagnostics that differ", file=f) + for i, v in enumerate(diff_list): + print(f" {v}", file=f) + if i > 10: + print(f" ... and {len(diff_list) - 10} others", file=f) + break + print(file=f) + + # ================================================================== + # Close files + # ================================================================== + f.close() + + +def diff_list_to_text( + diff_list, + fancy_format=False +): + """ + Converts a list of species/emissions/inventories/diagnostics that + show differences between GEOS-Chem versions ot a printable text + string. + + Args: + ----- + diff_list : list + List to be converted into text. "None" values will be dropped. + fancy_format: bool + Set to True if you wish output text to be bookended with '###'. + + Returns: + diff_text : str + String with concatenated list values. + """ + if not isinstance(diff_list, list): + raise ValueError("Argument 'diff_list' must be a list!") + + # Strip out duplicates from diff_list + # Prepare a message about species differences (or alternate msg) + diff_list = util.unique_values(diff_list, drop=[None]) + diff_text = util.wrap_text(diff_list, width=85) + if len(diff_text) > 85: + diff_text = "... Too many diffs to print (see below for details)" + + if fancy_format: + diff_text = f"### {diff_text : <82}{'###'}" + + return diff_text.strip() + + +def make_directory( + dir_name, + overwrite +): + """ + Creates a directory where benchmark plots/tables will be placed. + + Args: + ----- + dir_name : str + Name of the directory to be created. + overwrite : bool + Set to True if you wish to overwrite prior contents in + the directory 'dir_name' + """ + + if os.path.isdir(dir_name) and not overwrite: + msg = f"Directory {dir_name} exists!\n" + msg += "Pass overwrite=True to overwrite files in that directory." raise ValueError(msg) - if not os.path.isdir(dst): - os.mkdir(dst) -# # ================================================================== -# # Close files -# # ================================================================== -# -# # Get a list of files in the Ref path -# ref_files = [] -# for (path, names, files) in os.walk(refpath): -# for rf in files: -# ref_files.append(os.path.join(path, rf)) -# -# # Get a list of files in the Ref path -# dev_files = [] -# for (path, names, files) in os.walk(devpath): -# for df in files: -# dev_files.append(os.path.join(path, df) -# -# # ================================================================== -# # Open file for output -# # ================================================================== -# -# # Create file -# try: -# f = open(os.path.join(dst, outfilename), "w") -# except (IOError, OSError, FileNotFoundError) as e: -# msg = f"Could not open {outfilename} for writing!" -# raise e(msg) -# -# # ================================================================== -# # Close files -# # ================================================================== -# f.close() + if not os.path.isdir(dir_name): + os.mkdir(dir_name) diff --git a/gcpy/util.py b/gcpy/util.py index ea1ebba9..e6a85033 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -842,6 +842,8 @@ def compare_varnames( commonvars3D List of variables that are common to refdata and devdata, and that have lat, lon, and level dimensions. + commonvarsData List of all commmon 2D or 3D data variables, + excluding index variables. refonly List of 2D or 3D variables that are only present in refdata. devonly List of 2D or 3D variables that are only @@ -854,30 +856,36 @@ def compare_varnames( devonly = [v for v in devvars if v not in refvars] dimmismatch = [v for v in commonvars if refdata[v].ndim != devdata[v].ndim] commonvarsOther = [ - v - for v in commonvars - if ( + v for v in commonvars if ( + ( ("lat" not in refdata[v].dims or "Xdim" not in refdata[v].dims) - and ("lon" not in refdata[v].dims or "Ydim" not in refdata[v].dims) - and ("lev" not in refdata[v].dims) + and + ("lon" not in refdata[v].dims or "Ydim" not in refdata[v].dims) + and + ("lev" not in refdata[v].dims) + ) + or + ( + ("hyam" in v or "hybm" in v) # Omit these from plottable data + ) ) ] commonvars2D = [ - v - for v in commonvars - if ( - ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims) - and ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims) - and ("lev" not in refdata[v].dims) + v for v in commonvars if ( + ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims) + and + ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims) + and + ("lev" not in refdata[v].dims) ) ] commonvars3D = [ - v - for v in commonvars - if ( - ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims) - and ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims) - and ("lev" in refdata[v].dims) + v for v in commonvars if ( + ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims) + and + ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims) + and + ("lev" in refdata[v].dims) ) ] @@ -903,18 +911,20 @@ def compare_varnames( print("All variables have same dimensions in ref and dev") # For safety's sake, remove the 0-D and 1-D variables from - # refonly and devonly. This will ensure that refonly and - # devonly will only contain variables that can be plotted. + # commonvarsData, refonly, and devonly. This will ensure that + # these lists will only contain variables that can be plotted. + commonvarsData = [v for v in commonvars if v not in commonvarsOther] refonly = [v for v in refonly if v not in commonvarsOther] devonly = [v for v in devonly if v not in commonvarsOther] return { "commonvars": commonvars, - "commonvarsOther": commonvarsOther, "commonvars2D": commonvars2D, "commonvars3D": commonvars3D, + "commonvarsData": commonvarsData, + "commonvarsOther": commonvarsOther, "refonly": refonly, - "devonly": devonly, + "devonly": devonly } @@ -2168,13 +2178,46 @@ def unique_values( if drop is not None: for d in drop: - unique.remove(d) + if d in unique: + unique.remove(d) unique.sort() return unique +def wrap_text( + text, + width=80 +): + """ + Wraps text so that it fits within a certain line width. + + Args: + ----- + text: str or list of str + Input text to be word-wrapped. + width: int + Line width, in characters. + Default value: 80 + + Returns: + -------- + Original text reformatted so that it fits within lines + of 'width' characters or less. + """ + if not isinstance(text, str): + if isinstance(text, list): + text = ' '.join(text) # List -> str conversion + else: + raise ValueError("Argument 'text' must be either str or list!") + + text = wrap(text, width=width) + text = '\n'.join(text) + + return text + + def insert_text_into_file( filename, search_text, @@ -2190,29 +2233,27 @@ def insert_text_into_file( ----- filename: str The file with text to be replaced. - search_text: str Text string in the file that will be replaced. - replace_text: str or list of str Text that will replace 'search_text' - width: int Will "word-wrap" the text in 'replace_text' to this width """ if not isinstance(search_text, str): raise ValueError("Argument 'search_text' needs to be a string!") - if not isinstance(replace_text, str): - if isinstance(replace_text, list): - replace_text = ' '.join(replace_text) - else: - raise ValueError( - "Argument 'replace_text' needs to be a list or a string" - ) + if not isinstance(replace_text, str) and \ + not isinstance(replace_text, list): + raise ValueError( + "Argument 'replace_text' needs to be a list or a string" + ) # Word-wrap the replacement text - replace_text = wrap(replace_text, width=width) - replace_text = '\n'.join(replace_text) + # (does list -> str conversion if necessary) + replace_text = wrap_text( + replace_text, + width=width + ) with open(filename, "r") as f: filedata = f.read() @@ -2226,3 +2267,44 @@ def insert_text_into_file( with open(filename, "w") as f: f.write(filedata) f.close() + + +def array_equals( + refdata, + devdata +): + """ + Tests two arrays for equality. Useful for checking which + species have nonzero differences in benchmark output. + + Args: + ----- + refdata: xarray DataArray or numpy ndarray + The first array to be checked. + devdata: xarray DataArray or numpy ndarray + The second array to be checked. + + Returns: + -------- + True if both arrays are equal; False if not + """ + if not isinstance(refdata, np.ndarray): + if isinstance(refdata, xr.DataArray): + refdata = refdata.values + else: + raise ValueError( + "Argument 'refdata' must be an xarray DataArray or numpy ndarray!" + ) + if not isinstance(devdata, np.ndarray): + if isinstance(devdata, xr.DataArray): + devdata = devdata.values + else: + raise ValueError( + "Argument 'devdata' must be an xarray DataArray or numpy ndarray!" + ) + + # This method will work if the arrays hve different dimensions + # but an element-by-element search will not! + refsum = np.sum(refdata, dtype=np.float64) + devsum = np.sum(devdata, dtype=np.float64) + return np.abs(devsum - refsum) > np.float64(0.0) From af33f24d71d20e9b72c61a5d1dbcddf6982c8629 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 9 Feb 2023 13:08:19 -0500 Subject: [PATCH 6/8] Rework logic in compare_varnames gcpy.util.py - In function compare_varnames - Add variable names to commonvarsData if they have lon/Xdim or lat/Ydim coordinates. Plottable data variables need at least these dimensions. - commonvarsOther are variables that are not in commonvarsData - commonvars2D and commonvars3D are plottable data variables that either do not have lev/ilev dimensions, or do have them. This fixes an issue where 2D plottable data variables were getting lumped together with the index variables. Signed-off-by: Bob Yantosca --- gcpy/util.py | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/gcpy/util.py b/gcpy/util.py index e6a85033..4b765496 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -843,7 +843,8 @@ def compare_varnames( refdata and devdata, and that have lat, lon, and level dimensions. commonvarsData List of all commmon 2D or 3D data variables, - excluding index variables. + excluding index variables. This is the + list of "plottable" variables. refonly List of 2D or 3D variables that are only present in refdata. devonly List of 2D or 3D variables that are only @@ -855,37 +856,29 @@ def compare_varnames( refonly = [v for v in refvars if v not in devvars] devonly = [v for v in devvars if v not in refvars] dimmismatch = [v for v in commonvars if refdata[v].ndim != devdata[v].ndim] - commonvarsOther = [ + # Assume plottable data has lon and lat + # This is OK for purposes of benchmarking + # -- Bob Yantosca (09 Feb 2023) + commonvarsData = [ v for v in commonvars if ( - ( - ("lat" not in refdata[v].dims or "Xdim" not in refdata[v].dims) - and - ("lon" not in refdata[v].dims or "Ydim" not in refdata[v].dims) + ("lat" in refdata[v].dims or "Ydim" in refdata[v].dims) and - ("lev" not in refdata[v].dims) - ) - or - ( - ("hyam" in v or "hybm" in v) # Omit these from plottable data - ) + ("lon" in refdata[v].dims or "Xdim" in refdata[v].dims) ) + ] + commonvarsOther = [ + v for v in commonvars if ( + v not in commonvarsData + ) ] commonvars2D = [ v for v in commonvars if ( - ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims) - and - ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims) - and - ("lev" not in refdata[v].dims) + (v in commonvarsData) and ("lev" not in refdata[v].dims) ) ] commonvars3D = [ v for v in commonvars if ( - ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims) - and - ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims) - and - ("lev" in refdata[v].dims) + (v in commonvarsData) and ("lev" in refdata[v].dims) ) ] From b1b9295d1616e910f2ef76ef353f74dc3a2e0f81 Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 9 Feb 2023 13:55:46 -0500 Subject: [PATCH 7/8] Fixed incorrect logic in util.array_equals; updates in benchmark.py gcpy/util.py - In routine array_equals: - Add dtype parameter to select numeric type for the comparison - Return the inverse of ABS(devsum-refsum) > 0, since this is the condition for not equals. gcpy/benchmark.py - Skip reading "AREA" when computing the summary table - Pass dtype=np.float32 to util.array_equals Signed-off-by: Bob Yantosca --- gcpy/benchmark.py | 12 ++++++++---- gcpy/util.py | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index c92bc03b..fc5c07c9 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -4513,8 +4513,7 @@ def create_benchmark_summary_table( # Variables to skip skip_vars = gcon.skip_these_vars - skip_vars.append("corner_lats") - skip_vars.append("corner_lons") + skip_vars.append("AREA") # Pick the proper function to read the data reader = util.dataset_reader( @@ -4569,9 +4568,14 @@ def create_benchmark_summary_table( diff_list = [] # Keep track of which variables are different - # Loop over the common variables + # NOTE: Use 32-point float for comparisons since this is + # the precision used for History diagnostics. for v in vardict["commonvarsData"]: - if not util.array_equals(refdata[v], devdata[v]): + if not util.array_equals( + refdata[v], + devdata[v], + dtype=np.float32 + ): diff_list.append(v) # Drop duplicate values from diff_list diff --git a/gcpy/util.py b/gcpy/util.py index 4b765496..d9590f9c 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -2264,7 +2264,8 @@ def insert_text_into_file( def array_equals( refdata, - devdata + devdata, + dtype=np.float64 ): """ Tests two arrays for equality. Useful for checking which @@ -2276,6 +2277,9 @@ def array_equals( The first array to be checked. devdata: xarray DataArray or numpy ndarray The second array to be checked. + dtype : np.float32 or np.float64 + The precision that will be used to make the evaluation. + Default: np.float64 Returns: -------- @@ -2298,6 +2302,6 @@ def array_equals( # This method will work if the arrays hve different dimensions # but an element-by-element search will not! - refsum = np.sum(refdata, dtype=np.float64) - devsum = np.sum(devdata, dtype=np.float64) - return np.abs(devsum - refsum) > np.float64(0.0) + refsum = np.nansum(refdata, dtype=dtype) + devsum = np.nansum(devdata, dtype=dtype) + return (not np.abs(devsum - refsum) > dtype(0.0)) From 0155dd0c669d2c6112f8666d4324bb6685a00a3d Mon Sep 17 00:00:00 2001 From: Bob Yantosca Date: Thu, 9 Feb 2023 14:27:14 -0500 Subject: [PATCH 8/8] Update the messages at the top of tables to just show one line gcpy/benchmark.py: - Updated routine diff_list_to_text so that it also accepts refstr & devstr args. It now returns either line saying "dev & ref are identical" or "dev & ref show X differences" - Now pass devstr and refstr in calls to diff_list_to_text - Trimmed whitespace Signed-off-by: Bob Yantosca --- gcpy/benchmark.py | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py index fc5c07c9..89735343 100644 --- a/gcpy/benchmark.py +++ b/gcpy/benchmark.py @@ -184,11 +184,7 @@ def create_total_emissions_table( # Write a placeholder to the file that denotes where # the list of species with differences will be written - placeholder = "@%% insert diff_list here %%@" - if "Inv" in template: - print(f"Inventories that differ btw {refstr} and {devstr}:", file=f) - else: - print(f"Species that differ btw {refstr} and {devstr}:", file=f) + placeholder = "@%% insert diff status here %%@" print(f"{placeholder}\n\n", file=f) # Define a list for differences @@ -355,7 +351,10 @@ def create_total_emissions_table( util.insert_text_into_file( filename=outfilename, search_text=placeholder, - replace_text=diff_list_to_text(diff_list), + replace_text=diff_list_to_text( + refstr, + devstr, + diff_list), width=90 ) @@ -464,19 +463,16 @@ def create_global_mass_table( if trop_only: title1 = f"### Global mass (Gg) {label} (Trop only)" title2 = f"### Ref = {refstr}; Dev = {devstr}" - title3 = f"### Species that differ btw {refstr} and {devstr}:" # Write a placeholder to the file that denotes where # the list of species with differences will be written - placeholder = "@%% insert diff_list here %%@" - title4 = f"{placeholder}" + placeholder = "@%% insert diff status here %%@" # Print header to file print("#" * 89, file=f) print(f"{title1 : <86}{'###'}", file=f) print(f"{title2 : <86}{'###'}", file=f) print(f"{'###' : <86}{'###'}", file=f) - print(f"{title3 : <86}{'###'}", file=f) print(f"{placeholder}", file=f) print("#" * 89, file=f) @@ -577,6 +573,8 @@ def create_global_mass_table( filename=outfilename, search_text=placeholder, replace_text=diff_list_to_text( + refstr, + devstr, diff_list, fancy_format=True ), @@ -4604,6 +4602,8 @@ def create_benchmark_summary_table( def diff_list_to_text( + refstr, + devstr, diff_list, fancy_format=False ): @@ -4629,13 +4629,21 @@ def diff_list_to_text( # Strip out duplicates from diff_list # Prepare a message about species differences (or alternate msg) diff_list = util.unique_values(diff_list, drop=[None]) - diff_text = util.wrap_text(diff_list, width=85) - if len(diff_text) > 85: - diff_text = "... Too many diffs to print (see below for details)" + # Print the text + n_diff = len(diff_list) + if n_diff > 0: + diff_text = f"{devstr} and {refstr} show {n_diff} differences" + else: + diff_text = f"{devstr} and {refstr} are identical" + diff_text = util.wrap_text( + diff_text, + width=83 + ) + if fancy_format: diff_text = f"### {diff_text : <82}{'###'}" - + return diff_text.strip()