From 7e05db365d0039b60a0bff323bcfea3196195423 Mon Sep 17 00:00:00 2001
From: Bob Yantosca <yantosca@seas.harvard.edu>
Date: Mon, 6 Feb 2023 14:34:57 -0500
Subject: [PATCH 1/8] Add zero-diff column in print_totals

gcpy/benchmark.py
- Now use f-strings to simplify print statements
- Use numpy.array_equal to test if the Ref and Dev numpy arrays (after
  masking, if necessary) are equal.
- Print a column for zero-diff results

Signed-off-by: Bob Yantosca <yantosca@seas.harvard.edu>
---
 gcpy/util.py | 42 ++++++++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/gcpy/util.py b/gcpy/util.py
index 26319f0e..53c3325a 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -227,34 +227,34 @@ def print_totals(
     # ==================================================================
     # Sum the Ref array (or set to NaN if missing)
     # ==================================================================
+    refarr = ref.values
     if ref_is_all_nan:
         total_ref = np.nan
     else:
-        if masks is None:
-            total_ref = np.sum(ref.values)
-        else:
-            arr = np.ma.masked_array(ref.values, masks["Ref_TropMask"])
-            total_ref = np.sum(arr)
+        if masks is not None:
+            refarr = np.ma.masked_array(refarr, masks["Ref_TropMask"])
+        total_ref = np.sum(refarr)
 
     # ==================================================================
     # Sum the Dev array (or set to NaN if missing)
     # ==================================================================
+    devarr = dev.values
     if dev_is_all_nan:
         total_dev = np.nan
     else:
-        if masks is None:
-            total_dev = np.sum(dev.values)
-        else:
-            arr = np.ma.masked_array(dev.values, masks["Dev_TropMask"])
-            total_dev = np.sum(arr)
+        if masks is not None:
+            devarr = np.ma.masked_array(devarr, masks["Dev_TropMask"])
+        total_dev = np.sum(devarr)
 
     # ==================================================================
     # Compute differences (or set to NaN if missing)
     # ==================================================================
     if ref_is_all_nan or dev_is_all_nan:
         diff = np.nan
+        zero_diff = False
     else:
         diff = total_dev - total_ref
+        zero_diff = np.array_equal(refarr, devarr)
 
     # ==================================================================
     # Compute % differences (or set to NaN if missing)
@@ -270,10 +270,7 @@ def print_totals(
     # ==================================================================
     # Write output to file
     # ==================================================================
-    print(
-        f"{display_name.ljust(18)} : {total_ref:18.6f}  {total_dev:18.6f}  {diff:12.6f}  {pctdiff:8.3f}", file=f
-    )
-
+    print(f"{display_name.ljust(14)} : {total_ref:18.6f}  {total_dev:18.6f}  {diff:12.6f}  {pctdiff:8.3f}  {zero_diff}", file=f)
 
 def get_species_categories(
         benchmark_type="FullChemBenchmark"
@@ -2119,3 +2116,20 @@ def read_config_file(config_file, quiet=False):
         raise Exception(msg) from err
 
     return config
+
+
+def is_zero_diff(refdr, devdr):
+    """
+    Returns True if two DataArray objects contain identical data,
+    or False otherwise
+
+    Args:
+    -----
+    refdr (xarray DataArray)
+        The "Ref" DataArray object to be tested.
+    devdr (xarray DataArray)
+        The "Dev" DataArray object to be tested
+    """
+    if not np.array_equal(refdr.values, devdr.values):
+        return False
+    return True

From d1aa8bf28d70d1e94434797578ae23dfa00d1e5a Mon Sep 17 00:00:00 2001
From: Bob Yantosca <yantosca@seas.harvard.edu>
Date: Mon, 6 Feb 2023 15:15:01 -0500
Subject: [PATCH 2/8] Now print if Dev and Ref are identical at top of mass
 table file

gcpy/benchmark.py
- Update try/catch statement for file open
- Use xr.Dataset.equals to test if the Ref and Dev datasets are
  identical to each other or not.  (NOTE: This will likely fail when
  testing GCC vs GCHP, since the coordinates and data variables are
  different).  This was the best way to proceed because in routine
  print_totals, the test for zero-diff is done per variable.
- Now print out identicality or difference of Dev vs Ref in the
  table header

gcpy/util.py
- Restored the display name to a width of 19, as this is also used
  by the emissions tables.

Signed-off-by: Bob Yantosca <yantosca@seas.harvard.edu>
---
 gcpy/benchmark.py | 39 ++++++++++++++++++---------------------
 gcpy/util.py      |  3 ++-
 2 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py
index 851ebc1a..d1f7597b 100644
--- a/gcpy/benchmark.py
+++ b/gcpy/benchmark.py
@@ -433,32 +433,29 @@ def create_global_mass_table(
     # Create file
     try:
         f = open(outfilename, "w")
-    except FileNotFoundError:
-        msg = "Could not open {} for writing!".format(outfilename)
+    except (IOError, OSError, FileNotFoundError) as e:
+        msg = f"Could not open {outfilename} for writing!"
         raise FileNotFoundError(msg)
 
+    # Determine if the two data sets are identical
+    diff_str="### Dev differs from Ref"
+    if xr.Dataset.equals(refdata, devdata):
+        diff_str="### Dev is identical to Ref"
+
     # Title strings
+    title1 = f"### Global mass (Gg) {label} (Trop + Strat)"
     if trop_only:
-        title1 = "### Global mass (Gg) {} (Trop only)".format(label)
-    else:
-        title1 = "### Global mass (Gg) {} (Trop + Strat)".format(label)
-    title2 = "### Ref = {}; Dev = {}".format(refstr, devstr)
+        title1 = f"### Global mass (Gg) {label} (Trop only)"
+    title2 = f"### Ref = {refstr}; Dev = {devstr}"
 
     # Print header to file
     print("#" * 83, file=f)
-    print("{}{}".format(title1.ljust(80), "###"), file=f)
-    print("{}{}".format(title2.ljust(80), "###"), file=f)
+    print(f"{title1 : <80}{'###'}", file=f)
+    print(f"{title2 : <80}{'###'}", file=f)
+    print(f"{'###' : <80}{'###'}", file=f)
+    print(f"{diff_str : <80}{'###'}", file=f)
     print("#" * 83, file=f)
-    print(
-        "{}{}{}{}{}".format(
-            " ".ljust(19),
-            "Ref".rjust(20),
-            "Dev".rjust(20),
-            "Dev - Ref".rjust(14),
-            "% diff".rjust(10),
-        ),
-        file=f,
-    )
+    print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f)
 
     # ==================================================================
     # Print global masses for all species
@@ -521,7 +518,7 @@ def create_global_mass_table(
                 delta_p=met_and_masks["Dev_Delta_P"],
                 box_height=met_and_masks["Dev_BxHeight"],
             )
-
+            
         # ==============================================================
         # Print global masses for Ref and Dev
         # (we will mask out tropospheric boxes in util.print_totals)
@@ -531,13 +528,13 @@ def create_global_mass_table(
                 refarray,
                 devarray,
                 f,
-                masks=met_and_masks,
+                masks=met_and_masks
             )
         else:
             util.print_totals(
                 refarray,
                 devarray,
-                f,
+                f
             )
 
     # ==================================================================
diff --git a/gcpy/util.py b/gcpy/util.py
index 53c3325a..aa46b313 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -270,7 +270,8 @@ def print_totals(
     # ==================================================================
     # Write output to file
     # ==================================================================
-    print(f"{display_name.ljust(14)} : {total_ref:18.6f}  {total_dev:18.6f}  {diff:12.6f}  {pctdiff:8.3f}  {zero_diff}", file=f)
+    print(f"{display_name.ljust(19)} : {total_ref:18.6f}  {total_dev:18.6f}  {diff:12.6f}  {pctdiff:8.3f}  {zero_diff}", file=f)
+
 
 def get_species_categories(
         benchmark_type="FullChemBenchmark"

From d3d5568b17d4cd4f975f93e27144dbcaa6e6204b Mon Sep 17 00:00:00 2001
From: Bob Yantosca <yantosca@seas.harvard.edu>
Date: Mon, 6 Feb 2023 16:40:40 -0500
Subject: [PATCH 3/8] Display if Dev and Ref are identical in emissions &
 inventory tables

gcpy/benchmark.py
- In create_benchmark_emission_tables:
  - Make sure columns and headers line up
  - Now compute if each diagnostic for an emissions species or
    inventory is identical, and display that information in the header
  - Improve error trapping when opening the file for output
- In create_benchmark_mass_tables:
  - Make sure columns and headers line up

gcpy/util.py
- In routine print_totals, make sure the header line with Ref & Dev
  matches up with the output of create_benchmark_emissions_tables
  and create_benchmark_mass_tables
- Removed is_zero_diff function, we will add another function like it
  later.

Signed-off-by: Bob Yantosca <yantosca@seas.harvard.edu>
---
 gcpy/benchmark.py | 63 ++++++++++++++++++++++++++---------------------
 gcpy/util.py      | 21 ++--------------
 2 files changed, 37 insertions(+), 47 deletions(-)

diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py
index d1f7597b..532caac3 100644
--- a/gcpy/benchmark.py
+++ b/gcpy/benchmark.py
@@ -176,10 +176,11 @@ def create_total_emissions_table(
     # =================================================================
     # Open the file for output
     # =================================================================
+    # Create file
     try:
         f = open(outfilename, "w")
-    except FileNotFoundError:
-        msg = "Could not open {} for writing!".format(outfilename)
+    except (IOError, OSError, FileNotFoundError) as e:
+        msg = f"Could not open {outfilename} for writing!"
         raise FileNotFoundError(msg)
 
     # =================================================================
@@ -220,29 +221,35 @@ def create_total_emissions_table(
 
         # Title strings
         if "Inv" in template:
-            print("Computing inventory totals for {}".format(species_name))
-            title1 = "### Emissions totals for inventory {} [Tg]".format(
-                species_name)
+            print(f"Computing inventory totals for {species_name}")
+            title0 = f"for inventory {species_name}"
+            title1 = f"### Emissions totals {title0} [Tg]"
         else:
-            print("Computing emissions totals for {}".format(species_name))
-            title1 = "### Emissions totals for species {} [Tg]".format(species_name)
-
-        title2 = "### Ref = {}; Dev = {}".format(refstr, devstr)
+            print(f"Computing emissions totals for {species_name}")
+            title0 = f"for species {species_name}"
+            title1 = f"### Emissions totals {title0} [Tg]"
+
+        title2 = f"### Ref = {refstr}; Dev = {devstr}"
+
+        # Determine if the all DataArrays for a given species or
+        # have identical data, and define a display string.
+        diagnames = [v for v in varnames if species_name in v]
+        diff_ct = 0
+        for v in diagnames:
+            if np.array_equal(refdata[v].values, devdata[v].values):
+                diff_ct += 1
+        diff_str = f"### Dev differs from Ref {title0}"
+        if diff_ct == len(diagnames):
+            diff_str = f"### Dev is identical to Ref {title0}"
 
         # Print header to file
-        print("#" * 83, file=f)
-        print("{}{}".format(title1.ljust(80), "###"), file=f)
-        print("{}{}".format(title2.ljust(80), "###"), file=f)
-        print("#" * 83, file=f)
-        print(
-            "{}{}{}{}{}".format(
-                " ".ljust(19),
-                "Ref".rjust(20),
-                "Dev".rjust(20),
-                "Dev - Ref".rjust(14),
-                "% diff".rjust(10),
-            ),
-            file=f)
+        print("#" * 91, file=f)
+        print(f"{title1 : <88}{'###'}", file=f)
+        print(f"{title2 : <88}{'###'}", file=f)
+        print(f"{'###' : <88}{'###'}", file=f)
+        print(f"{diff_str : <88}{'###'}", file=f)
+        print("#" * 91, file=f)
+        print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f)
 
         # =============================================================
         # Loop over all emissions variables corresponding to this
@@ -449,12 +456,12 @@ def create_global_mass_table(
     title2 = f"### Ref = {refstr}; Dev = {devstr}"
 
     # Print header to file
-    print("#" * 83, file=f)
-    print(f"{title1 : <80}{'###'}", file=f)
-    print(f"{title2 : <80}{'###'}", file=f)
-    print(f"{'###' : <80}{'###'}", file=f)
-    print(f"{diff_str : <80}{'###'}", file=f)
-    print("#" * 83, file=f)
+    print("#" * 91, file=f)
+    print(f"{title1 : <88}{'###'}", file=f)
+    print(f"{title2 : <88}{'###'}", file=f)
+    print(f"{'###' : <88}{'###'}", file=f)
+    print(f"{diff_str : <88}{'###'}", file=f)
+    print("#" * 91, file=f)
     print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f)
 
     # ==================================================================
diff --git a/gcpy/util.py b/gcpy/util.py
index aa46b313..0232722b 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -222,7 +222,7 @@ def print_totals(
 
     # Special handling for totals
     if "_TOTAL" in diagnostic_name.upper():
-        print("-"*83, file=f)
+        print("-"*90, file=f)
 
     # ==================================================================
     # Sum the Ref array (or set to NaN if missing)
@@ -270,7 +270,7 @@ def print_totals(
     # ==================================================================
     # Write output to file
     # ==================================================================
-    print(f"{display_name.ljust(19)} : {total_ref:18.6f}  {total_dev:18.6f}  {diff:12.6f}  {pctdiff:8.3f}  {zero_diff}", file=f)
+    print(f"{display_name.ljust(19)}: {total_ref:18.6f}  {total_dev:18.6f}  {diff:12.6f}  {pctdiff:8.3f}  {zero_diff}", file=f)
 
 
 def get_species_categories(
@@ -2117,20 +2117,3 @@ def read_config_file(config_file, quiet=False):
         raise Exception(msg) from err
 
     return config
-
-
-def is_zero_diff(refdr, devdr):
-    """
-    Returns True if two DataArray objects contain identical data,
-    or False otherwise
-
-    Args:
-    -----
-    refdr (xarray DataArray)
-        The "Ref" DataArray object to be tested.
-    devdr (xarray DataArray)
-        The "Dev" DataArray object to be tested
-    """
-    if not np.array_equal(refdr.values, devdr.values):
-        return False
-    return True

From 53b75ad17b61ee907bd56145551aa6901d43f02d Mon Sep 17 00:00:00 2001
From: Bob Yantosca <yantosca@seas.harvard.edu>
Date: Tue, 7 Feb 2023 15:53:42 -0500
Subject: [PATCH 4/8] Add list of species that differ to top of emissions, mass
 tables

gcpy/benchmark.py
- In routines create_benchmark_mass_tables and
  create_benchmark_emission_tables
  - Add placeholder text for the list of species diffs
  - Define "diff_list" list to hold the list of species names
    for species w/ nonzero diffs
  - Remove references to np.array_equal and xr.dataset.equal
  - Header lines are now 89 chars wide
  - Now pass diff_list to util.print_totals
  - Trim trailing whitespace
- Comment out code in create_benchmark_summary_table for now,
  we will add this functionality in soon.

gcpy/util.py
- Now import textwrap.wrap function
- In function "print_totals"
  - Now accepts and returns "diff_list"
  - Add more error checks
  - Compute total_ref and total_dev in 64-bit floating point
- Added function "unique_values"
- Added function "insert_text_into_file"

Signed-off-by: Bob Yantosca <yantosca@seas.harvard.edu>
---
 gcpy/benchmark.py | 206 +++++++++++++++++++++++++++++++++++++---------
 gcpy/util.py      | 131 ++++++++++++++++++++++++++---
 2 files changed, 288 insertions(+), 49 deletions(-)

diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py
index 532caac3..71ca17d2 100644
--- a/gcpy/benchmark.py
+++ b/gcpy/benchmark.py
@@ -1,4 +1,4 @@
-"""
+6"""
 Specific utilities for creating plots from GEOS-Chem benchmark simulations.
 """
 
@@ -180,8 +180,16 @@ def create_total_emissions_table(
     try:
         f = open(outfilename, "w")
     except (IOError, OSError, FileNotFoundError) as e:
-        msg = f"Could not open {outfilename} for writing!"
-        raise FileNotFoundError(msg)
+        raise e(f"Could not open {outfilename} for writing!")
+
+    # Write a placeholder to the file that denotes where
+    # the list of species with differences will be written
+    placeholder = "@%% insert diff_list here %%@"
+    print(f"Species that differ between {refstr} and {devstr}", file=f)
+    print(f"{placeholder}\n\n", file=f)
+
+    # Define a list for differences
+    diff_list = []
 
     # =================================================================
     # Loop through all of the species are in species_dict
@@ -231,25 +239,12 @@ def create_total_emissions_table(
 
         title2 = f"### Ref = {refstr}; Dev = {devstr}"
 
-        # Determine if the all DataArrays for a given species or
-        # have identical data, and define a display string.
-        diagnames = [v for v in varnames if species_name in v]
-        diff_ct = 0
-        for v in diagnames:
-            if np.array_equal(refdata[v].values, devdata[v].values):
-                diff_ct += 1
-        diff_str = f"### Dev differs from Ref {title0}"
-        if diff_ct == len(diagnames):
-            diff_str = f"### Dev is identical to Ref {title0}"
-
         # Print header to file
-        print("#" * 91, file=f)
-        print(f"{title1 : <88}{'###'}", file=f)
-        print(f"{title2 : <88}{'###'}", file=f)
-        print(f"{'###' : <88}{'###'}", file=f)
-        print(f"{diff_str : <88}{'###'}", file=f)
-        print("#" * 91, file=f)
-        print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f)
+        print("#" * 89, file=f)
+        print(f"{title1 : <86}{'###'}", file=f)
+        print(f"{title2 : <86}{'###'}", file=f)
+        print("#" * 89, file=f)
+        print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'diffs'}", file=f)
 
         # =============================================================
         # Loop over all emissions variables corresponding to this
@@ -335,17 +330,30 @@ def create_total_emissions_table(
             # ==========================================================
             # Print emission totals for Ref and Dev
             # ==========================================================
-            util.print_totals(refarray, devarray, f)
+            util.print_totals(
+                refarray,
+                devarray,
+                f,
+                diff_list
+            )
 
         # Add newlines before going to the next species
         print(file=f)
         print(file=f)
 
     # =================================================================
-    # Close file
+    # Cleanup and quit
     # =================================================================
+
+    # Close file
     f.close()
 
+    # Reopen file and insert list of species with nonzero diffs
+    util.insert_text_into_file(
+        filename=outfilename,
+        search_text=placeholder,
+        replace_text=util.unique_values(diff_list, drop=[None])
+    )
 
 def create_global_mass_table(
         refdata,
@@ -444,10 +452,8 @@ def create_global_mass_table(
         msg = f"Could not open {outfilename} for writing!"
         raise FileNotFoundError(msg)
 
-    # Determine if the two data sets are identical
-    diff_str="### Dev differs from Ref"
-    if xr.Dataset.equals(refdata, devdata):
-        diff_str="### Dev is identical to Ref"
+    # Define a list for differences
+    diff_list = []
 
     # Title strings
     title1 = f"### Global mass (Gg) {label} (Trop + Strat)"
@@ -456,18 +462,25 @@ def create_global_mass_table(
     title2 = f"### Ref = {refstr}; Dev = {devstr}"
 
     # Print header to file
-    print("#" * 91, file=f)
-    print(f"{title1 : <88}{'###'}", file=f)
-    print(f"{title2 : <88}{'###'}", file=f)
-    print(f"{'###' : <88}{'###'}", file=f)
-    print(f"{diff_str : <88}{'###'}", file=f)
-    print("#" * 91, file=f)
-    print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'no-diff'}", file=f)
+    print("#" * 89, file=f)
+    print(f"{title1 : <86}{'###'}", file=f)
+    print(f"{title2 : <86}{'###'}", file=f)
+    print("#" * 89, file=f)
+
+    # Write a placeholder to the file that denotes where
+    # the list of species with differences will be written
+    placeholder = "@%% insert diff_list here %%@"
+    print(f"\nSpecies that differ between {refstr} and {devstr}", file=f)
+    print(f"{placeholder}\n\n", file=f)
+
+    # Column headers
+    print(f"{title2}", file=f)
+    print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'diffs'}", file=f)
 
     # ==================================================================
     # Print global masses for all species
     #
-    # NOTE: By this point, all species will be in both Ref and Dev'
+    # NOTE: By this point, all secies will be in both Ref and Dev'
     # because we have added them in the calling routine
     # ==================================================================
     for v in varlist:
@@ -525,7 +538,7 @@ def create_global_mass_table(
                 delta_p=met_and_masks["Dev_Delta_P"],
                 box_height=met_and_masks["Dev_BxHeight"],
             )
-            
+
         # ==============================================================
         # Print global masses for Ref and Dev
         # (we will mask out tropospheric boxes in util.print_totals)
@@ -535,20 +548,31 @@ def create_global_mass_table(
                 refarray,
                 devarray,
                 f,
+                diff_list,
                 masks=met_and_masks
             )
         else:
             util.print_totals(
                 refarray,
                 devarray,
-                f
+                f,
+                diff_list
             )
 
     # ==================================================================
-    # Close files
+    # Cleanup and quit
     # ==================================================================
+
+    # Close file
     f.close()
 
+    # Reopen file and insert list of species with nonzero diffs
+    util.insert_text_into_file(
+        filename=outfilename,
+        search_text=placeholder,
+        replace_text=util.unique_values(diff_list, drop=[None])
+    )
+
 
 def make_benchmark_conc_plots(
         ref,
@@ -4367,3 +4391,109 @@ def get_species_database_dir(config):
     else:
         msg = f"Could not find the {spcdb_dir}/species_database.yml file!"
         raise FileNotFoundError(msg)
+
+
+def create_benchmark_summary_table(
+        refpath,
+        refstr,
+        devpath,
+        devstr,
+        dst="./benchmark",
+        overwrite=False,
+        outfilename="Summary.txt",
+        verbose=False,
+        spcdb_dir=os.path.dirname(__file__)
+):
+    """
+    Creates a table of global masses for a list of species in contained in
+    two data sets.  The data sets,  which typically represent output from two
+    different model versions, are usually contained in netCDF data files.
+
+    Args:
+        refdata: xarray Dataset
+            The first data set to be compared (aka "Reference").
+        refstr: str
+            A string that can be used to identify refdata
+            (e.g. a model v2ersion number or other identifier).
+        devdata: xarray Dataset
+            The second data set to be compared (aka "Development").
+        devstr: str
+            A string that can be used to identify the data set specified
+            by devfile (e.g. a model version number or other identifier).
+        varlist: list of strings
+            List of species concentation variable names to include
+            in the list of global totals.
+        met_and_masks: dict of xarray DataArray
+            Dictionary containing the meterological variables and
+            masks for the Ref and Dev datasets.
+        label: str
+            Label to go in the header string.  Can be used to
+            pass the month & year.
+
+    Keyword Args (optional):
+        trop_only: bool
+            Set this switch to True if you wish to print totals
+            only for the troposphere.
+            Default value: False (i.e. print whole-atmosphere totals).
+        outfilename: str
+            Name of the text file which will contain the table of
+            emissions totals.
+            Default value: "GlobalMass_TropStrat.txt"
+        verbose: bool
+            Set this switch to True if you wish to print out extra
+            informational messages.
+            Default value: False
+        spcdb_dir: str
+            Directory of species_datbase.yml file
+            Default value: Directory of GCPy code repository
+
+    Remarks:
+        This method is mainly intended for model benchmarking purposes,
+        rather than as a general-purpose tool.
+
+        Species properties (such as molecular weights) are read from a
+        YAML file called "species_database.yml".
+    """
+
+    # ==================================================================
+    # Initialization and data read
+    # ==================================================================
+    if os.path.isdir(dst) and not overwrite:
+        msg = "Directory {} exists. Pass overwrite=True to overwrite " \
+            + "files in that directory, if any."
+        msg = msg.format(dst)
+        raise ValueError(msg)
+    if not os.path.isdir(dst):
+        os.mkdir(dst)
+
+#    # ==================================================================
+#    # Close files
+#    # ==================================================================
+#
+#    # Get a list of files in the Ref path
+#    ref_files = []
+#    for (path, names, files) in os.walk(refpath):
+#        for rf in files:
+#            ref_files.append(os.path.join(path, rf))
+#
+#    # Get a list of files in the Ref path
+#    dev_files = []
+#    for (path, names, files) in os.walk(devpath):
+#        for df in files:
+#            dev_files.append(os.path.join(path, df)
+#
+#    # ==================================================================
+#    # Open file for output
+#    # ==================================================================
+#
+#    # Create file
+#    try:
+#        f = open(os.path.join(dst, outfilename), "w")
+#    except (IOError, OSError, FileNotFoundError) as e:
+#        msg = f"Could not open {outfilename} for writing!"
+#        raise e(msg)
+#
+#    # ==================================================================
+#    # Close files
+#    # ==================================================================
+#    f.close()
diff --git a/gcpy/util.py b/gcpy/util.py
index 0232722b..ea1ebba9 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -10,6 +10,7 @@
 import numpy as np
 import xarray as xr
 from PyPDF2 import PdfFileWriter, PdfFileReader
+from textwrap import wrap
 
 def convert_lon(
         data,
@@ -161,7 +162,8 @@ def print_totals(
         ref,
         dev,
         f,
-        masks=None
+        diff_list,
+        masks=None,
 ):
     """
     Computes and prints Ref and Dev totals (as well as the difference
@@ -194,9 +196,11 @@ def print_totals(
 
     # Make sure that both Ref and Dev are xarray DataArray objects
     if not isinstance(ref, xr.DataArray):
-        raise TypeError("The ref argument must be an xarray DataArray!")
+        raise TypeError("The 'ref' argument must be an xarray DataArray!")
     if not isinstance(dev, xr.DataArray):
-        raise TypeError("The dev argument must be an xarray DataArray!")
+        raise TypeError("The 'dev' argument must be an xarray DataArray!")
+    if not isinstance(diff_list, list):
+        raise TypeError("The 'diff_list' argument must be a list!")
 
     # Determine if either Ref or Dev have all NaN values:
     ref_is_all_nan = np.isnan(ref.values).all()
@@ -220,6 +224,12 @@ def print_totals(
     # Create the display name by editing the diagnostic name
     display_name = create_display_name(diagnostic_name)
 
+    # Get the species name from the display name
+    species_name = display_name
+    c = species_name.find(" ")
+    if c > 0:
+        species_name = display_name[0:c]
+
     # Special handling for totals
     if "_TOTAL" in diagnostic_name.upper():
         print("-"*90, file=f)
@@ -233,7 +243,7 @@ def print_totals(
     else:
         if masks is not None:
             refarr = np.ma.masked_array(refarr, masks["Ref_TropMask"])
-        total_ref = np.sum(refarr)
+        total_ref = np.sum(refarr, dtype=np.float64)
 
     # ==================================================================
     # Sum the Dev array (or set to NaN if missing)
@@ -244,17 +254,25 @@ def print_totals(
     else:
         if masks is not None:
             devarr = np.ma.masked_array(devarr, masks["Dev_TropMask"])
-        total_dev = np.sum(devarr)
+        total_dev = np.sum(devarr, dtype=np.float64)
 
     # ==================================================================
     # Compute differences (or set to NaN if missing)
     # ==================================================================
     if ref_is_all_nan or dev_is_all_nan:
         diff = np.nan
-        zero_diff = False
     else:
         diff = total_dev - total_ref
-        zero_diff = np.array_equal(refarr, devarr)
+    has_diffs = abs(diff) > np.float64(0.0)
+
+    # Append to the list of differences.  If no differences then append
+    # None.  Duplicates can be stripped out in the calling routine.
+    if has_diffs:
+        diff_str = " * "
+        diff_list.append(species_name)
+    else:
+        diff_str = ""
+        diff_list.append(None)
 
     # ==================================================================
     # Compute % differences (or set to NaN if missing)
@@ -268,9 +286,11 @@ def print_totals(
             pctdiff = np.nan
 
     # ==================================================================
-    # Write output to file
+    # Write output to file and return
     # ==================================================================
-    print(f"{display_name.ljust(19)}: {total_ref:18.6f}  {total_dev:18.6f}  {diff:12.6f}  {pctdiff:8.3f}  {zero_diff}", file=f)
+    print(f"{display_name.ljust(19)}: {total_ref:18.6f}  {total_dev:18.6f}  {diff:12.6f}  {pctdiff:8.3f}  {diff_str}", file=f)
+
+    return diff_list
 
 
 def get_species_categories(
@@ -706,9 +726,9 @@ def slice_by_lev_and_time(
         raise ValueError(msg)
 
     # NOTE: isel no longer seems to work on a Dataset, so
-    # first createthe DataArray object, then use isel on it. 
+    # first createthe DataArray object, then use isel on it.
     #  -- Bob Yantosca (19 Jan 2023)
-    dr = ds[varname]    
+    dr = ds[varname]
     vdims = dr.dims
     if ("time" in vdims and dr.time.size > 0) and "lev" in vdims:
         if flip:
@@ -2117,3 +2137,92 @@ def read_config_file(config_file, quiet=False):
         raise Exception(msg) from err
 
     return config
+
+
+def unique_values(
+        this_list,
+        drop=None,
+):
+    """
+    Given a list, returns a sorted list of unique values.
+
+    Args:
+    -----
+    this_list : list
+        Input list (may contain duplicate values)
+
+    drop: list of str
+        List of variable names to exclude
+
+    Returns:
+    --------
+    unique: list
+        List of unique values from this_list
+    """
+    if not isinstance(this_list, list):
+        raise ValueError("Argument 'this_list' must be a list object!")
+    if not isinstance(drop, list):
+        raise ValueError("Argument 'drop' must be a list object!")
+
+    unique = list(set(this_list))
+
+    if drop is not None:
+        for d in drop:
+            unique.remove(d)
+
+    unique.sort()
+
+    return unique
+
+
+def insert_text_into_file(
+        filename,
+        search_text,
+        replace_text,
+        width=80
+):
+    """
+    Convenience routine to insert text into a file.  The best way
+    to do this is to read the contents of the file, manipulate the
+    text, and then overwrite the file.
+
+    Args:
+    -----
+    filename: str
+        The file with text to be replaced.
+
+    search_text: str
+        Text string in the file that will be replaced.
+
+    replace_text: str or list of str
+        Text that will replace 'search_text'
+
+    width: int
+        Will "word-wrap" the text in 'replace_text' to this width
+    """
+    if not isinstance(search_text, str):
+        raise ValueError("Argument 'search_text' needs to be a string!")
+    if not isinstance(replace_text, str):
+        if isinstance(replace_text, list):
+            replace_text = ' '.join(replace_text)
+        else:
+            raise ValueError(
+                "Argument 'replace_text' needs to be a list or a string"
+            )
+
+    # Word-wrap the replacement text
+    replace_text = wrap(replace_text, width=width)
+    replace_text = '\n'.join(replace_text)
+
+    with open(filename, "r") as f:
+        filedata = f.read()
+        f.close()
+
+    filedata = filedata.replace(
+        search_text,
+        replace_text
+    )
+
+    with open(filename, "w") as f:
+        f.write(filedata)
+        f.close()

From c27c7d28484ea41257468b8fcbced3e762dc6b3a Mon Sep 17 00:00:00 2001
From: Bob Yantosca <yantosca@seas.harvard.edu>
Date: Wed, 8 Feb 2023 16:36:34 -0500
Subject: [PATCH 5/8] Further updates for printing information about diffs btw
 Dev & Ref

benchmark/run_benchmark.py
- Now call "make_benchmark_summary_table" for GCC vs GCC, GCHP vs GCC,
  and GCHP vs GCHP benchmarks.

benchmark/1mo_benchmark.yml
- Added "summary_table" switch under "plot_options"

gcpy/benchmark.py
- Removed typo at top of script
- In create_benchmark_emissions_table and create_global_mass_table:
  - Write an alternate message if the string with species differences
    is longer than ~80 chars
- Added new function "diff_list_to_text"
- Added new function "make_directory"
- Updated "create_benchmrk_summary_table" to call get_filepaths for
  either GCHP or GCC Ref and/or Dev data

gcpy/util.py
- In routine print_totals:
  - Now keep track of which species have differences btw Dev & Ref
    in "diff_list", which is passed to the calling routine
- Added new function "wrap_text"
- Added new function "array_equals"

CHANGELOG.md
- Updated accordingly

Signed-off-by: Bob Yantosca <yantosca@seas.harvard.edu>
---
 CHANGELOG.md                |   5 +
 benchmark/1mo_benchmark.yml |   1 +
 benchmark/run_benchmark.py  | 131 ++++++++++++---
 gcpy/benchmark.py           | 311 +++++++++++++++++++++++++++---------
 gcpy/util.py                | 150 +++++++++++++----
 5 files changed, 465 insertions(+), 133 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9d414db8..37327b9c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
 ## Unreleased
 
+### Added
+- Benchmark summary table output (intended for 1hr & 1mo benchmarks)
+- Species/emissions/inventories that differ between Dev & Ref versions are now printed at the top of the benchmark emissions, inventory, and global mass tables.  if there are too many species with diffs, an alternate message is printed.
+- New functions in `benchmark.py` and `util.py` to facilitate printing of the species/emissions/inventories that differ between Dev & Ref versions.
+
 ## [1.3.2] -- 2022-10-25
 
 ### Fixes
diff --git a/benchmark/1mo_benchmark.yml b/benchmark/1mo_benchmark.yml
index 4f5f2f67..3a15f591 100644
--- a/benchmark/1mo_benchmark.yml
+++ b/benchmark/1mo_benchmark.yml
@@ -108,6 +108,7 @@ options:
     ops_budget_table: False
     OH_metrics: True
     ste_table: True # GCC only
+    summary_table: True
     plot_options: # Plot concentrations and emissions by category?
       by_spc_cat: True
       by_hco_cat: True
diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py
index db5b2be3..e7c14e92 100755
--- a/benchmark/run_benchmark.py
+++ b/benchmark/run_benchmark.py
@@ -642,29 +642,6 @@ def run_benchmark_default(config):
         if config["options"]["outputs"]["OH_metrics"]:
             print("\n%%% Creating GCC vs. GCC OH metrics table %%%")
 
-            # Use this for benchmarks prior to GEOS-Chem 13.0.0
-            #        # Diagnostic collection files to read
-            #        col  = "ConcAfterChem"
-            #        ref = get_filepath(gcc_vs_gcc_refdir, col, gcc_ref_date)
-            #        dev = get_filepath(gcc_vs_gcc_devdir, col, gcc_dev_date)
-            #
-            #        # Meteorology data needed for calculations
-            #        col = "StateMet"
-            #        refmet = get_filepath(gcc_vs_gcc_refdir, col, gcc_ref_date)
-            #        devmet = get_filepath(gcc_vs_gcc_devdir, col, gcc_dev_date)
-            #
-            #        # Print OH metrics
-            #        bmk.make_benchmark_oh_metrics(
-            #            ref,
-            #            refmet,
-            #            config["data"]["ref"]["gcc"]["version"],
-            #            dev,
-            #            devmet,
-            #            config["data"]["dev"]["gcc"]["version"],
-            #            dst=gcc_vs_gcc_tablesdir,
-            #            overwrite=True
-            #        )
-
             # Filepaths
             ref = get_filepath(gcc_vs_gcc_refdir, "Metrics", gcc_ref_date)
             dev = get_filepath(gcc_vs_gcc_devdir, "Metrics", gcc_dev_date)
@@ -703,6 +680,48 @@ def run_benchmark_default(config):
                     month=gcc_dev_date.astype(datetime).month,
                 )
 
+        # ==================================================================
+        # GCC vs. GCC summary table
+        # ==================================================================
+        if config["options"]["outputs"]["summary_table"]:
+            print("\n%%% Creating GCC vs. GCC summary table %%%")
+
+            # Diagnostic collections to check
+            collections = [
+                'AerosolMass',
+                'Aerosols',
+                'Emissions',
+                'JValues',
+                'Metrics',
+                'SpeciesConc',
+                'StateMet',
+            ]
+
+            # Print summary of which collections are identical
+            # between Ref & Dev, and which are not identical.
+            bmk.create_benchmark_summary_table(
+                gcc_vs_gcc_refdir,
+                config["data"]["ref"]["gcc"]["version"],
+                gcc_ref_date,
+                gcc_vs_gcc_devdir,
+                config["data"]["dev"]["gcc"]["version"],
+                gcc_dev_date,
+                collections = [
+                    'AerosolMass',
+                    'Aerosols',
+                    'Emissions',
+                    'JValues',
+                    'Metrics',
+                    'SpeciesConc',
+                    'StateMet'
+                ],
+                dst=gcc_vs_gcc_tablesdir,
+                outfilename="Summary.txt",
+                overwrite=True,
+                verbose=False,
+            )
+
+
     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     # Create GCHP vs GCC benchmark plots and tables
     #
@@ -1014,6 +1033,38 @@ def run_benchmark_default(config):
             title = "\n%%% Skipping GCHP vs. GCC Strat-Trop Exchange table %%%"
             print(title)
 
+
+        # ==================================================================
+        # GCHP vs. GCC summary table
+        # ==================================================================
+        if config["options"]["outputs"]["summary_table"]:
+            print("\n%%% Creating GCHP vs. GCC summary table %%%")
+
+            # Print summary of which collections are identical
+            # between Ref & Dev, and which are not identical.
+            bmk.create_benchmark_summary_table(
+                gchp_vs_gcc_refdir,
+                config["data"]["dev"]["gcc"]["version"],
+                gcc_dev_date,
+                gchp_vs_gcc_devdir,
+                config["data"]["dev"]["gchp"]["version"],
+                gchp_dev_date,
+                collections=[
+                    'AerosolMass',
+                    'Aerosols',
+                    'Emissions',
+                    'JValues',
+                    'Metrics',
+                    'SpeciesConc',
+                    'StateMet',
+                ],
+                dst=gchp_vs_gcc_tablesdir,
+                outfilename="Summary.txt",
+                overwrite=True,
+                verbose=False,
+                dev_gchp=True
+            )
+
     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     # Create GCHP vs GCHP benchmark plots and tables
     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -1376,6 +1427,38 @@ def run_benchmark_default(config):
         if config["options"]["outputs"]["ste_table"]:
             print("\n%%% Skipping GCHP vs. GCHP Strat-Trop Exchange table %%%")
 
+        # ==================================================================
+        # GCHP vs. GCHP summary table
+        # ==================================================================
+        if config["options"]["outputs"]["summary_table"]:
+            print("\n%%% Creating GCHP vs. GCHP summary table %%%")
+
+            # Print summary of which collections are identical
+            # between Ref & Dev, and which are not identical.
+            bmk.create_benchmark_summary_table(
+                gchp_vs_gchp_refdir,
+                config["data"]["ref"]["gchp"]["version"],
+                gchp_ref_date,
+                gchp_vs_gchp_devdir,
+                config["data"]["dev"]["gchp"]["version"],
+                gchp_dev_date,
+                collections=[
+                    'AerosolMass',
+                    'Aerosols',
+                    'Emissions',
+                    'JValues',
+                    'Metrics',
+                    'SpeciesConc', 
+                    'StateMet',
+                ],
+                dst=gchp_vs_gchp_tablesdir,
+                outfilename="Summary.txt",
+                overwrite=True,
+                verbose=False,
+                ref_gchp=True,
+                dev_gchp=True,
+            )
+
     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     # Create GCHP vs GCC difference of differences benchmark plots
     # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -1423,7 +1506,7 @@ def run_benchmark_default(config):
     # ==================================================================
     # Print a message indicating that the benchmarks finished
     # ==================================================================
-    print("\n %%%% All requested benchmark plots/tables created! %%%%")
+    print("\n%%%% All requested benchmark plots/tables created! %%%%")
 
 
 def main():
diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py
index 71ca17d2..c92bc03b 100644
--- a/gcpy/benchmark.py
+++ b/gcpy/benchmark.py
@@ -1,4 +1,4 @@
-6"""
+"""
 Specific utilities for creating plots from GEOS-Chem benchmark simulations.
 """
 
@@ -185,7 +185,10 @@ def create_total_emissions_table(
     # Write a placeholder to the file that denotes where
     # the list of species with differences will be written
     placeholder = "@%% insert diff_list here %%@"
-    print(f"Species that differ between {refstr} and {devstr}", file=f)
+    if "Inv" in template:
+        print(f"Inventories that differ btw {refstr} and {devstr}:", file=f)
+    else:
+        print(f"Species that differ btw {refstr} and {devstr}:", file=f)
     print(f"{placeholder}\n\n", file=f)
 
     # Define a list for differences
@@ -262,7 +265,7 @@ def create_total_emissions_table(
 
             # If no properties are found, then skip to next species
             if species_properties is None:
-                print("No properties found for {} ... skippping".format(spc_name))
+                print(f"No properties found for {spc_name} ... skippping")
                 continue
 
             # Convert units of Ref and Dev and save to numpy ndarray objects
@@ -348,11 +351,12 @@ def create_total_emissions_table(
     # Close file
     f.close()
 
-    # Reopen file and insert list of species with nonzero diffs
+    # Reopen file and replace placeholder with list of diffs
     util.insert_text_into_file(
         filename=outfilename,
         search_text=placeholder,
-        replace_text=util.unique_values(diff_list, drop=[None])
+        replace_text=diff_list_to_text(diff_list),
+        width=90
     )
 
 def create_global_mass_table(
@@ -460,21 +464,23 @@ def create_global_mass_table(
     if trop_only:
         title1 = f"### Global mass (Gg) {label} (Trop only)"
     title2 = f"### Ref = {refstr}; Dev = {devstr}"
+    title3 = f"### Species that differ btw {refstr} and {devstr}:"
+
+    # Write a placeholder to the file that denotes where
+    # the list of species with differences will be written
+    placeholder = "@%% insert diff_list here %%@"
+    title4 = f"{placeholder}"
 
     # Print header to file
     print("#" * 89, file=f)
     print(f"{title1 : <86}{'###'}", file=f)
     print(f"{title2 : <86}{'###'}", file=f)
+    print(f"{'###'  : <86}{'###'}", file=f)
+    print(f"{title3 : <86}{'###'}", file=f)
+    print(f"{placeholder}", file=f)
     print("#" * 89, file=f)
 
-    # Write a placeholder to the file that denotes where
-    # the list of species with differences will be written
-    placeholder = "@%% insert diff_list here %%@"
-    print(f"\nSpecies that differ between {refstr} and {devstr}", file=f)
-    print(f"{placeholder}\n\n", file=f)
-
     # Column headers
-    print(f"{title2}", file=f)
     print(f"{'' : <19}{'Ref' : >20}{'Dev' : >20}{'Dev - Ref' : >14}{'% diff' : >10} {'diffs'}", file=f)
 
     # ==================================================================
@@ -566,11 +572,15 @@ def create_global_mass_table(
     # Close file
     f.close()
 
-    # Reopen file and insert list of species with nonzero diffs
+    # Reopen file and replace placeholder text by diff_text
     util.insert_text_into_file(
         filename=outfilename,
         search_text=placeholder,
-        replace_text=util.unique_values(diff_list, drop=[None])
+        replace_text=diff_list_to_text(
+            diff_list,
+            fancy_format=True
+        ),
+        width=100  # Force it not to wrap
     )
 
 
@@ -4396,49 +4406,61 @@ def get_species_database_dir(config):
 def create_benchmark_summary_table(
         refpath,
         refstr,
+        refdate,
         devpath,
         devstr,
+        devdate,
+        collections,
         dst="./benchmark",
         overwrite=False,
         outfilename="Summary.txt",
         verbose=False,
-        spcdb_dir=os.path.dirname(__file__)
+        spcdb_dir=os.path.dirname(__file__),
+        ref_gchp=False,
+        dev_gchp=False
 ):
     """
-    Creates a table of global masses for a list of species in contained in
-    two data sets.  The data sets,  which typically represent output from two
-    different model versions, are usually contained in netCDF data files.
+    Creates a benchmark summary table that shows which data collections
+    have difference.  Useful for scanning the 1-hr and 1-month benchmark
+    outputs.
 
     Args:
-        refdata: xarray Dataset
-            The first data set to be compared (aka "Reference").
+        refpath: str
+            Path to the first data set to be compared (aka "Ref").
         refstr: str
             A string that can be used to identify refdata
-            (e.g. a model v2ersion number or other identifier).
-        devdata: xarray Dataset
-            The second data set to be compared (aka "Development").
+            (e.g. a model version number or other identifier).
+        refdate: np.datetime64
+            Date/time stamp used by the "Ref" data files.
+        ref_gchp: bool
+            Set to True if the "Ref" data comes from a GCHP run.
+            Default value: False
+        devpath: str
+            Path to the second data set to be compared (aka "Dev").
         devstr: str
             A string that can be used to identify the data set specified
             by devfile (e.g. a model version number or other identifier).
-        varlist: list of strings
-            List of species concentation variable names to include
-            in the list of global totals.
-        met_and_masks: dict of xarray DataArray
-            Dictionary containing the meterological variables and
-            masks for the Ref and Dev datasets.
-        label: str
-            Label to go in the header string.  Can be used to
-            pass the month & year.
+        dev_gchp: bool
+            Set to True if the "Ref" data comes from a GCHP run.
+            Default value: False
+        devdate: np.datetime64
+            Date/time stamp used by the "Dev" data files.
+        collections: list of strings
+            List of diagnostic collections to examine.
 
     Keyword Args (optional):
-        trop_only: bool
-            Set this switch to True if you wish to print totals
-            only for the troposphere.
-            Default value: False (i.e. print whole-atmosphere totals).
+        dst: str
+            A string denoting the destination folder where the file
+            containing emissions totals will be written.
+            Default value: "./benchmark"
+        overwrite: bool
+            Set this flag to True to overwrite files in the
+            destination folder (specified by the dst argument).
+            Default value: False
         outfilename: str
             Name of the text file which will contain the table of
             emissions totals.
-            Default value: "GlobalMass_TropStrat.txt"
+            Default value: "Summary.txt"
         verbose: bool
             Set this switch to True if you wish to print out extra
             informational messages.
@@ -4456,44 +4478,183 @@ def create_benchmark_summary_table(
     """
 
     # ==================================================================
-    # Initialization and data read
+    # Open file for output
     # ==================================================================
-    if os.path.isdir(dst) and not overwrite:
-        msg = "Directory {} exists. Pass overwrite=True to overwrite " \
-            + "files in that directory, if any."
-        msg = msg.format(dst)
+
+    # Create the directory for output
+    make_directory(dst, overwrite)
+
+    # Create file
+    try:
+        f = open(os.path.join(dst, outfilename), "w")
+    except (IOError, OSError, FileNotFoundError) as e:
+        msg = f"Could not open {outfilename} for writing!"
+        raise e(msg)
+
+    # Title strings
+    title1 = f"### Benchmark summary table"
+    title2 = f"### Ref = {refstr}; Dev = {devstr}"
+
+    # Write a placeholder to the file that denotes where
+    # the list of species with differences will be written
+    placeholder = "@%% insert diff_list here %%@"
+    title4 = f"{placeholder}"
+
+    # Print header to file
+    print("#" * 80, file=f)
+    print(f"{title1 : <77}{'###'}", file=f)
+    print(f"{title2 : <77}{'###'}", file=f)
+    print("#" * 80, file=f)
+    print(file=f)
+
+    # ==================================================================
+    # Read data and look differences btw Ref & Dev versions
+    # ==================================================================
+
+    # Variables to skip
+    skip_vars = gcon.skip_these_vars
+    skip_vars.append("corner_lats")
+    skip_vars.append("corner_lons")
+
+    # Pick the proper function to read the data
+    reader = util.dataset_reader(
+        multi_files=False,
+        verbose=verbose
+    )
+
+    # Make a directory to store the list of species that differ
+    diff_dict = {}
+
+    # Loop over diagnostic files
+    for col in collections:
+
+        # Read Ref data
+        refdata = reader(
+            util.get_filepath(
+                refpath,
+                col,
+                refdate,
+                is_gchp=ref_gchp
+            ),
+            drop_variables=skip_vars
+        ).load()
+
+        # Get Dev data
+        devdata = reader(
+            util.get_filepath(
+                devpath,
+                col,
+                devdate,
+                is_gchp=dev_gchp
+            ),
+            drop_variables=skip_vars
+        ).load()
+
+        # Make sure that Ref and Dev datasets have the same variables.
+        # Variables that are in Ref but not in Dev will be added to Dev
+        # with all missing values (NaNs). And vice-versa.
+        [refdata, devdata] = util.add_missing_variables(
+            refdata,
+            devdata
+        )
+
+        # Find all common variables between the two datasets
+        vardict = util.compare_varnames(
+            refdata,
+            devdata,
+            quiet=True
+        )
+
+        # List of differences for this collection
+        diff_list = []
+
+        # Keep track of which variables are different
+        # Loop over the common variables
+        for v in vardict["commonvarsData"]:
+            if not util.array_equals(refdata[v], devdata[v]):
+                diff_list.append(v)
+
+        # Drop duplicate values from diff_list
+        diff_list = util.unique_values(diff_list, drop=[None])
+
+        if len(diff_list) == 0:
+            print("-" *  79, file=f)
+            print(f"{col}: {devstr} is identical to {refstr}", file=f)
+            print(file=f)
+        else:
+            c = 0
+            print("-" *  79, file=f)
+            print(f"{col}: {devstr} differs from {refstr}", file=f)
+            print("\n  Diagnostics that differ", file=f)
+            for i, v in enumerate(diff_list):
+                print(f"    {v}", file=f)
+                if i > 10:
+                    print(f"    ... and {len(diff_list) - 10} others", file=f)
+                    break
+            print(file=f)
+
+    # ==================================================================
+    # Close files
+    # ==================================================================
+    f.close()
+
+
+def diff_list_to_text(
+        diff_list,
+        fancy_format=False
+):
+    """
+    Converts a list of species/emissions/inventories/diagnostics that
+    show differences between GEOS-Chem versions ot a printable text
+    string.
+
+    Args:
+    -----
+    diff_list : list
+        List to be converted into text.  "None" values will be dropped.
+    fancy_format: bool
+        Set to True if you wish output text to be bookended with '###'.
+
+    Returns:
+    diff_text : str
+        String with concatenated list values.
+    """
+    if not isinstance(diff_list, list):
+        raise ValueError("Argument 'diff_list' must be a list!")
+
+    # Strip out duplicates from diff_list
+    # Prepare a message about species differences (or alternate msg)
+    diff_list = util.unique_values(diff_list, drop=[None])
+    diff_text = util.wrap_text(diff_list, width=85)
+    if len(diff_text) > 85:
+        diff_text = "... Too many diffs to print (see below for details)"
+
+    if fancy_format:
+        diff_text = f"### {diff_text : <82}{'###'}"
+
+    return diff_text.strip()
+
+
+def make_directory(
+        dir_name,
+        overwrite
+):
+    """
+    Creates a directory where benchmark plots/tables will be placed.
+
+    Args:
+    -----
+    dir_name : str
+        Name of the directory to be created.
+    overwrite : bool
+        Set to True if you wish to overwrite prior contents in
+        the directory 'dir_name'
+    """
+
+    if os.path.isdir(dir_name) and not overwrite:
+        msg = f"Directory {dir_name} exists!\n"
+        msg += "Pass overwrite=True to overwrite files in that directory."
         raise ValueError(msg)
-    if not os.path.isdir(dst):
-        os.mkdir(dst)
 
-#    # ==================================================================
-#    # Close files
-#    # ==================================================================
-#
-#    # Get a list of files in the Ref path
-#    ref_files = []
-#    for (path, names, files) in os.walk(refpath):
-#        for rf in files:
-#            ref_files.append(os.path.join(path, rf))
-#
-#    # Get a list of files in the Ref path
-#    dev_files = []
-#    for (path, names, files) in os.walk(devpath):
-#        for df in files:
-#            dev_files.append(os.path.join(path, df)
-#
-#    # ==================================================================
-#    # Open file for output
-#    # ==================================================================
-#
-#    # Create file
-#    try:
-#        f = open(os.path.join(dst, outfilename), "w")
-#    except (IOError, OSError, FileNotFoundError) as e:
-#        msg = f"Could not open {outfilename} for writing!"
-#        raise e(msg)
-#
-#    # ==================================================================
-#    # Close files
-#    # ==================================================================
-#    f.close()
+    if not os.path.isdir(dir_name):
+        os.mkdir(dir_name)
diff --git a/gcpy/util.py b/gcpy/util.py
index ea1ebba9..e6a85033 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -842,6 +842,8 @@ def compare_varnames(
             commonvars3D     List of variables that are common to
                              refdata and devdata, and that have lat,
                              lon, and level dimensions.
+            commonvarsData   List of all commmon 2D or 3D data variables,
+                             excluding index variables.
             refonly          List of 2D or 3D variables that are only
                              present in refdata.
             devonly          List of 2D or 3D variables that are only
@@ -854,30 +856,36 @@ def compare_varnames(
     devonly = [v for v in devvars if v not in refvars]
     dimmismatch = [v for v in commonvars if refdata[v].ndim != devdata[v].ndim]
     commonvarsOther = [
-        v
-        for v in commonvars
-        if (
+        v for v in commonvars if (
+          (
             ("lat" not in refdata[v].dims or "Xdim" not in refdata[v].dims)
-            and ("lon" not in refdata[v].dims or "Ydim" not in refdata[v].dims)
-            and ("lev" not in refdata[v].dims)
+            and
+            ("lon" not in refdata[v].dims or "Ydim" not in refdata[v].dims)
+            and
+            ("lev" not in refdata[v].dims)
+          )
+          or
+          (
+            ("hyam" in v or "hybm" in v)  # Omit these from plottable data
+          )
         )
     ]
     commonvars2D = [
-        v
-        for v in commonvars
-        if (
-            ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims)
-            and ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims)
-            and ("lev" not in refdata[v].dims)
+        v for v in commonvars if (
+          ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims)
+          and
+          ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims)
+          and
+          ("lev" not in refdata[v].dims)
         )
     ]
     commonvars3D = [
-        v
-        for v in commonvars
-        if (
-            ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims)
-            and ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims)
-            and ("lev" in refdata[v].dims)
+        v for v in commonvars if (
+          ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims)
+          and
+          ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims)
+          and
+          ("lev" in refdata[v].dims)
         )
     ]
 
@@ -903,18 +911,20 @@ def compare_varnames(
                     print("All variables have same dimensions in ref and dev")
 
     # For safety's sake, remove the 0-D and 1-D variables from
-    # refonly and devonly.  This will ensure that refonly and
-    # devonly will only contain variables that can be plotted.
+    # commonvarsData, refonly, and devonly.  This will ensure that
+    # these lists will only contain variables that can be plotted.
+    commonvarsData = [v for v in commonvars if v not in commonvarsOther]
     refonly = [v for v in refonly if v not in commonvarsOther]
     devonly = [v for v in devonly if v not in commonvarsOther]
 
     return {
         "commonvars": commonvars,
-        "commonvarsOther": commonvarsOther,
         "commonvars2D": commonvars2D,
         "commonvars3D": commonvars3D,
+        "commonvarsData": commonvarsData,
+        "commonvarsOther": commonvarsOther,
         "refonly": refonly,
-        "devonly": devonly,
+        "devonly": devonly
     }
 
 
@@ -2168,13 +2178,46 @@ def unique_values(
 
     if drop is not None:
         for d in drop:
-            unique.remove(d)
+            if d in unique:
+                unique.remove(d)
 
     unique.sort()
 
     return unique
 
 
+def wrap_text(
+        text,
+        width=80
+):
+    """
+    Wraps text so that it fits within a certain line width.
+
+    Args:
+    -----
+    text: str or list of str
+        Input text to be word-wrapped.
+    width: int
+        Line width, in characters.
+        Default value: 80
+
+    Returns:
+    --------
+    Original text reformatted so that it fits within lines
+    of 'width' characters or less.
+    """
+    if not isinstance(text, str):
+        if isinstance(text, list):
+            text = ' '.join(text)  # List -> str conversion
+        else:
+            raise ValueError("Argument 'text' must be either str or list!")
+
+    text = wrap(text, width=width)
+    text = '\n'.join(text)
+
+    return text
+
+
 def insert_text_into_file(
         filename,
         search_text,
@@ -2190,29 +2233,27 @@ def insert_text_into_file(
     -----
     filename: str
         The file with text to be replaced.
-
     search_text: str
         Text string in the file that will be replaced.
-
     replace_text: str or list of str
         Text that will replace 'search_text'
-
     width: int
         Will "word-wrap" the text in 'replace_text' to this width
     """
     if not isinstance(search_text, str):
         raise ValueError("Argument 'search_text' needs to be a string!")
-    if not isinstance(replace_text, str):
-        if isinstance(replace_text, list):
-            replace_text = ' '.join(replace_text)
-        else:
-            raise ValueError(
-                "Argument 'replace_text' needs to be a list or a string"
-            )
+    if not isinstance(replace_text, str) and \
+       not isinstance(replace_text, list):
+        raise ValueError(
+            "Argument 'replace_text' needs to be a list or a string"
+        )
 
     # Word-wrap the replacement text
-    replace_text = wrap(replace_text, width=width)
-    replace_text = '\n'.join(replace_text)
+    # (does list -> str conversion if necessary)
+    replace_text = wrap_text(
+        replace_text,
+        width=width
+    )
 
     with open(filename, "r") as f:
         filedata = f.read()
@@ -2226,3 +2267,44 @@ def insert_text_into_file(
     with open(filename, "w") as f:
         f.write(filedata)
         f.close()
+
+
+def array_equals(
+        refdata,
+        devdata
+):
+    """
+    Tests two arrays for equality.  Useful for checking which
+    species have nonzero differences in benchmark output.
+
+    Args:
+    -----
+    refdata: xarray DataArray or numpy ndarray
+        The first array to be checked.
+    devdata: xarray DataArray or numpy ndarray
+        The second array to be checked.
+
+    Returns:
+    --------
+    True if both arrays are equal; False if not
+    """
+    if not isinstance(refdata, np.ndarray):
+        if isinstance(refdata, xr.DataArray):
+            refdata = refdata.values
+        else:
+            raise ValueError(
+            "Argument 'refdata' must be an xarray DataArray or numpy ndarray!"
+            )
+    if not isinstance(devdata, np.ndarray):
+        if isinstance(devdata, xr.DataArray):
+            devdata = devdata.values
+        else:
+            raise ValueError(
+            "Argument 'devdata' must be an xarray DataArray or numpy ndarray!"
+            )
+
+    # This method will work if the arrays hve different dimensions
+    # but an element-by-element search will not!
+    refsum = np.sum(refdata, dtype=np.float64)
+    devsum = np.sum(devdata, dtype=np.float64)
+    return np.abs(devsum - refsum) > np.float64(0.0)

From af33f24d71d20e9b72c61a5d1dbcddf6982c8629 Mon Sep 17 00:00:00 2001
From: Bob Yantosca <yantosca@seas.harvard.edu>
Date: Thu, 9 Feb 2023 13:08:19 -0500
Subject: [PATCH 6/8] Rework logic in compare_varnames

gcpy.util.py
- In function compare_varnames
  - Add variable names to commonvarsData if they have lon/Xdim or
    lat/Ydim coordinates.  Plottable data variables need at least
    these dimensions.
  - commonvarsOther are variables that are not in commonvarsData
  - commonvars2D and commonvars3D are plottable data variables
    that either do not have lev/ilev dimensions, or do have them.

This fixes an issue where 2D plottable data variables were getting
lumped together with the index variables.

Signed-off-by: Bob Yantosca <yantosca@seas.harvard.edu>
---
 gcpy/util.py | 37 +++++++++++++++----------------------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/gcpy/util.py b/gcpy/util.py
index e6a85033..4b765496 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -843,7 +843,8 @@ def compare_varnames(
                              refdata and devdata, and that have lat,
                              lon, and level dimensions.
             commonvarsData   List of all commmon 2D or 3D data variables,
-                             excluding index variables.
+                             excluding index variables.  This is the
+                             list of "plottable" variables.
             refonly          List of 2D or 3D variables that are only
                              present in refdata.
             devonly          List of 2D or 3D variables that are only
@@ -855,37 +856,29 @@ def compare_varnames(
     refonly = [v for v in refvars if v not in devvars]
     devonly = [v for v in devvars if v not in refvars]
     dimmismatch = [v for v in commonvars if refdata[v].ndim != devdata[v].ndim]
-    commonvarsOther = [
+    # Assume plottable data has lon and lat
+    # This is OK for purposes of benchmarking
+    #  -- Bob Yantosca (09 Feb 2023)
+    commonvarsData = [
         v for v in commonvars if (
-          (
-            ("lat" not in refdata[v].dims or "Xdim" not in refdata[v].dims)
-            and
-            ("lon" not in refdata[v].dims or "Ydim" not in refdata[v].dims)
+            ("lat" in refdata[v].dims or "Ydim" in refdata[v].dims)
             and
-            ("lev" not in refdata[v].dims)
-          )
-          or
-          (
-            ("hyam" in v or "hybm" in v)  # Omit these from plottable data
-          )
+            ("lon" in refdata[v].dims or "Xdim" in refdata[v].dims)
         )
+    ]        
+    commonvarsOther = [
+        v for v in commonvars if (
+           v not in commonvarsData
+        )    
     ]
     commonvars2D = [
         v for v in commonvars if (
-          ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims)
-          and
-          ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims)
-          and
-          ("lev" not in refdata[v].dims)
+            (v in commonvarsData) and ("lev" not in refdata[v].dims)
         )
     ]
     commonvars3D = [
         v for v in commonvars if (
-          ("lat" in refdata[v].dims or "Xdim" in refdata[v].dims)
-          and
-          ("lon" in refdata[v].dims or "Ydim" in refdata[v].dims)
-          and
-          ("lev" in refdata[v].dims)
+            (v in commonvarsData) and ("lev" in refdata[v].dims)
         )
     ]
 

From b1b9295d1616e910f2ef76ef353f74dc3a2e0f81 Mon Sep 17 00:00:00 2001
From: Bob Yantosca <yantosca@seas.harvard.edu>
Date: Thu, 9 Feb 2023 13:55:46 -0500
Subject: [PATCH 7/8] Fixed incorrect logic in util.array_equals; updates in
 benchmark.py

gcpy/util.py
- In routine array_equals:
  - Add dtype parameter to select numeric type for the comparison
  - Return the inverse of ABS(devsum-refsum) > 0, since this is the
    condition for not equals.

gcpy/benchmark.py
- Skip reading "AREA" when computing the summary table
- Pass dtype=np.float32 to util.array_equals

Signed-off-by: Bob Yantosca <yantosca@seas.harvard.edu>
---
 gcpy/benchmark.py | 12 ++++++++----
 gcpy/util.py      | 12 ++++++++----
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py
index c92bc03b..fc5c07c9 100644
--- a/gcpy/benchmark.py
+++ b/gcpy/benchmark.py
@@ -4513,8 +4513,7 @@ def create_benchmark_summary_table(
 
     # Variables to skip
     skip_vars = gcon.skip_these_vars
-    skip_vars.append("corner_lats")
-    skip_vars.append("corner_lons")
+    skip_vars.append("AREA")
 
     # Pick the proper function to read the data
     reader = util.dataset_reader(
@@ -4569,9 +4568,14 @@ def create_benchmark_summary_table(
         diff_list = []
 
         # Keep track of which variables are different
-        # Loop over the common variables
+        # NOTE: Use 32-point float for comparisons since this is
+        # the precision used for History diagnostics.
         for v in vardict["commonvarsData"]:
-            if not util.array_equals(refdata[v], devdata[v]):
+            if not util.array_equals(
+                    refdata[v],
+                    devdata[v],
+                    dtype=np.float32
+            ):
                 diff_list.append(v)
 
         # Drop duplicate values from diff_list
diff --git a/gcpy/util.py b/gcpy/util.py
index 4b765496..d9590f9c 100644
--- a/gcpy/util.py
+++ b/gcpy/util.py
@@ -2264,7 +2264,8 @@ def insert_text_into_file(
 
 def array_equals(
         refdata,
-        devdata
+        devdata,
+        dtype=np.float64
 ):
     """
     Tests two arrays for equality.  Useful for checking which
@@ -2276,6 +2277,9 @@ def array_equals(
         The first array to be checked.
     devdata: xarray DataArray or numpy ndarray
         The second array to be checked.
+    dtype : np.float32 or np.float64
+        The precision that will be used to make the evaluation.
+        Default: np.float64
 
     Returns:
     --------
@@ -2298,6 +2302,6 @@ def array_equals(
 
     # This method will work if the arrays hve different dimensions
     # but an element-by-element search will not!
-    refsum = np.sum(refdata, dtype=np.float64)
-    devsum = np.sum(devdata, dtype=np.float64)
-    return np.abs(devsum - refsum) > np.float64(0.0)
+    refsum = np.nansum(refdata, dtype=dtype)
+    devsum = np.nansum(devdata, dtype=dtype)
+    return (not np.abs(devsum - refsum) > dtype(0.0))

From 0155dd0c669d2c6112f8666d4324bb6685a00a3d Mon Sep 17 00:00:00 2001
From: Bob Yantosca <yantosca@seas.harvard.edu>
Date: Thu, 9 Feb 2023 14:27:14 -0500
Subject: [PATCH 8/8] Update the messages at the top of tables to just show one
 line

gcpy/benchmark.py:
- Updated routine diff_list_to_text so that it also accepts
  refstr & devstr args.  It now returns either line saying
  "dev & ref are identical" or "dev & ref show X differences"
- Now pass devstr and refstr in calls to diff_list_to_text
- Trimmed whitespace

Signed-off-by: Bob Yantosca <yantosca@seas.harvard.edu>
---
 gcpy/benchmark.py | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/gcpy/benchmark.py b/gcpy/benchmark.py
index fc5c07c9..89735343 100644
--- a/gcpy/benchmark.py
+++ b/gcpy/benchmark.py
@@ -184,11 +184,7 @@ def create_total_emissions_table(
 
     # Write a placeholder to the file that denotes where
     # the list of species with differences will be written
-    placeholder = "@%% insert diff_list here %%@"
-    if "Inv" in template:
-        print(f"Inventories that differ btw {refstr} and {devstr}:", file=f)
-    else:
-        print(f"Species that differ btw {refstr} and {devstr}:", file=f)
+    placeholder = "@%% insert diff status here %%@"
     print(f"{placeholder}\n\n", file=f)
 
     # Define a list for differences
@@ -355,7 +351,10 @@ def create_total_emissions_table(
     util.insert_text_into_file(
         filename=outfilename,
         search_text=placeholder,
-        replace_text=diff_list_to_text(diff_list),
+        replace_text=diff_list_to_text(
+            refstr,
+            devstr,
+            diff_list),
         width=90
     )
 
@@ -464,19 +463,16 @@ def create_global_mass_table(
     if trop_only:
         title1 = f"### Global mass (Gg) {label} (Trop only)"
     title2 = f"### Ref = {refstr}; Dev = {devstr}"
-    title3 = f"### Species that differ btw {refstr} and {devstr}:"
 
     # Write a placeholder to the file that denotes where
     # the list of species with differences will be written
-    placeholder = "@%% insert diff_list here %%@"
-    title4 = f"{placeholder}"
+    placeholder = "@%% insert diff status here %%@"
 
     # Print header to file
     print("#" * 89, file=f)
     print(f"{title1 : <86}{'###'}", file=f)
     print(f"{title2 : <86}{'###'}", file=f)
     print(f"{'###'  : <86}{'###'}", file=f)
-    print(f"{title3 : <86}{'###'}", file=f)
     print(f"{placeholder}", file=f)
     print("#" * 89, file=f)
 
@@ -577,6 +573,8 @@ def create_global_mass_table(
         filename=outfilename,
         search_text=placeholder,
         replace_text=diff_list_to_text(
+            refstr,
+            devstr,
             diff_list,
             fancy_format=True
         ),
@@ -4604,6 +4602,8 @@ def create_benchmark_summary_table(
 
 
 def diff_list_to_text(
+        refstr,
+        devstr,
         diff_list,
         fancy_format=False
 ):
@@ -4629,13 +4629,21 @@ def diff_list_to_text(
     # Strip out duplicates from diff_list
     # Prepare a message about species differences (or alternate msg)
     diff_list = util.unique_values(diff_list, drop=[None])
-    diff_text = util.wrap_text(diff_list, width=85)
-    if len(diff_text) > 85:
-        diff_text = "... Too many diffs to print (see below for details)"
 
+    # Print the text
+    n_diff = len(diff_list)
+    if n_diff > 0:
+        diff_text = f"{devstr} and {refstr} show {n_diff} differences"
+    else:
+        diff_text = f"{devstr} and {refstr} are identical"
+    diff_text = util.wrap_text(
+        diff_text,
+        width=83
+    )
+        
     if fancy_format:
         diff_text = f"### {diff_text : <82}{'###'}"
-
+        
     return diff_text.strip()