From fc4bee8ee9ad1a632c43d2995578bd00f731d9b1 Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Wed, 29 Jan 2025 09:33:24 +0000
Subject: [PATCH 01/18] eia_opendata_imports

---
 scripts/us_eia/opendata/README.md             |  52 +++-
 scripts/us_eia/opendata/download_bulk.py      |  61 ----
 .../us_eia/opendata/generate_jsonl_for_bq.py  |  33 +--
 scripts/us_eia/opendata/manifest.json         | 132 +++++++++
 scripts/us_eia/opendata/process.py            | 108 +++++++
 scripts/us_eia/opendata/process/README.md     |  11 +-
 scripts/us_eia/opendata/process/category.py   |   6 +-
 scripts/us_eia/opendata/process/coal.py       |  14 +-
 scripts/us_eia/opendata/process/common.py     | 271 +++++++++++++++---
 scripts/us_eia/opendata/process/elec.py       |  23 +-
 scripts/us_eia/opendata/process/nuclear.py    |  10 +-
 scripts/us_eia/opendata/process/pet.py        |   2 +-
 .../opendata/process/test_data/categories.csv |  10 +-
 .../process/test_data/categories.tmcf         |   1 +
 .../opendata/process/test_data/coal.csv       |  24 +-
 .../opendata/process/test_data/coal.tmcf      |   1 +
 .../opendata/process/test_data/elec.csv       |  20 +-
 .../opendata/process/test_data/elec.tmcf      |   1 +
 .../opendata/process/test_data/intl.csv       |  14 +-
 .../opendata/process/test_data/intl.tmcf      |   1 +
 .../us_eia/opendata/process/test_data/ng.csv  |  24 +-
 .../us_eia/opendata/process/test_data/ng.tmcf |   1 +
 .../opendata/process/test_data/nuc_status.csv |  26 +-
 .../process/test_data/nuc_status.tmcf         |   1 +
 .../us_eia/opendata/process/test_data/pet.csv |  16 +-
 .../opendata/process/test_data/pet.tmcf       |   1 +
 .../opendata/process/test_data/seds.csv       |  10 +-
 .../opendata/process/test_data/seds.tmcf      |   1 +
 .../opendata/process/test_data/total.csv      |   6 +-
 .../opendata/process/test_data/total.tmcf     |   1 +
 30 files changed, 644 insertions(+), 238 deletions(-)
 delete mode 100644 scripts/us_eia/opendata/download_bulk.py
 create mode 100644 scripts/us_eia/opendata/manifest.json
 create mode 100644 scripts/us_eia/opendata/process.py

diff --git a/scripts/us_eia/opendata/README.md b/scripts/us_eia/opendata/README.md
index f61b576b76..88dc7f38f7 100644
--- a/scripts/us_eia/opendata/README.md
+++ b/scripts/us_eia/opendata/README.md
@@ -6,12 +6,6 @@
 
 Each dataset available as a Zip-file of JSONL content. See [here](https://www.eia.gov/opendata/bulkfiles.php) for more details.
 
-To download the latest versions of ALL datasets available, run the following command. Files will be downloaded and extracted to a tmp_raw_data folder.
-
-```bash
-python3 download_bulk.py
-```
-
 ### Data Exploration
 
 To ease analysis of the datasets, see [`generate_jsonl_for_bq.py`](generate_jsonl_for_bq.py) for instructions to convert and import the data into BigQuery.
@@ -20,11 +14,47 @@ To ease analysis of the datasets, see [`generate_jsonl_for_bq.py`](generate_json
 
 This dataset is available for public use, license is available at https://www.eia.gov/about/copyrights_reuse.php
 
-### Import procedure
 
-- Download data 
+- Run the [processor](process/README.md)
+
+### Downloading and Processing Data
+
+
+If you want to only download the data, run the commands below:
+
+        python3 process.py --dataset=INTL --mode=download
+        python3 process.py --dataset=ELEC --mode=download
+        python3 process.py --dataset=COAL --mode=download
+        python3 process.py --dataset=PET --mode=download
+        python3 process.py --dataset=NG --mode=download
+        python3 process.py --dataset=SEDS --mode=download
+        python3 process.py --dataset=NUC_STATUS --mode=download
+        python3 process.py --dataset=TOTAL --mode=download
+
+
+
+If you want to only process previously downloaded data, run the commands below.
+
+Running these commands generates the input files and the csv, mcf, tmcf and svg.mcf files.
+
+        python3 process.py --dataset=INTL --mode=process
+        python3 process.py --dataset=ELEC --mode=process
+        python3 process.py --dataset=COAL --mode=process
+        python3 process.py --dataset=PET --mode=process
+        python3 process.py --dataset=NG --mode=process
+        python3 process.py --dataset=SEDS --mode=process
+        python3 process.py --dataset=NUC_STATUS --mode=process
+        python3 process.py --dataset=TOTAL --mode=process
+        
+    To Download and process the data together, run the below command:
     ```bash
-    python3 download_bulk.py
-    ```
+    python3 process.py --dataset=TOTAL
+    python3 process.py --dataset=INTL
+    python3 process.py --dataset=ELEC
+    python3 process.py --dataset=COAL
+    python3 process.py --dataset=NG
+    python3 process.py --dataset=PET
+    python3 process.py --dataset=SEDS
+    python3 process.py --dataset=NUC_STATUS
 
-- Run the [processor](process/README.md)
\ No newline at end of file
+    ```
diff --git a/scripts/us_eia/opendata/download_bulk.py b/scripts/us_eia/opendata/download_bulk.py
deleted file mode 100644
index 554187e9b6..0000000000
--- a/scripts/us_eia/opendata/download_bulk.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright 2021 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Utility to download all EIA data from https://api.eia.gov/bulk/manifest.txt
-Files are stored in raw_data.
-
-Run this script in this folder:
-python3 download_bulk.py
-"""
-
-import io
-import zipfile
-
-import requests
-
-from absl import flags
-from absl import app
-
-MANIFEST_URL = "https://api.eia.gov/bulk/manifest.txt"
-
-FLAGS = flags.FLAGS
-flags.DEFINE_string('data_dir', 'tmp_raw_data', 'Data dir to download into')
-flags.DEFINE_list('datasets', [], 'Datasets to download. Everything, if empty.')
-
-
-def download_file(url: str, save_path: str):
-    print(f'Downloading {url} to {save_path}')
-    r = requests.get(url, stream=True)
-    z = zipfile.ZipFile(io.BytesIO(r.content))
-    z.extractall(save_path)
-
-
-def download_manifest():
-    return requests.get(MANIFEST_URL).json()
-
-
-def main(_):
-    assert FLAGS.data_dir
-    manifest_json = download_manifest()
-    datasets = manifest_json.get('dataset', {})
-    for dataset_name in datasets:
-        if FLAGS.datasets and dataset_name not in FLAGS.datasets:
-            continue
-        print(dataset_name)
-        dataset = datasets[dataset_name]
-        download_file(dataset['accessURL'], f'{FLAGS.data_dir}/{dataset_name}')
-
-
-if __name__ == '__main__':
-    app.run(main)
diff --git a/scripts/us_eia/opendata/generate_jsonl_for_bq.py b/scripts/us_eia/opendata/generate_jsonl_for_bq.py
index 27227de946..af1579277c 100644
--- a/scripts/us_eia/opendata/generate_jsonl_for_bq.py
+++ b/scripts/us_eia/opendata/generate_jsonl_for_bq.py
@@ -40,10 +40,9 @@
 IN_DATA_PATH = 'tmp_raw_data'
 OUT_DATA_PATH = 'tmp_bq_import'
 DATASETS = [
-    'AEO.2014', 'AEO.2015', 'AEO.2016', 'AEO.2017', 'AEO.2018', 'AEO.2019',
-    'AEO.2020', 'AEO.2021', 'COAL', 'EBA', 'ELEC', 'EMISS', 'IEO.2017',
-    'IEO.2019', 'INTL', 'NG', 'NUC_STATUS', 'PET', 'PET_IMPORTS', 'SEDS',
-    'STEO', 'TOTAL'
+    'AEO.2020', 'AEO.2021', 'AEO.2022', 'AEO.2023', 'AEO.IEO2', 'COAL', 'EBA',
+    'ELEC', 'EMISS', 'IEO', 'INTL', 'NG', 'NUC_STATUS', 'PET', 'PET_IMPORTS',
+    'SEDS', 'STEO', 'TOTAL'
 ]
 
 
@@ -77,17 +76,18 @@ def process_dataset(dataset, in_file_path, out_file_path):
         with open(out_file_path + '.series.jsonl', 'w+') as series_fp:
             with open(out_file_path + '.categories.jsonl', 'w+') as category_fp:
                 for line in data_fp:
-                    data = json.loads(line)
-                    series_id = data.get('series_id', None)
-                    if series_id:
-                        jsonl = extract_series_to_jsonl(line, dataset)
-                        series_fp.write(json.dumps(jsonl))
-                        series_fp.write('\n')
-                    category_id = data.get('category_id', None)
-                    if category_id:
-                        jsonl = extract_category_to_jsonl(line, dataset)
-                        category_fp.write(json.dumps(jsonl))
-                        category_fp.write('\n')
+                    if line.startswith('{'):
+                        data = json.loads(line)
+                        series_id = data.get('series_id', None)
+                        if series_id:
+                            jsonl = extract_series_to_jsonl(line, dataset)
+                            series_fp.write(json.dumps(jsonl))
+                            series_fp.write('\n')
+                        category_id = data.get('category_id', None)
+                        if category_id:
+                            jsonl = extract_category_to_jsonl(line, dataset)
+                            category_fp.write(json.dumps(jsonl))
+                            category_fp.write('\n')
 
 
 def process_single(subdir, file):
@@ -103,7 +103,8 @@ def process_all():
         for file in sorted(files):
             if not file.endswith('.txt'):
                 continue
-            print(f'Processing {subdir}/{file}')
+            print(f'Processing1 {subdir}/{file}')
+
             process_single(subdir, file)
 
 
diff --git a/scripts/us_eia/opendata/manifest.json b/scripts/us_eia/opendata/manifest.json
new file mode 100644
index 0000000000..221c340c1f
--- /dev/null
+++ b/scripts/us_eia/opendata/manifest.json
@@ -0,0 +1,132 @@
+{
+  "import_specifications": [
+    {
+      "import_name": "EIA_Coal",
+      "curator_emails": [],
+      "provenance_url": "https://www.eia.gov/opendata/qb.php?category=717234",
+      "provenance_description": "Coal dataset has country-level and state-level information.",
+      "scripts": [
+        "process.py --dataset=COAL"
+      ],
+      "import_inputs": [
+        {
+          "template_mcf": "tmp_raw_data/COAL/COAL.tmcf",
+          "cleaned_csv": "tmp_raw_data/COAL/COAL.csv"
+        }
+      ],
+      "cron_schedule": "0 6 1 2 *"
+    },
+    {
+      "import_name": "EIA_Electricity",
+      "curator_emails": [],
+      "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0",
+      "provenance_description": "Electricity dataset has country, state-level and plant-level information on electricity generation, consumption, sales etc by energy source and “sectors” (like residential, commercial, etc.).",
+      "scripts": [
+        "process.py --dataset=ELEC"
+      ],
+      "import_inputs": [
+        {
+          "template_mcf": "tmp_raw_data/ELEC/ELEC.tmcf",
+          "cleaned_csv": "tmp_raw_data/ELEC/ELEC.csv"
+        }
+      ],
+      "cron_schedule": "0 8 1 2 *"
+    },
+    {
+      "import_name": "EIA_NaturalGas",
+      "curator_emails": [],
+      "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0",
+      "provenance_description": "Natural gas dataset has country and state-level data.",
+      "scripts": [
+        "process.py --dataset=NG"
+      ],
+      "import_inputs": [
+        {
+          "template_mcf": "tmp_raw_data/NG/NG.tmcf",
+          "cleaned_csv": "tmp_raw_data/NG/NG.csv"
+        }
+      ],
+      "cron_schedule": "05 10 * * *"
+    },
+    {
+      "import_name": "EIA_NuclearOutages",
+      "curator_emails": [],
+      "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0",
+      "provenance_description": "Nuclear outage dataset has nuclear-plant and national data about Nuclear energy generation capacity and planned outages.",
+      "scripts": [
+        "process.py --dataset=NUC_STATUS"
+      ],
+      "import_inputs": [
+        {
+          "template_mcf": "tmp_raw_data/NUC_STATUS/NUC_STATUS.tmcf",
+          "cleaned_csv": "tmp_raw_data/NUC_STATUS/NUC_STATUS.csv"
+        }
+      ],
+      "cron_schedule": "01 9 * * *"
+    },
+    {
+      "import_name": "EIA_Petroleum",
+      "curator_emails": [],
+      "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0",
+      "provenance_description": "EIA Petroleum dataset has country and state-level data.",
+      "scripts": [
+        "process.py --dataset=PET"
+      ],
+      "import_inputs": [
+        {
+          "template_mcf": "tmp_raw_data/PET/PET.tmcf",
+          "cleaned_csv": "tmp_raw_data/PET/PET.csv"
+        }
+      ],
+      "cron_schedule": "5 9 2 2 *"
+    },
+    {
+      "import_name": "EIA_International",
+      "curator_emails": [],
+      "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0",
+      "provenance_description": "EIA International Energy dataset has country, continent and world-level data.",
+      "scripts": [
+        "process.py --dataset=INTL"
+      ],
+      "import_inputs": [
+        {
+          "template_mcf": "tmp_raw_data/INTL/INTL.tmcf",
+          "cleaned_csv": "tmp_raw_data/INTL/INTL.csv"
+        }
+      ],
+      "cron_schedule": "1 7 * 1,4,7,10 *"
+    },
+    {
+      "import_name": "EIA_SEDS",
+      "curator_emails": [],
+      "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0",
+      "provenance_description": "EIA State Energy Data System (SEDS) dataset has US country-level and state-level data.",
+      "scripts": [
+        "process.py --dataset=SEDS"
+      ],
+      "import_inputs": [
+        {
+          "template_mcf": "tmp_raw_data/SEDS/SEDS.tmcf",
+          "cleaned_csv": "tmp_raw_data/SEDS/SEDS.csv"
+        }
+      ],
+      "cron_schedule": "0 0 1 1 *"
+    },
+    {
+      "import_name": "EIA_TotalEnergy",
+      "curator_emails": [],
+      "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0",
+      "provenance_description": "Total Energy dataset has US country-level data.",
+      "scripts": [
+        "process.py --dataset=TOTAL"
+      ],
+      "import_inputs": [
+        {
+          "template_mcf": "tmp_raw_data/TOTAL/TOTAL.tmcf",
+          "cleaned_csv": "tmp_raw_data/TOTAL/TOTAL.csv"
+        }
+      ],
+      "cron_schedule": "0 0 1 * *"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/scripts/us_eia/opendata/process.py b/scripts/us_eia/opendata/process.py
new file mode 100644
index 0000000000..3d67164ba8
--- /dev/null
+++ b/scripts/us_eia/opendata/process.py
@@ -0,0 +1,108 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Download and process EIA bulk data (https://api.eia.gov/bulk/manifest.txt).
+Files are stored under --data_dir (tmp_raw_data by default).
+
+Run this script in this folder:
+python3 process.py --dataset=INTL --mode=download
+
+Replace `INTL` with any of the other dataset codes
+"""
+
+import io
+import os
+import sys
+import zipfile
+import requests
+
+from absl import flags
+from absl import app
+from absl import logging
+
+from process import common, coal, elec, intl, ng, nuclear, pet, seds, total
+
+MANIFEST_URL = "https://api.eia.gov/bulk/manifest.txt"
+
+FLAGS = flags.FLAGS
+flags.DEFINE_string('data_dir', 'tmp_raw_data', 'Data dir to download into')
+flags.DEFINE_string('dataset', '',
+                    'Datasets to download. Everything, if empty.')
+flags.DEFINE_string('mode', '', 'Options: download or process')
+
+## Value: (name, extract_fn, schema_fn)
+_DATASETS = {
+    'COAL': ('Coal', coal.extract_place_statvar, coal.generate_statvar_schema),
+    'ELEC': ('Electricity', elec.extract_place_statvar,
+             elec.generate_statvar_schema),
+    'INTL': ('Energy Overview (INTL)', intl.extract_place_statvar, None),
+    'PET': ('Petroleum', pet.extract_place_statvar, None),
+    'NG': ('Natural Gas', ng.extract_place_statvar, None),
+    'NUC_STATUS': ('Nuclear Outages', nuclear.extract_place_statvar,
+                   nuclear.generate_statvar_schema),
+    'SEDS': ('Consumption, Production, Prices and Expenditure (SEDS)',
+             seds.extract_place_statvar, None),
+    'TOTAL': ('Energy Overview (TOTAL)', total.extract_place_statvar, None)
+}
+
+
+def download_file(url: str, save_path: str):
+    """Fetch the zip archive at url and extract it into save_path."""
+    try:
+        response = requests.get(url, stream=True)
+        zipfile.ZipFile(io.BytesIO(response.content)).extractall(save_path)
+    except Exception as e:
+        logging.fatal(f"error while downloading the file,{url} -{e}")
+
+
+def download_manifest():  # Returns the parsed JSON body of MANIFEST_URL.
+    try:
+        return requests.get(MANIFEST_URL).json()
+    except Exception as e:  # absl logging.fatal logs, then aborts the program
+        logging.fatal(
+            f"error while downloading the manifest,{MANIFEST_URL} -{e}")
+
+
+def main(_):
+    """Download and/or process each manifest dataset matching --dataset."""
+    mode = FLAGS.mode
+    assert FLAGS.data_dir
+    datasets = download_manifest().get('dataset', {})
+    logging.info("================Calling main method")
+    for dataset_name, dataset in datasets.items():
+        # Compare for equality; `in` on a string flag is a substring test.
+        if FLAGS.dataset and dataset_name != FLAGS.dataset:
+            continue
+        dataset_dir = os.path.join(FLAGS.data_dir, dataset_name)
+        if mode in ("", "download"):
+            download_file(dataset['accessURL'], dataset_dir)
+        # Only datasets registered in _DATASETS have extractors to run.
+        if mode in ("", "process") and dataset_name in _DATASETS:
+            name, extract_fn, schema_fn = _DATASETS[dataset_name]
+            file_prefix = os.path.join(dataset_dir, dataset_name)
+            logging.info("================Calling process method")
+            common.process(dataset=dataset_name,
+                           dataset_name=name,
+                           in_json=file_prefix + '.txt',
+                           out_csv=file_prefix + '.csv',
+                           out_sv_mcf=file_prefix + '.mcf',
+                           out_svg_mcf=file_prefix + '.svg.mcf',
+                           out_tmcf=file_prefix + '.tmcf',
+                           extract_place_statvar_fn=extract_fn,
+                           generate_statvar_schema_fn=schema_fn)
+            logging.info("================process completed")
+
+
+if __name__ == '__main__':
+    app.run(main)
diff --git a/scripts/us_eia/opendata/process/README.md b/scripts/us_eia/opendata/process/README.md
index 1e3a543ace..ad5c8d8bbe 100644
--- a/scripts/us_eia/opendata/process/README.md
+++ b/scripts/us_eia/opendata/process/README.md
@@ -58,16 +58,21 @@ takes a raw stat-var and generates a fully defined stat-var for it.
 
 Download and unzip the data files based on the
 [manifest](https://api.eia.gov/bulk/manifest.txt) by running the
-[`download_bulk.py`](https://github.com/datacommonsorg/data/blob/master/scripts/us_eia/opendata/download_bulk.py)
+[`python3 process.py --dataset=TOTAL`](https://github.com/datacommonsorg/data/blob/master/scripts/us_eia/opendata/process.py)
 script.
 
 To generate CSV, TMCF and stat-var MCF for a supported dataset:
 
 ```bash
-python3 main.py --data_dir=tmp_raw_data/ELEC --dataset=ELEC
+python3 process.py --dataset=INTL --mode=process
+python3 process.py --dataset=ELEC --mode=process
+python3 process.py --dataset=PET --mode=process
+python3 process.py --dataset=NG --mode=process
+python3 process.py --dataset=SEDS --mode=process
+python3 process.py --dataset=NUC_STATUS --mode=process
+python3 process.py --dataset=TOTAL --mode=process
 ```
 
-Replace `ELEC` with any of the other dataset codes listed above.
 
 To run tests:
 
diff --git a/scripts/us_eia/opendata/process/category.py b/scripts/us_eia/opendata/process/category.py
index 54720fb327..449c55b1a8 100644
--- a/scripts/us_eia/opendata/process/category.py
+++ b/scripts/us_eia/opendata/process/category.py
@@ -97,7 +97,7 @@ def trim_area_categories(svg_info, counters):
     # Delete "area" categories.
     for svg, (_, name) in list(svg_info.items()):
         if name and name.lower() == 'by area':
-            counters['info_deleted_area_categories'] += 1
+            counters.add_counter('info_deleted_area_categories', 1)
             del svg_info[svg]
 
     # Trim orphans, except for dataset_root.
@@ -107,7 +107,7 @@ def trim_area_categories(svg_info, counters):
         for svg, (parent, _) in list(svg_info.items()):
             if parent != dataset_root and parent not in svg_info:
                 run_again = True
-                counters['info_deleted_orphan_categories'] += 1
+                counters.add_counter('info_deleted_orphan_categories', 1)
                 del svg_info[svg]
 
 
@@ -139,7 +139,7 @@ def process_category(dataset, data, extract_place_statvar_fn, svg_info,
     for series in child_series:
         (_, raw_sv, _) = extract_place_statvar_fn(series, counters)
         if not raw_sv:
-            counters['error_extract_place_sv_for_category'] += 1
+            counters.add_counter('error_extract_place_sv_for_category', 1)
             continue
 
         if raw_sv not in sv_membership_map:
diff --git a/scripts/us_eia/opendata/process/coal.py b/scripts/us_eia/opendata/process/coal.py
index 6b11850b83..c19d90990f 100644
--- a/scripts/us_eia/opendata/process/coal.py
+++ b/scripts/us_eia/opendata/process/coal.py
@@ -184,8 +184,7 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters):
 
     Returns schema-ful stat-var ID if schema was generated, None otherwise.
     """
-    counters['generate_statvar_schema'] += 1
-
+    counters.add_counter('generate_statvar_schema', 1)
     # COAL.{Measure}.{ConsumingSector}.{Period}
     m = re.match(r"^COAL\.([^._]+_?[^._]+)\.([0-9]+)\.([AQM])$", raw_sv)
     if m:
@@ -193,14 +192,14 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters):
         consuming_sector = m.group(2)
         period = m.group(3)
     else:
-        counters['error_unparsable_raw_statvar'] += 1
+        counters.add_counter('error_unparsable_raw_statvar', 1)
         return None
-    counters[f'measure-{measure}'] += 1
+    counters.add_counter(f'measure-{measure}', 1)
 
     # Get popType and mprop based on measure.
     measure_pvs = _MEASURE_MAP.get(measure, None)
     if not measure_pvs:
-        counters[f'error_missing_measure-{measure}'] += 1
+        counters.add_counter(f'error_missing_measure-{measure}', 1)
         return None
 
     sv_id_parts = [common.PERIOD_MAP[period], measure_pvs[0]]
@@ -213,13 +212,14 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters):
     if consuming_sector:
         cs = _CONSUMING_SECTOR.get(consuming_sector, None)
         if not cs:
-            counters[f'error_missing_consuming_sector-{consumingSector}'] += 1
+            counters.add_counter(
+                f'error_missing_consuming_sector-{consuming_sector}', 1)
             return None
         sv_id_parts.append(cs)
         sv_pvs.append(f'consumingSector: dcs:{cs}')
 
     if measure not in _UNIT_MAP:
-        counters[f'error_missing_unit-{measure}'] += 1
+        counters.add_counter(f'error_missing_unit-{measure}', 1)
         return None
     (unit, sfactor) = _UNIT_MAP[measure]
 
diff --git a/scripts/us_eia/opendata/process/common.py b/scripts/us_eia/opendata/process/common.py
index 6624da1f2f..8954d80487 100644
--- a/scripts/us_eia/opendata/process/common.py
+++ b/scripts/us_eia/opendata/process/common.py
@@ -12,7 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Process EIA datasets to produce TMCF and CSV."""
-
+import os
+import sys
 import csv
 import json
 import logging
@@ -21,10 +22,17 @@
 from sys import path
 
 # For import util.alpha2_to_dcid
-path.insert(1, '../../../../')
-import util.alpha2_to_dcid as alpha2_to_dcid
-import util.name_to_alpha2 as name_to_alpha2
-
+# Setup path for import from data/util
+_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
+_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_DIR)
+_DATA_DIR = _SCRIPT_DIR.split('/data/')[0]
+sys.path.append(os.path.join(_DATA_DIR, 'data/util'))
+import alpha2_to_dcid as alpha2_to_dcid
+import name_to_alpha2 as name_to_alpha2
+
+import file_util
+from counters import Counters
 from . import category
 
 PERIOD_MAP = {
@@ -34,9 +42,155 @@
     'Q': 'Quarterly',
 }
 
+MMETHOD_MAPPING_DICT = {
+    # Maps a source unit string to its measurementMethod value.
+    'Index1982-1984=100': 'BasePeriod1982_1984',
+    '2017=1.00000': 'BaseYear2017',  # NOTE(review): others use BasePeriod2017
+    'Real(1982-1984)CentsPerKilowatthour': 'BasePeriod1982_1984',
+    'Real(1982-1984)DollarsPerGallon': 'BasePeriod1982_1984',
+    'Real(1982-1984)DollarsPerMillionBtu': 'BasePeriod1982_1984',
+    'Real(1982-1984)DollarsPerThousandCubicFeet': 'BasePeriod1982_1984',
+    'ThousandBtuPerChained(2017)Dollar': 'BasePeriod2017',
+    'BillionChained(2017)Dollars': 'BasePeriod2017',
+    'MetricTonsCarbonDioxidePerMillionChained(2017)Dollars': 'BasePeriod2017'
+}
+
+UNIT_MAPPING_DICT = {
+    # input source unit : DC unit ('' where the source unit maps to no unit)
+    'Days':
+        'Day',
+    'ThousandsOfRegisteredVehicles':
+        '',
+    'RegisteredVehicle':
+        '',
+    'NumberOfDays':
+        'Day',
+    '$/ShortTon':
+        'USDollarPerShortTon',
+    'Dollars':
+        'USDollar',
+    'MillionBarrels':
+        'MillionsBarrels',
+    'ThousandBarrels':
+        'Barrel',
+    'ThousandDollars':
+        'USDollar',
+    '1000MetricTons':
+        'ThousandMetricTons',
+    'BillionKilowatthours':
+        'BillionKilowattHours',
+    'Terajoules':
+        'Terajoule',
+    'DollarsPerMillionBtu':
+        'USDollarPerMillionBtu',
+    'DollarsPerThousandCubicFeet':
+        'USDollarPerThousandCubicFeet',
+    'CentsPerKilowatthour':
+        'CentsPerKilowattHour',
+    'MillionKilowatthours':
+        'MillionKilowattHours',
+    'DollarsPerGallon':
+        'USDollarPerGallon',
+    'Kilowatthours':
+        'KilowattHour',
+    'Barrels':
+        'Barrel',
+    'MillionDollars':
+        'USDollar',
+    'BillionDollars':
+        'USDollar',
+    'DollarsPerPoundUraniumOxide':
+        'USDollarPerPoundUraniumOxide',
+    'ThousandKilowatts':
+        'Kilowatt',
+    'DollarsPerBarrel':
+        'USDollarPerBarrel',
+    'NumberOfCustomers':
+        '',
+    'NumberOfElements':
+        '',
+    'Thousand':
+        "",
+    'ThousandGallons':
+        'USGallon',
+    'MillionPounds':  # NOTE(review): GBP is a currency code; confirm not weight
+        'GBP',
+    'DollarsPerFoot':
+        'USDollarPerFoot',
+    'ThousandDollarsPerWell':
+        'ThousandUSDollarsPerWell',
+    'ThousandFeet':
+        'Foot',
+    'FeetPerWell':
+        'Foot',
+    'Cost':
+        'USDollar',
+    'Index1982-1984=100':
+        '',
+    '2017=1.00000':
+        '',
+    'NumberOfRigs':
+        '',
+    'Number':
+        '',
+    'Real(1982-1984)DollarsPerGallon':
+        'USDollarPerGallon',
+    'Real(1982-1984)DollarsPerMillionBtu':
+        'USDollarPerMillionBtu',
+    'DollarsPerMillionBtu':  # NOTE(review): duplicate key (same value as above)
+        'USDollarPerMillionBtu',
+    'Real(1982-1984)CentsPerKilowatthour':  # NOTE(review): 'USCent…' vs 'Cents…'
+        'USCentPerKilowattHour',
+    'Real(1982-1984)DollarsPerThousandCubicFeet':
+        'USDollarPerThousandCubicFeet',
+    'MetricTonsCarbonDioxidePerMillionChained(2017)Dollars':
+        'MetricTonsCarbonDioxidePerMillionChainedUSDollars',
+    'ThousandBtuPerChained(2017)Dollar':
+        'BtuPerChainedUSDollar',
+    'BillionChained(2017)Dollars':
+        'ChainedUSDollar',
+    'CentsPerKilowatthour,IncludingTaxes':
+        'CentsPerKilowattHour',
+    'TrillionBtu':
+        'Btu',
+    'MillionGallons':
+        'USGallon',
+    'MillionPeople':
+        '',
+    'MillionNominalDollars':
+        'NominalUSDollar',
+    'NominalDollars':
+        'NominalUSDollar',
+    'DollarsPerGallonIncludingTaxes':
+        'USDollarPerGallon',
+    'DollarsPerGallonExcludingTaxes':
+        'USDollarPerGallon',
+    'DollarsPerMillionBtu,IncludingTaxes':
+        'USDollarPerMillionBtu'
+}
+
+UNIT_CONVERT_DICT = {  # source unit -> multiplier applied by _unitConvert()
+    'ThousandCubicFeet': 1000,  # NOTE(review): no UNIT_MAPPING_DICT entry
+    'ThousandBtuPerChained(2017)Dollar': 1000,
+    'Thousand': 1000,
+    'ThousandFeet': 1000,
+    'ThousandDollars': 1000,
+    'ThousandGallons': 1000,
+    'ThousandBarrels': 1000,
+    'ThousandsOfRegisteredVehicles': 1000,
+    'MillionDollars': 1000000,
+    'MillionPeople': 1000000,
+    'MillionNominalDollars': 1000000,
+    'MillionGallons': 1000000,
+    'ThousandKilowatts': 1000,
+    'MillionPounds': 1000000,
+    'BillionDollars': 1000000000,  # 10**9; one billion, not 10**10
+    'BillionChained(2017)Dollars': 1000000000,  # 10**9; one billion
+    'TrillionBtu': 1000000000000
+}
 _COLUMNS = [
     'place', 'stat_var', 'date', 'value', 'unit', 'scaling_factor',
-    'eia_series_id'
+    'eia_series_id', 'measurementMethod'
 ]
 
 _TMCF_STRING = """
@@ -49,6 +203,7 @@
 unit: C:EIATable->unit
 scalingFactor: C:EIATable->scaling_factor
 eiaSeriesId: C:EIATable->eia_series_id
+measurementMethod: C:EIATable->measurementMethod
 """
 
 _DATE_RE = re.compile('[0-9WMQ]')
@@ -82,11 +237,13 @@ def _parse_date(d):
         m_or_q = d[4:]
 
         if m_or_q.startswith('Q'):
+            #print("withQ",yr + '-' + _QUARTER_MAP[m_or_q])
             # Quarterly
             if m_or_q in _QUARTER_MAP:
                 return yr + '-' + _QUARTER_MAP[m_or_q]
         else:
             # Monthly
+            #print("withOutQ",yr + '-' + m_or_q)
             return yr + '-' + m_or_q
 
     if len(d) == 8:
@@ -106,15 +263,28 @@ def _sv_dcid(raw_sv):
     return 'eia/' + raw_sv
 
 
-def _enumify(in_str):
-    return in_str.title().replace(' ', '')
+def _check_unit_with_mapping(in_str):
+    """Return the DC unit for source unit in_str, or in_str if unmapped."""
+    # Units without a UNIT_MAPPING_DICT entry pass through unchanged.
+    return UNIT_MAPPING_DICT.get(in_str, in_str)
 
 
-def _print_counters(counters):
-    print('\nSTATS:')
-    for k in sorted(counters):
-        print(f"\t{k} = {counters[k]}")
-    print('')
+def _check_mMethod_with_mapping(in_str):
+    """Return the measurement method mapped from source unit in_str.
+
+    Units absent from MMETHOD_MAPPING_DICT yield the empty string.
+    """
+    return MMETHOD_MAPPING_DICT.get(in_str, "")
+
+
+def _unitConvert(unit, value):
+    """Scale value by UNIT_CONVERT_DICT[unit]; other units return it as-is."""
+    multiplier = UNIT_CONVERT_DICT.get(unit)
+    return value if multiplier is None else float(value) * multiplier
+
+
+def _enumify(in_str):
+    return in_str.title().replace(' ', '')
 
 
 def _find_dc_place(raw_place, is_us_place, counters):
@@ -147,7 +317,7 @@ def _find_dc_place(raw_place, is_us_place, counters):
                 return 'Earth'
 
     # logging.error('ERROR: unsupported place %s %r', raw_place, is_us_place)
-    counters[f'error_unsupported_places_{raw_place}'] += 1
+    counters.add_counter(f'error_unsupported_places_{raw_place}', 1)
     return None
 
 
@@ -216,28 +386,31 @@ def _maybe_parse_name(name, raw_place, is_us_place, counters):
 
     # If we didn't find the name for the place, likely the name doesn't include
     # the place (e.g., TOTAL).
-    counters['info_unmodified_names'] += 1
+    counters.add_counter('info_unmodified_names', 1)
     return cleanup_name(name)
 
 
 def _generate_sv_nodes(dataset, sv_map, sv_name_map, sv_membership_map,
                        sv_schemaful2raw, svg_info):
+    """Return one MCF node string per sv_map entry, with name/memberOf PVs."""
     nodes = []
-    for sv, mcf in sv_map.items():
-        raw_sv = sv_schemaful2raw[sv] if sv in sv_schemaful2raw else sv
+    try:
+        for sv, mcf in sv_map.items():
+            raw_sv = sv_schemaful2raw.get(sv, sv)
 
-        pvs = [mcf]
-        if raw_sv in sv_name_map:
-            pvs.append(f'name: "{sv_name_map[raw_sv]}"')
+            pvs = [mcf]
+            if raw_sv in sv_name_map:
+                pvs.append(f'name: "{sv_name_map[raw_sv]}"')
 
-        if dataset == 'NUC_STATUS':
-            pvs.append(f'memberOf: dcid:{category.NUC_STATUS_ROOT}')
-        if raw_sv in sv_membership_map:
-            for svg in sorted(sv_membership_map[raw_sv]):
-                if svg in svg_info:
-                    pvs.append(f'memberOf: dcid:{svg}')
+            if dataset == 'NUC_STATUS':
+                pvs.append(f'memberOf: dcid:{category.NUC_STATUS_ROOT}')
+            for svg in sorted(sv_membership_map.get(raw_sv, ())):
+                if svg in svg_info:
+                    pvs.append(f'memberOf: dcid:{svg}')
 
-        nodes.append('\n'.join(pvs))
+            nodes.append('\n'.join(pvs))
+    except Exception as e:  # stdlib logging.fatal() only logs; re-raise instead
+        raise RuntimeError(f"error while generating the SV nodes: {e}") from e
 
     return nodes
 
@@ -286,16 +459,21 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf,
     counters = defaultdict(lambda: 0)
     sv_map = {}
     sv_name_map = {}
-    with open(in_json) as in_fp, open(out_csv, 'w', newline='') as csv_fp:
+    counters = Counters()
+    counters.add_counter('total', file_util.file_estimate_num_rows(in_json))
+    with file_util.FileIO(in_json) as in_fp, open(out_csv, 'w',
+                                                  newline='') as csv_fp:
+        #with open(in_json) as in_fp, open(out_csv, 'w', newline='') as csv_fp:
         csvwriter = csv.DictWriter(csv_fp, fieldnames=_COLUMNS)
         csvwriter.writeheader()
 
         for line in in_fp:
-            counters['info_lines_processed'] += 1
-            if counters['info_lines_processed'] % 100000 == 99999:
-                _print_counters(counters)
+            counters.add_counter('processed', 1)
 
+            if not line.startswith('{'):
+                continue
             data = json.loads(line)
+            logging.info(f"Loaded data: {data}")
 
             # Preliminary checks
             series_id = data.get('series_id', None)
@@ -303,28 +481,30 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf,
                 category.process_category(dataset, data,
                                           extract_place_statvar_fn, svg_info,
                                           sv_membership_map, counters)
-                counters['info_categories_processed'] += 1
+
                 continue
 
             time_series = data.get('data', None)
             if not time_series:
-                counters['error_missing_time_series'] += 1
+                counters.add_counter('error_missing_time_series', 1)
                 continue
 
             # Extract raw place and stat-var from series_id.
             (raw_place, raw_sv,
              is_us_place) = extract_place_statvar_fn(series_id, counters)
             if not raw_place or not raw_sv:
-                counters['error_extract_place_sv'] += 1
+                counters.add_counter('error_extract_place_sv', 1)
                 continue
 
             # Map raw place to DC place
             dc_place = _find_dc_place(raw_place, is_us_place, counters)
             if not dc_place:
-                counters['error_place_mapping'] += 1
+                counters.add_counter('error_place_mapping', 1)
                 continue
 
             raw_unit = _enumify(data.get('units', ''))
+            dc_unit = _check_unit_with_mapping(raw_unit)
+            m_method = _check_mMethod_with_mapping(raw_unit)
 
             if raw_sv not in sv_name_map:
                 name = _maybe_parse_name(data.get('name', ''), raw_place,
@@ -352,26 +532,27 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf,
                     # TODO: Handle some these better.
                     _ = float(v)
                 except Exception:
-                    counters['error_non_numeric_values'] += 1
+                    counters.add_counter('error_non_numeric_values', 1)
                     continue
 
                 dt = _parse_date(k)
                 if not dt:
                     logging.error('ERROR: failed to parse date "%s"', k)
-                    counters['error_date_parsing'] += 1
+                    counters.add_counter('error_date_parsing', 1)
                     continue
 
                 rows.append({
                     'place': f"dcid:{dc_place}",
                     'stat_var': f"dcid:{_sv_dcid(raw_sv)}",
                     'date': dt,
-                    'value': v,
+                    'value': _unitConvert(raw_unit, v),
                     'eia_series_id': series_id,
-                    'unit': raw_unit,
+                    'unit': dc_unit,
+                    'measurementMethod': m_method
                 })
 
             if not rows:
-                counters['error_empty_series'] += 1
+                counters.add_counter('error_empty_series', 1)
                 continue
 
             schema_sv = None
@@ -380,14 +561,13 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf,
                                                        counters)
             if schema_sv:
                 sv_schemaful2raw[schema_sv] = raw_sv
-                counters['info_schemaful_series'] += 1
+                counters.add_counter('info_schemaful_series', 1)
             else:
-                counters['info_schemaless_series'] += 1
+                counters.add_counter('info_schemaless_series', 1)
                 _generate_default_statvar(raw_sv, sv_map)
 
             csvwriter.writerows(rows)
-            counters['info_rows_output'] += len(rows)
-
+            counters.add_counter('info_rows_output', len(rows))
     category.trim_area_categories(svg_info, counters)
 
     with open(out_sv_mcf, 'w') as out_fp:
@@ -407,5 +587,4 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf,
     with open(out_tmcf, 'w') as out_fp:
         out_fp.write(_TMCF_STRING)
 
-    print('=== FINAL COUNTERS ===')
-    _print_counters(counters)
+    logging.info(f"FINAL COUNTERS ")
diff --git a/scripts/us_eia/opendata/process/elec.py b/scripts/us_eia/opendata/process/elec.py
index fade060fd3..1258b0344a 100644
--- a/scripts/us_eia/opendata/process/elec.py
+++ b/scripts/us_eia/opendata/process/elec.py
@@ -30,12 +30,15 @@ def extract_place_statvar(series_id, counters):
     """
 
     if series_id.startswith('ELEC.PLANT.'):
-        counters['error_unimplemented_plant_series'] += 1
+        counters.add_counter('error_unimplemented_plant_series', 1)
         return (None, None, None)
 
     # ELEC.{MEASURE}.{FUEL_TYPE}-{PLACE}-{PRODUCER_SECTOR}.{PERIOD}
+    #m = re.match(r"^ELEC\.([^.]+)\.([^-]+)-([^-]+)-([^.]+)\.([AQM])$",
+    #            series_id)
     m = re.match(r"^ELEC\.([^.]+)\.([^-]+)-([^-]+)-([^.]+)\.([AQM])$",
                  series_id)
+
     if m:
         measure = m.group(1)
         fuel_type = m.group(2)
@@ -47,7 +50,7 @@ def extract_place_statvar(series_id, counters):
         # ELEC.{MEASURE}.{PLACE}-{CONSUMER_SECTOR}.{PERIOD}
         m = re.match(r"^ELEC\.([^.]+)\.([^-]+)-([^.]+)\.([AQM])$", series_id)
         if not m:
-            counters['error_unparsable_series'] += 1
+            counters.add_counter('error_unparsable_series', 1)
             return (None, None)
 
         measure = m.group(1)
@@ -222,7 +225,7 @@ def extract_place_statvar(series_id, counters):
     'CONS_EG': (_PLACEHOLDER_FUEL_UNIT, '', 1000),
     'CONS_EG_BTU': ('MMBtu', '', 1000000),
     'COST': (_PLACEHOLDER_FUEL_UNIT, '', 1),
-    'COST_BTU': ('MMBtu', '', 1),
+    'COST_BTU': ('USDollarPerMMBtu', '', 1),
     'CUSTOMERS': ('', '', 1),
     'GEN': ('GigawattHour', '', 1),
     'PRICE': ('USCentPerKilowattHour', '', 1),
@@ -274,7 +277,7 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters):
         # ELEC.{MEASURE}.{CONSUMER_SECTOR}.{PERIOD}
         m = re.match(r"^ELEC\.([^.]+)\.([^.]+)\.([AQM])$", raw_sv)
         if not m:
-            counters['error_unparsable_raw_statvar'] += 1
+            counters.add_counter('error_unparsable_raw_statvar', 1)
             return None
         measure = m.group(1)
         consuming_sector = m.group(2)
@@ -285,7 +288,7 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters):
     # Get popType and mprop based on measure.
     measure_pvs = _MEASURE_MAP.get(measure, None)
     if not measure_pvs:
-        counters['error_missing_measure'] += 1
+        counters.add_counter('error_missing_measure', 1)
         return None
 
     sv_id_parts = [common.PERIOD_MAP[period], measure_pvs[0]]
@@ -300,7 +303,7 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters):
         if not es:
             logging.error('Missing energy source: %s from %s', fuel_type,
                           raw_sv)
-            counters['error_missing_fuel_type'] += 1
+            counters.add_counter('error_missing_fuel_type', 1)
             return None
         if es != 'ALL':
             sv_id_parts.append(es)
@@ -312,7 +315,7 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters):
     if producing_sector:
         ps = _PRODUCING_SECTOR.get(producing_sector, None)
         if not ps:
-            counters['error_missing_producing_sector'] += 1
+            counters.add_counter('error_missing_producing_sector', 1)
             return None
         if ps != 'ALL':
             sv_id_parts.append(ps)
@@ -324,20 +327,20 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters):
     if consuming_sector:
         cs = _CONSUMING_SECTOR.get(consuming_sector, None)
         if not cs:
-            counters['error_missing_consuming_sector'] += 1
+            counters.add_counter('error_missing_consuming_sector', 1)
             return None
         if cs != 'ALL':
             sv_id_parts.append(cs)
             sv_pvs.append(f'consumingSector: dcs:{cs}')
 
     if measure not in _UNIT_MAP:
-        counters['error_missing_unit'] += 1
+        counters.add_counter('error_missing_unit', 1)
         return None
     (unit, sfactor, multiplier) = _UNIT_MAP[measure]
 
     if unit == _PLACEHOLDER_FUEL_UNIT:
         if not fuel_type:
-            counters['error_missing_unit_fuel_type'] += 1
+            counters.add_counter('error_missing_unit_fuel_type', 1)
             return None
         unit = _get_fuel_unit(fuel_type)
         if measure == 'COST':
diff --git a/scripts/us_eia/opendata/process/nuclear.py b/scripts/us_eia/opendata/process/nuclear.py
index c21d253fe9..8ad2f0b291 100644
--- a/scripts/us_eia/opendata/process/nuclear.py
+++ b/scripts/us_eia/opendata/process/nuclear.py
@@ -102,7 +102,7 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters):
 
     Returns schema-ful stat-var ID if schema was generated, None otherwise.
     """
-    counters['generate_statvar_schema'] += 1
+    counters.add_counter('generate_statvar_schema', 1)
 
     # NUC_STATUS.{Measure}.{Period}
     m = re.match(r"^NUC_STATUS\.([^.]+)\.(D)$", raw_sv)
@@ -110,21 +110,21 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters):
         measure = m.group(1)
         period = m.group(2)
     else:
-        counters['error_unparsable_raw_statvar'] += 1
+        counters.add_counter('error_unparsable_raw_statvar', 1)
         return None
-    counters[f'measure-{measure}'] += 1
+    counters.add_counter(f'measure-{measure}', 1)
 
     # Get popType and mprop based on measure.
     measure_pvs = _SV_MAP.get(measure, None)
     if not measure_pvs:
-        counters[f'error_missing_measure-{measure}'] += 1
+        counters.add_counter(f'error_missing_measure-{measure}', 1)
         return None
 
     sv_id = measure_pvs[0]
     sv_pvs = measure_pvs[1:]
 
     if measure not in _UNIT_MAP:
-        counters[f'error_missing_unit-{measure}'] += 1
+        counters.add_counter(f'error_missing_unit-{measure}', 1)
         return None
     (unit, sfactor) = _UNIT_MAP[measure]
 
diff --git a/scripts/us_eia/opendata/process/pet.py b/scripts/us_eia/opendata/process/pet.py
index b795fb4e1c..9740a7bda7 100644
--- a/scripts/us_eia/opendata/process/pet.py
+++ b/scripts/us_eia/opendata/process/pet.py
@@ -1,4 +1,4 @@
-# Copyright 2021 Google LLC
+# Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/scripts/us_eia/opendata/process/test_data/categories.csv b/scripts/us_eia/opendata/process/test_data/categories.csv
index 2678990eda..11d61d34c0 100644
--- a/scripts/us_eia/opendata/process/test_data/categories.csv
+++ b/scripts/us_eia/opendata/process/test_data/categories.csv
@@ -1,5 +1,5 @@
-place,stat_var,date,value,unit,scaling_factor,eia_series_id
-dcid:country/USA,dcid:eia/NG.N9140_2.A,2020,30482049,MillionCubicFeet,,NG.N9140US2.A
-dcid:country/USA,dcid:eia/NG.N9140_2.A,2019,31099061,MillionCubicFeet,,NG.N9140US2.A
-dcid:country/USA,dcid:eia/NG.N9140_2.M,2021-02,3036972,MillionCubicFeet,,NG.N9140US2.M
-dcid:country/USA,dcid:eia/NG.N9140_2.M,2021-01,3286266,MillionCubicFeet,,NG.N9140US2.M
+place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod
+dcid:country/USA,dcid:eia/NG.N9140_2.A,2020,30482049,MillionCubicFeet,,NG.N9140US2.A,
+dcid:country/USA,dcid:eia/NG.N9140_2.A,2019,31099061,MillionCubicFeet,,NG.N9140US2.A,
+dcid:country/USA,dcid:eia/NG.N9140_2.M,2021-02,3036972,MillionCubicFeet,,NG.N9140US2.M,
+dcid:country/USA,dcid:eia/NG.N9140_2.M,2021-01,3286266,MillionCubicFeet,,NG.N9140US2.M,
diff --git a/scripts/us_eia/opendata/process/test_data/categories.tmcf b/scripts/us_eia/opendata/process/test_data/categories.tmcf
index e1ef4499a7..f198290a7a 100644
--- a/scripts/us_eia/opendata/process/test_data/categories.tmcf
+++ b/scripts/us_eia/opendata/process/test_data/categories.tmcf
@@ -8,3 +8,4 @@ value: C:EIATable->value
 unit: C:EIATable->unit
 scalingFactor: C:EIATable->scaling_factor
 eiaSeriesId: C:EIATable->eia_series_id
+measurementMethod: C:EIATable->measurementMethod
diff --git a/scripts/us_eia/opendata/process/test_data/coal.csv b/scripts/us_eia/opendata/process/test_data/coal.csv
index 6b31796c42..c29aa9febf 100644
--- a/scripts/us_eia/opendata/process/test_data/coal.csv
+++ b/scripts/us_eia/opendata/process/test_data/coal.csv
@@ -1,12 +1,12 @@
-place,stat_var,date,value,unit,scaling_factor,eia_series_id
-dcid:geoId/01,dcid:Quarterly_Average_AshContent_Coal_For_ElectricUtility,2020-12,6.744021229492053,,100,COAL.ASH_CONTENT.AL-1.Q
-dcid:geoId/01,dcid:Quarterly_Average_AshContent_Coal_For_ElectricUtility,2020-09,6.767757786979022,,100,COAL.ASH_CONTENT.AL-1.Q
-dcid:geoId/21,dcid:Annual_Average_AshContent_Coal_For_CommercialAndInstitutional,2002,0,,100,COAL.ASH_CONTENT.KY-8.A
-dcid:geoId/21,dcid:Annual_Average_AshContent_Coal_For_CommercialAndInstitutional,2001,0,,100,COAL.ASH_CONTENT.KY-8.A
-dcid:geoId/21,dcid:Annual_Average_AshContent_Coal_For_CommercialAndInstitutional,2000,0,,100,COAL.ASH_CONTENT.KY-8.A
-dcid:geoId/17,dcid:Quarterly_Average_HeatContent_Coal_For_CommercialAndInstitutional,2000-09,0,dcid:BtuPerPound,,COAL.HEAT_CONTENT.IL-8.Q
-dcid:geoId/17,dcid:Quarterly_Average_HeatContent_Coal_For_CommercialAndInstitutional,2000-06,0,dcid:BtuPerPound,,COAL.HEAT_CONTENT.IL-8.Q
-dcid:geoId/17,dcid:Quarterly_Average_HeatContent_Coal_For_CommercialAndInstitutional,2000-03,0,dcid:BtuPerPound,,COAL.HEAT_CONTENT.IL-8.Q
-dcid:geoId/36,dcid:Annual_Receipt_Coal_ElectricUtilityNonCogen,2008,8236048,dcid:ShortTon,,COAL.RECEIPTS.NY-2.A
-dcid:geoId/12,dcid:Quarterly_Stock_Coal_ElectricUtility,2008-03,4067084,dcid:ShortTon,,COAL.STOCKS.FL-1.Q
-dcid:geoId/46,dcid:Annual_Average_SulfurContent_Coal_For_ElectricUtility,2008,0.31,,100,COAL.SULFUR_CONTENT.SD-1.A
+place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod
+dcid:geoId/01,dcid:Quarterly_Average_AshContent_Coal_For_ElectricUtility,2020-12,6.744021229492053,,100,COAL.ASH_CONTENT.AL-1.Q,
+dcid:geoId/01,dcid:Quarterly_Average_AshContent_Coal_For_ElectricUtility,2020-09,6.767757786979022,,100,COAL.ASH_CONTENT.AL-1.Q,
+dcid:geoId/21,dcid:Annual_Average_AshContent_Coal_For_CommercialAndInstitutional,2002,0,,100,COAL.ASH_CONTENT.KY-8.A,
+dcid:geoId/21,dcid:Annual_Average_AshContent_Coal_For_CommercialAndInstitutional,2001,0,,100,COAL.ASH_CONTENT.KY-8.A,
+dcid:geoId/21,dcid:Annual_Average_AshContent_Coal_For_CommercialAndInstitutional,2000,0,,100,COAL.ASH_CONTENT.KY-8.A,
+dcid:geoId/17,dcid:Quarterly_Average_HeatContent_Coal_For_CommercialAndInstitutional,2000-09,0,dcid:BtuPerPound,,COAL.HEAT_CONTENT.IL-8.Q,
+dcid:geoId/17,dcid:Quarterly_Average_HeatContent_Coal_For_CommercialAndInstitutional,2000-06,0,dcid:BtuPerPound,,COAL.HEAT_CONTENT.IL-8.Q,
+dcid:geoId/17,dcid:Quarterly_Average_HeatContent_Coal_For_CommercialAndInstitutional,2000-03,0,dcid:BtuPerPound,,COAL.HEAT_CONTENT.IL-8.Q,
+dcid:geoId/36,dcid:Annual_Receipt_Coal_ElectricUtilityNonCogen,2008,8236048,dcid:ShortTon,,COAL.RECEIPTS.NY-2.A,
+dcid:geoId/12,dcid:Quarterly_Stock_Coal_ElectricUtility,2008-03,4067084,dcid:ShortTon,,COAL.STOCKS.FL-1.Q,
+dcid:geoId/46,dcid:Annual_Average_SulfurContent_Coal_For_ElectricUtility,2008,0.31,,100,COAL.SULFUR_CONTENT.SD-1.A,
diff --git a/scripts/us_eia/opendata/process/test_data/coal.tmcf b/scripts/us_eia/opendata/process/test_data/coal.tmcf
index e1ef4499a7..f198290a7a 100644
--- a/scripts/us_eia/opendata/process/test_data/coal.tmcf
+++ b/scripts/us_eia/opendata/process/test_data/coal.tmcf
@@ -8,3 +8,4 @@ value: C:EIATable->value
 unit: C:EIATable->unit
 scalingFactor: C:EIATable->scaling_factor
 eiaSeriesId: C:EIATable->eia_series_id
+measurementMethod: C:EIATable->measurementMethod
diff --git a/scripts/us_eia/opendata/process/test_data/elec.csv b/scripts/us_eia/opendata/process/test_data/elec.csv
index 55071994a6..47c39f5acd 100644
--- a/scripts/us_eia/opendata/process/test_data/elec.csv
+++ b/scripts/us_eia/opendata/process/test_data/elec.csv
@@ -1,10 +1,10 @@
-place,stat_var,date,value,unit,scaling_factor,eia_series_id
-dcid:geoId/24,dcid:Monthly_Generation_Electricity_Solar_IndependentPowerProducers,2021-02,33.52617,dcid:GigawattHour,,ELEC.GEN.TSN-MD-94.M
-dcid:geoId/24,dcid:Monthly_Generation_Electricity_Solar_IndependentPowerProducers,2021-01,33.77782,dcid:GigawattHour,,ELEC.GEN.TSN-MD-94.M
-dcid:geoId/24,dcid:Monthly_Generation_Electricity_Solar_IndependentPowerProducers,2021-03,0.0,dcid:GigawattHour,,ELEC.GEN.TSN-MD-94.M
-dcid:geoId/25,dcid:Quarterly_RetailSales_Electricity_Residential,2021-03,1809.70299,dcid:GigawattHour,,ELEC.SALES.MA-RES.Q
-dcid:geoId/25,dcid:Quarterly_RetailSales_Electricity_Residential,2021-06,1956.15091,dcid:GigawattHour,,ELEC.SALES.MA-RES.Q
-dcid:geoId/05,dcid:Quarterly_Consumption_Fuel_ForElectricityGeneration_Coal_ElectricUtilityNonCogen,2021-06,6586120.0,dcid:MMBtu,,ELEC.CONS_EG_BTU.COW-AR-2.Q
-dcid:geoId/05,dcid:Quarterly_Consumption_Fuel_ForElectricityGeneration_Coal_ElectricUtilityNonCogen,2021-03,10431100.0,dcid:MMBtu,,ELEC.CONS_EG_BTU.COW-AR-2.Q
-dcid:geoId/06,dcid:Monthly_SalesRevenue_Electricity,2021-06,4523307770.0,dcid:USDollar,,ELEC.REV.CA-ALL.M
-dcid:geoId/06,dcid:Monthly_SalesRevenue_Electricity,2021-05,3461923010.0,dcid:USDollar,,ELEC.REV.CA-ALL.M
+place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod
+dcid:geoId/24,dcid:Monthly_Generation_Electricity_Solar_IndependentPowerProducers,2021-02,33.52617,dcid:GigawattHour,,ELEC.GEN.TSN-MD-94.M,
+dcid:geoId/24,dcid:Monthly_Generation_Electricity_Solar_IndependentPowerProducers,2021-01,33.77782,dcid:GigawattHour,,ELEC.GEN.TSN-MD-94.M,
+dcid:geoId/24,dcid:Monthly_Generation_Electricity_Solar_IndependentPowerProducers,2021-03,0.0,dcid:GigawattHour,,ELEC.GEN.TSN-MD-94.M,
+dcid:geoId/25,dcid:Quarterly_RetailSales_Electricity_Residential,2021-03,1809.70299,dcid:GigawattHour,,ELEC.SALES.MA-RES.Q,
+dcid:geoId/25,dcid:Quarterly_RetailSales_Electricity_Residential,2021-06,1956.15091,dcid:GigawattHour,,ELEC.SALES.MA-RES.Q,
+dcid:geoId/05,dcid:Quarterly_Consumption_Fuel_ForElectricityGeneration_Coal_ElectricUtilityNonCogen,2021-06,6586120.0,dcid:MMBtu,,ELEC.CONS_EG_BTU.COW-AR-2.Q,
+dcid:geoId/05,dcid:Quarterly_Consumption_Fuel_ForElectricityGeneration_Coal_ElectricUtilityNonCogen,2021-03,10431100.0,dcid:MMBtu,,ELEC.CONS_EG_BTU.COW-AR-2.Q,
+dcid:geoId/06,dcid:Monthly_SalesRevenue_Electricity,2021-06,4523307770000000.0,dcid:USDollar,,ELEC.REV.CA-ALL.M,
+dcid:geoId/06,dcid:Monthly_SalesRevenue_Electricity,2021-05,3461923010000000.0,dcid:USDollar,,ELEC.REV.CA-ALL.M,
diff --git a/scripts/us_eia/opendata/process/test_data/elec.tmcf b/scripts/us_eia/opendata/process/test_data/elec.tmcf
index e1ef4499a7..f198290a7a 100644
--- a/scripts/us_eia/opendata/process/test_data/elec.tmcf
+++ b/scripts/us_eia/opendata/process/test_data/elec.tmcf
@@ -8,3 +8,4 @@ value: C:EIATable->value
 unit: C:EIATable->unit
 scalingFactor: C:EIATable->scaling_factor
 eiaSeriesId: C:EIATable->eia_series_id
+measurementMethod: C:EIATable->measurementMethod
diff --git a/scripts/us_eia/opendata/process/test_data/intl.csv b/scripts/us_eia/opendata/process/test_data/intl.csv
index fda1cd461a..6afb7ca9f9 100644
--- a/scripts/us_eia/opendata/process/test_data/intl.csv
+++ b/scripts/us_eia/opendata/process/test_data/intl.csv
@@ -1,7 +1,7 @@
-place,stat_var,date,value,unit,scaling_factor,eia_series_id
-dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2020,924.4588369430336,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A
-dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2019,986.3134487671233,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A
-dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2018,1017.5592096438356,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A
-dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2017,1017.3772797808219,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A
-dcid:Earth,dcid:eia/INTL.55-1-TBPD.A,2020,91753.99016207967,ThousandBarrelsPerDay,,INTL.55-1-WORL-TBPD.A
-dcid:Earth,dcid:eia/INTL.55-1-TBPD.A,2019,97993.61794135909,ThousandBarrelsPerDay,,INTL.55-1-WORL-TBPD.A
+place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod
+dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2020,924.4588369430336,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A,
+dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2019,986.3134487671233,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A,
+dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2018,1017.5592096438356,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A,
+dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2017,1017.3772797808219,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A,
+dcid:Earth,dcid:eia/INTL.55-1-TBPD.A,2020,91753.99016207967,ThousandBarrelsPerDay,,INTL.55-1-WORL-TBPD.A,
+dcid:Earth,dcid:eia/INTL.55-1-TBPD.A,2019,97993.61794135909,ThousandBarrelsPerDay,,INTL.55-1-WORL-TBPD.A,
diff --git a/scripts/us_eia/opendata/process/test_data/intl.tmcf b/scripts/us_eia/opendata/process/test_data/intl.tmcf
index e1ef4499a7..f198290a7a 100644
--- a/scripts/us_eia/opendata/process/test_data/intl.tmcf
+++ b/scripts/us_eia/opendata/process/test_data/intl.tmcf
@@ -8,3 +8,4 @@ value: C:EIATable->value
 unit: C:EIATable->unit
 scalingFactor: C:EIATable->scaling_factor
 eiaSeriesId: C:EIATable->eia_series_id
+measurementMethod: C:EIATable->measurementMethod
diff --git a/scripts/us_eia/opendata/process/test_data/ng.csv b/scripts/us_eia/opendata/process/test_data/ng.csv
index a1722b09cf..96af54c914 100644
--- a/scripts/us_eia/opendata/process/test_data/ng.csv
+++ b/scripts/us_eia/opendata/process/test_data/ng.csv
@@ -1,12 +1,12 @@
-place,stat_var,date,value,unit,scaling_factor,eia_series_id
-dcid:geoId/01,dcid:eia/NG.N3035_4.M,2021-02,26.1,Percent,,NG.N3035AL4.M
-dcid:geoId/01,dcid:eia/NG.N3035_4.M,2021-01,25.2,Percent,,NG.N3035AL4.M
-dcid:geoId/01,dcid:eia/NG.N3035_4.M,2020-12,24.7,Percent,,NG.N3035AL4.M
-dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2008,6,MillionBarrels,,NG.RL2R02SOK_1.A
-dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2007,-4,MillionBarrels,,NG.RL2R02SOK_1.A
-dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2006,13,MillionBarrels,,NG.RL2R02SOK_1.A
-dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2005,16,MillionBarrels,,NG.RL2R02SOK_1.A
-dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2004,40,MillionBarrels,,NG.RL2R02SOK_1.A
-dcid:country/USA,dcid:eia/NG.NA1350_2.A,2019,58084,MillionCubicFeet,,NG.NA1350_NUS_2.A
-dcid:country/USA,dcid:eia/NG.NA1350_2.A,2018,9248,MillionCubicFeet,,NG.NA1350_NUS_2.A
-dcid:country/USA,dcid:eia/NG.NA1350_2.A,2017,-256,MillionCubicFeet,,NG.NA1350_NUS_2.A
+place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod
+dcid:geoId/01,dcid:eia/NG.N3035_4.M,2021-02,26.1,Percent,,NG.N3035AL4.M,
+dcid:geoId/01,dcid:eia/NG.N3035_4.M,2021-01,25.2,Percent,,NG.N3035AL4.M,
+dcid:geoId/01,dcid:eia/NG.N3035_4.M,2020-12,24.7,Percent,,NG.N3035AL4.M,
+dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2008,6,MillionsBarrels,,NG.RL2R02SOK_1.A,
+dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2007,-4,MillionsBarrels,,NG.RL2R02SOK_1.A,
+dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2006,13,MillionsBarrels,,NG.RL2R02SOK_1.A,
+dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2005,16,MillionsBarrels,,NG.RL2R02SOK_1.A,
+dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2004,40,MillionsBarrels,,NG.RL2R02SOK_1.A,
+dcid:country/USA,dcid:eia/NG.NA1350_2.A,2019,58084,MillionCubicFeet,,NG.NA1350_NUS_2.A,
+dcid:country/USA,dcid:eia/NG.NA1350_2.A,2018,9248,MillionCubicFeet,,NG.NA1350_NUS_2.A,
+dcid:country/USA,dcid:eia/NG.NA1350_2.A,2017,-256,MillionCubicFeet,,NG.NA1350_NUS_2.A,
diff --git a/scripts/us_eia/opendata/process/test_data/ng.tmcf b/scripts/us_eia/opendata/process/test_data/ng.tmcf
index e1ef4499a7..f198290a7a 100644
--- a/scripts/us_eia/opendata/process/test_data/ng.tmcf
+++ b/scripts/us_eia/opendata/process/test_data/ng.tmcf
@@ -8,3 +8,4 @@ value: C:EIATable->value
 unit: C:EIATable->unit
 scalingFactor: C:EIATable->scaling_factor
 eiaSeriesId: C:EIATable->eia_series_id
+measurementMethod: C:EIATable->measurementMethod
diff --git a/scripts/us_eia/opendata/process/test_data/nuc_status.csv b/scripts/us_eia/opendata/process/test_data/nuc_status.csv
index d1304b4d04..bd3eb5649b 100644
--- a/scripts/us_eia/opendata/process/test_data/nuc_status.csv
+++ b/scripts/us_eia/opendata/process/test_data/nuc_status.csv
@@ -1,13 +1,13 @@
-place,stat_var,date,value,unit,scaling_factor,eia_series_id
-dcid:eia/pp/4046,dcid:Daily_Capacity_Nuclear_ForEnergyGeneration,2021-05-11,1197.1,dcid:Megawatt,,NUC_STATUS.CAP.4046.D
-dcid:eia/pp/4046,dcid:Daily_Capacity_Nuclear_ForEnergyGeneration,2021-05-10,1197.1,dcid:Megawatt,,NUC_STATUS.CAP.4046.D
-dcid:eia/pp/4046,dcid:Daily_Capacity_Nuclear_ForEnergyGeneration,2007-01-05,1036,dcid:Megawatt,,NUC_STATUS.CAP.4046.D
-dcid:eia/pp/621,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-11,0,dcid:Megawatt,,NUC_STATUS.OUT.621.D
-dcid:eia/pp/621,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-10,0,dcid:Megawatt,,NUC_STATUS.OUT.621.D
-dcid:eia/pp/621,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2007-01-01,0,dcid:Megawatt,,NUC_STATUS.OUT.621.D
-dcid:eia/pp/869-2,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration_AsAFractionOf_Capacity,2021-05-11,73,,,NUC_STATUS.OUT_PCT.869-2.D
-dcid:eia/pp/869-2,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration_AsAFractionOf_Capacity,2021-05-10,80,,,NUC_STATUS.OUT_PCT.869-2.D
-dcid:eia/pp/869-2,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration_AsAFractionOf_Capacity,2021-05-09,75,,,NUC_STATUS.OUT_PCT.869-2.D
-dcid:country/USA,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-11,16404.767,dcid:Megawatt,,NUC_STATUS.OUT.US.D
-dcid:country/USA,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-10,16960.869,dcid:Megawatt,,NUC_STATUS.OUT.US.D
-dcid:country/USA,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-09,17374.955,dcid:Megawatt,,NUC_STATUS.OUT.US.D
+place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod
+dcid:eia/pp/4046,dcid:Daily_Capacity_Nuclear_ForEnergyGeneration,2021-05-11,1197.1,dcid:Megawatt,,NUC_STATUS.CAP.4046.D,
+dcid:eia/pp/4046,dcid:Daily_Capacity_Nuclear_ForEnergyGeneration,2021-05-10,1197.1,dcid:Megawatt,,NUC_STATUS.CAP.4046.D,
+dcid:eia/pp/4046,dcid:Daily_Capacity_Nuclear_ForEnergyGeneration,2007-01-05,1036,dcid:Megawatt,,NUC_STATUS.CAP.4046.D,
+dcid:eia/pp/621,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-11,0,dcid:Megawatt,,NUC_STATUS.OUT.621.D,
+dcid:eia/pp/621,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-10,0,dcid:Megawatt,,NUC_STATUS.OUT.621.D,
+dcid:eia/pp/621,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2007-01-01,0,dcid:Megawatt,,NUC_STATUS.OUT.621.D,
+dcid:eia/pp/869-2,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration_AsAFractionOf_Capacity,2021-05-11,73,,,NUC_STATUS.OUT_PCT.869-2.D,
+dcid:eia/pp/869-2,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration_AsAFractionOf_Capacity,2021-05-10,80,,,NUC_STATUS.OUT_PCT.869-2.D,
+dcid:eia/pp/869-2,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration_AsAFractionOf_Capacity,2021-05-09,75,,,NUC_STATUS.OUT_PCT.869-2.D,
+dcid:country/USA,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-11,16404.767,dcid:Megawatt,,NUC_STATUS.OUT.US.D,
+dcid:country/USA,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-10,16960.869,dcid:Megawatt,,NUC_STATUS.OUT.US.D,
+dcid:country/USA,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-09,17374.955,dcid:Megawatt,,NUC_STATUS.OUT.US.D,
diff --git a/scripts/us_eia/opendata/process/test_data/nuc_status.tmcf b/scripts/us_eia/opendata/process/test_data/nuc_status.tmcf
index e1ef4499a7..f198290a7a 100644
--- a/scripts/us_eia/opendata/process/test_data/nuc_status.tmcf
+++ b/scripts/us_eia/opendata/process/test_data/nuc_status.tmcf
@@ -8,3 +8,4 @@ value: C:EIATable->value
 unit: C:EIATable->unit
 scalingFactor: C:EIATable->scaling_factor
 eiaSeriesId: C:EIATable->eia_series_id
+measurementMethod: C:EIATable->measurementMethod
diff --git a/scripts/us_eia/opendata/process/test_data/pet.csv b/scripts/us_eia/opendata/process/test_data/pet.csv
index 05d459da20..894f1122a8 100644
--- a/scripts/us_eia/opendata/process/test_data/pet.csv
+++ b/scripts/us_eia/opendata/process/test_data/pet.csv
@@ -1,8 +1,8 @@
-place,stat_var,date,value,unit,scaling_factor,eia_series_id
-dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2019,40989,ThousandGallons,,PET.KDLVISSCO1.A
-dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2018,37954,ThousandGallons,,PET.KDLVISSCO1.A
-dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2019,3,MillionBarrels,,PET.RCRR06SLA_1.A
-dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2018,6,MillionBarrels,,PET.RCRR06SLA_1.A
-dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2017,29,MillionBarrels,,PET.RCRR06SLA_1.A
-dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-11-30,32.2,Percent,,PET.M_EPC0_SPT_NUS_PER.W
-dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-12-06,33.1,Percent,,PET.M_EPC0_SPT_NUS_PER.W
+place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod
+dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2019,40989000.0,USGallon,,PET.KDLVISSCO1.A,
+dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2018,37954000.0,USGallon,,PET.KDLVISSCO1.A,
+dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2019,3,MillionsBarrels,,PET.RCRR06SLA_1.A,
+dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2018,6,MillionsBarrels,,PET.RCRR06SLA_1.A,
+dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2017,29,MillionsBarrels,,PET.RCRR06SLA_1.A,
+dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-11-30,32.2,Percent,,PET.M_EPC0_SPT_NUS_PER.W,
+dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-12-06,33.1,Percent,,PET.M_EPC0_SPT_NUS_PER.W,
diff --git a/scripts/us_eia/opendata/process/test_data/pet.tmcf b/scripts/us_eia/opendata/process/test_data/pet.tmcf
index e1ef4499a7..f198290a7a 100644
--- a/scripts/us_eia/opendata/process/test_data/pet.tmcf
+++ b/scripts/us_eia/opendata/process/test_data/pet.tmcf
@@ -8,3 +8,4 @@ value: C:EIATable->value
 unit: C:EIATable->unit
 scalingFactor: C:EIATable->scaling_factor
 eiaSeriesId: C:EIATable->eia_series_id
+measurementMethod: C:EIATable->measurementMethod
diff --git a/scripts/us_eia/opendata/process/test_data/seds.csv b/scripts/us_eia/opendata/process/test_data/seds.csv
index 25dd6e8f74..06b1a06881 100644
--- a/scripts/us_eia/opendata/process/test_data/seds.csv
+++ b/scripts/us_eia/opendata/process/test_data/seds.csv
@@ -1,5 +1,5 @@
-place,stat_var,date,value,unit,scaling_factor,eia_series_id
-dcid:geoId/06,dcid:eia/SEDS.TNISB.A,2018,1053194,BillionBtu,,SEDS.TNISB.CA.A
-dcid:geoId/06,dcid:eia/SEDS.TNISB.A,2017,1056142,BillionBtu,,SEDS.TNISB.CA.A
-dcid:country/USA,dcid:eia/SEDS.WXICD.A,2019,29.79,DollarsPerMillionBtu,,SEDS.WXICD.US.A
-dcid:country/USA,dcid:eia/SEDS.WXICD.A,2018,32.94,DollarsPerMillionBtu,,SEDS.WXICD.US.A
+place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod
+dcid:geoId/06,dcid:eia/SEDS.TNISB.A,2018,1053194,BillionBtu,,SEDS.TNISB.CA.A,
+dcid:geoId/06,dcid:eia/SEDS.TNISB.A,2017,1056142,BillionBtu,,SEDS.TNISB.CA.A,
+dcid:country/USA,dcid:eia/SEDS.WXICD.A,2019,29.79,USDollarPerMillionBtu,,SEDS.WXICD.US.A,
+dcid:country/USA,dcid:eia/SEDS.WXICD.A,2018,32.94,USDollarPerMillionBtu,,SEDS.WXICD.US.A,
diff --git a/scripts/us_eia/opendata/process/test_data/seds.tmcf b/scripts/us_eia/opendata/process/test_data/seds.tmcf
index e1ef4499a7..f198290a7a 100644
--- a/scripts/us_eia/opendata/process/test_data/seds.tmcf
+++ b/scripts/us_eia/opendata/process/test_data/seds.tmcf
@@ -8,3 +8,4 @@ value: C:EIATable->value
 unit: C:EIATable->unit
 scalingFactor: C:EIATable->scaling_factor
 eiaSeriesId: C:EIATable->eia_series_id
+measurementMethod: C:EIATable->measurementMethod
diff --git a/scripts/us_eia/opendata/process/test_data/total.csv b/scripts/us_eia/opendata/process/test_data/total.csv
index e7bc7babbb..8426b42903 100644
--- a/scripts/us_eia/opendata/process/test_data/total.csv
+++ b/scripts/us_eia/opendata/process/test_data/total.csv
@@ -1,3 +1,3 @@
-place,stat_var,date,value,unit,scaling_factor,eia_series_id
-dcid:country/USA,dcid:eia/TOTAL.LUACP.A,2020,52.852,ThousandBarrelsPerDay,,TOTAL.LUACPUS.A
-dcid:country/USA,dcid:eia/TOTAL.LUACP.A,2019,59.325,ThousandBarrelsPerDay,,TOTAL.LUACPUS.A
+place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod
+dcid:country/USA,dcid:eia/TOTAL.LUACP.A,2020,52.852,ThousandBarrelsPerDay,,TOTAL.LUACPUS.A,
+dcid:country/USA,dcid:eia/TOTAL.LUACP.A,2019,59.325,ThousandBarrelsPerDay,,TOTAL.LUACPUS.A,
diff --git a/scripts/us_eia/opendata/process/test_data/total.tmcf b/scripts/us_eia/opendata/process/test_data/total.tmcf
index e1ef4499a7..f198290a7a 100644
--- a/scripts/us_eia/opendata/process/test_data/total.tmcf
+++ b/scripts/us_eia/opendata/process/test_data/total.tmcf
@@ -8,3 +8,4 @@ value: C:EIATable->value
 unit: C:EIATable->unit
 scalingFactor: C:EIATable->scaling_factor
 eiaSeriesId: C:EIATable->eia_series_id
+measurementMethod: C:EIATable->measurementMethod

From 80ac8dc44b3f1032f0a5feeb62249f2bf5a96ac4 Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Thu, 30 Jan 2025 13:16:09 +0000
Subject: [PATCH 02/18] Resolved PR comments

---
 scripts/us_eia/opendata/generate_jsonl_for_bq.py | 4 ----
 scripts/us_eia/opendata/process.py               | 4 +++-
 scripts/us_eia/opendata/process/coal.py          | 4 ++--
 scripts/us_eia/opendata/process/common.py        | 4 +---
 scripts/us_eia/opendata/process/elec.py          | 2 +-
 scripts/us_eia/opendata/process/nuclear.py       | 4 ++--
 scripts/us_eia/opendata/process/pet.py           | 4 ++--
 7 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/scripts/us_eia/opendata/generate_jsonl_for_bq.py b/scripts/us_eia/opendata/generate_jsonl_for_bq.py
index af1579277c..9eeb81796b 100644
--- a/scripts/us_eia/opendata/generate_jsonl_for_bq.py
+++ b/scripts/us_eia/opendata/generate_jsonl_for_bq.py
@@ -103,16 +103,12 @@ def process_all():
         for file in sorted(files):
             if not file.endswith('.txt'):
                 continue
-            print(f'Processing1 {subdir}/{file}')
-
             process_single(subdir, file)
 
 
 if __name__ == '__main__':
     args = sys.argv[1:]
     if len(args) == 0:
-        print('Processing all files')
         process_all()
     else:
-        print(f'Processing {args[0]}/{args[1]}')
         process_single(args[0], args[1])
diff --git a/scripts/us_eia/opendata/process.py b/scripts/us_eia/opendata/process.py
index 3d67164ba8..fe06786560 100644
--- a/scripts/us_eia/opendata/process.py
+++ b/scripts/us_eia/opendata/process.py
@@ -26,7 +26,7 @@
 import sys
 import zipfile
 import requests
-
+from retry import retry
 from absl import flags
 from absl import app
 from absl import logging
@@ -57,6 +57,7 @@
 }
 
 
+@retry(tries=5, delay=3, backoff=2)
 def download_file(url: str, save_path: str):
     try:
         r = requests.get(url, stream=True)
@@ -66,6 +67,7 @@ def download_file(url: str, save_path: str):
         logging.fatal(f"error while downloading the file,{url} -{e}")
 
 
+@retry(tries=5, delay=3, backoff=2)
 def download_manifest():
     try:
         return requests.get(MANIFEST_URL).json()
diff --git a/scripts/us_eia/opendata/process/coal.py b/scripts/us_eia/opendata/process/coal.py
index c19d90990f..3c9c2a3165 100644
--- a/scripts/us_eia/opendata/process/coal.py
+++ b/scripts/us_eia/opendata/process/coal.py
@@ -13,13 +13,13 @@
 # limitations under the License.
 """EIA Coal Dataset specific functions."""
 
-import logging
+from absl import logging
 import re
 
 from . import common
 
 
-def extract_place_statvar(series_id, counters):
+def extract_place_statvar(series_id):
     """Given the series_id, extract the raw place and stat-var ID.
 
     Args:
diff --git a/scripts/us_eia/opendata/process/common.py b/scripts/us_eia/opendata/process/common.py
index 8954d80487..c5cf2b8664 100644
--- a/scripts/us_eia/opendata/process/common.py
+++ b/scripts/us_eia/opendata/process/common.py
@@ -16,7 +16,7 @@
 import sys
 import csv
 import json
-import logging
+from absl import logging
 import re
 from collections import defaultdict
 from sys import path
@@ -237,13 +237,11 @@ def _parse_date(d):
         m_or_q = d[4:]
 
         if m_or_q.startswith('Q'):
-            #print("withQ",yr + '-' + _QUARTER_MAP[m_or_q])
             # Quarterly
             if m_or_q in _QUARTER_MAP:
                 return yr + '-' + _QUARTER_MAP[m_or_q]
         else:
             # Monthly
-            #print("withOutQ",yr + '-' + m_or_q)
             return yr + '-' + m_or_q
 
     if len(d) == 8:
diff --git a/scripts/us_eia/opendata/process/elec.py b/scripts/us_eia/opendata/process/elec.py
index 1258b0344a..22968c7fe7 100644
--- a/scripts/us_eia/opendata/process/elec.py
+++ b/scripts/us_eia/opendata/process/elec.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 """EIA Electricity Dataset specific functions."""
 
-import logging
+from absl import logging
 import re
 
 from . import common
diff --git a/scripts/us_eia/opendata/process/nuclear.py b/scripts/us_eia/opendata/process/nuclear.py
index 8ad2f0b291..2e70d6e6e8 100644
--- a/scripts/us_eia/opendata/process/nuclear.py
+++ b/scripts/us_eia/opendata/process/nuclear.py
@@ -13,13 +13,13 @@
 # limitations under the License.
 """EIA Nuclear Status Dataset specific functions."""
 
-import logging
+from absl import logging
 import re
 
 from . import common
 
 
-def extract_place_statvar(series_id, counters):
+def extract_place_statvar(series_id):
     """Given the series_id, extract the raw place and stat-var ID.
 
     Args:
diff --git a/scripts/us_eia/opendata/process/pet.py b/scripts/us_eia/opendata/process/pet.py
index 9740a7bda7..818ba46a7b 100644
--- a/scripts/us_eia/opendata/process/pet.py
+++ b/scripts/us_eia/opendata/process/pet.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Google LLC
+# Copyright 2021 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -27,7 +27,7 @@ def _parse_with_place_prefix(m):
     return (place, sv_id, in_us)
 
 
-def extract_place_statvar(series_id, counters):
+def extract_place_statvar(series_id):
     """Given the series_id, extract the raw place and stat-var ID.
 
     Args:

From cbeb28fa75cde0a16a4c5b88c08b896bf7f620c4 Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Fri, 31 Jan 2025 06:03:44 +0000
Subject: [PATCH 03/18] fixed lint

---
 scripts/us_eia/opendata/manifest.json         | 48 ++++++++++++-------
 .../us_eia/opendata/process/common_test.py    |  2 +-
 2 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/scripts/us_eia/opendata/manifest.json b/scripts/us_eia/opendata/manifest.json
index 221c340c1f..1144ac4090 100644
--- a/scripts/us_eia/opendata/manifest.json
+++ b/scripts/us_eia/opendata/manifest.json
@@ -2,7 +2,9 @@
   "import_specifications": [
     {
       "import_name": "EIA_Coal",
-      "curator_emails": [],
+      "curator_emails": [
+        "support@datacommons.org"
+      ],
       "provenance_url": "https://www.eia.gov/opendata/qb.php?category=717234",
       "provenance_description": "Coal dataset has country, state-level level information .",
       "scripts": [
@@ -14,11 +16,13 @@
           "cleaned_csv": "tmp_raw_data/COAL/COAL.csv"
         }
       ],
-      "cron_schedule": "0 6 1 2 *"
+      "cron_schedule": "0 6 5,20 * *"
     },
     {
       "import_name": "EIA_Electricity",
-      "curator_emails": [],
+      "curator_emails": [
+        "support@datacommons.org"
+      ],
       "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0",
       "provenance_description": "Electricity dataset has country, state-level and plant-level information on electricity generation, consumption, sales etc by energy source and “sectors” (like residential, commercial, etc.).",
       "scripts": [
@@ -30,11 +34,13 @@
           "cleaned_csv": "tmp_raw_data/ELEC/ELEC.csv"
         }
       ],
-      "cron_schedule": "0 8 1 2 *"
+      "cron_schedule": "0 7 5,20 * *"
     },
     {
       "import_name": "EIA_NaturalGas",
-      "curator_emails": [],
+      "curator_emails": [
+        "support@datacommons.org"
+      ],
       "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0",
       "provenance_description": "Natural gas dataset has country and state-level data.",
       "scripts": [
@@ -46,11 +52,13 @@
           "cleaned_csv": "tmp_raw_data/NG/NG.csv"
         }
       ],
-      "cron_schedule": "05 10 * * *"
+      "cron_schedule": "0 8 5,20 * *"
     },
     {
       "import_name": "EIA_NuclearOutages",
-      "curator_emails": [],
+      "curator_emails": [
+        "support@datacommons.org"
+      ],
       "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0",
       "provenance_description": "Nuclear outage dataset has nuclear-plant and national data about Nuclear energy generation capacity and planned outages.",
       "scripts": [
@@ -62,11 +70,13 @@
           "cleaned_csv": "tmp_raw_data/NUC_STATUS/NUC_STATUS.csv"
         }
       ],
-      "cron_schedule": "01 9 * * *"
+      "cron_schedule": "0 9 5,20 * *"
     },
     {
       "import_name": "EIA_Petroleum",
-      "curator_emails": [],
+      "curator_emails": [
+        "support@datacommons.org"
+      ],
       "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0",
       "provenance_description": "EIA Petroleum dataset has country and state-level data.",
       "scripts": [
@@ -78,11 +88,13 @@
           "cleaned_csv": "tmp_raw_data/PET/PET.csv"
         }
       ],
-      "cron_schedule": "5 9 2 2 *"
+      "cron_schedule": "0 10 5,20 * *"
     },
     {
       "import_name": "EIA_International",
-      "curator_emails": [],
+      "curator_emails": [
+        "support@datacommons.org"
+      ],
       "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0",
       "provenance_description": "EIA International Energy dataset has country, continent and world-level data.",
       "scripts": [
@@ -94,11 +106,13 @@
           "cleaned_csv": "tmp_raw_data/INTL/INTL.csv"
         }
       ],
-      "cron_schedule": "1 7 * 1,4,7,10 *"
+      "cron_schedule": "0 11 5,20 * *"
     },
     {
       "import_name": "EIA_SEDS",
-      "curator_emails": [],
+      "curator_emails": [
+        "support@datacommons.org"
+      ],
       "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0",
       "provenance_description": "EIA SEDS International Energy dataset has US country-level and state-level data.",
       "scripts": [
@@ -110,11 +124,13 @@
           "cleaned_csv": "tmp_raw_data/SEDS/SEDS.csv"
         }
       ],
-      "cron_schedule": "0 0 1 1 *"
+      "cron_schedule": "0 12 5,20 * *"
     },
     {
       "import_name": "EIA_TotalEnergy",
-      "curator_emails": [],
+      "curator_emails": [
+        "support@datacommons.org"
+      ],
       "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0",
       "provenance_description": "Total Energy dataset has US country-level data.",
       "scripts": [
@@ -126,7 +142,7 @@
           "cleaned_csv": "tmp_raw_data/TOTAL/TOTAL.csv"
         }
       ],
-      "cron_schedule": "0 0 1 * *"
+      "cron_schedule": "0 13 5,20 * *"
     }
   ]
 }
\ No newline at end of file
diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py
index 042927a6af..019f027a06 100644
--- a/scripts/us_eia/opendata/process/common_test.py
+++ b/scripts/us_eia/opendata/process/common_test.py
@@ -24,7 +24,7 @@
     os.path.dirname(
         os.path.dirname(
             os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
-from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total
+from .us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total
 
 # module_dir_ is the path to where this test is running from.
 module_dir_ = os.path.dirname(__file__)

From df585fda31efa1e76418a27cabad1d577843d3e5 Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Fri, 31 Jan 2025 08:51:21 +0000
Subject: [PATCH 04/18] fixed test data

---
 scripts/us_eia/opendata/process.py               |  2 +-
 scripts/us_eia/opendata/process/common_test.py   |  2 +-
 .../us_eia/opendata/process/test_data/pet.csv    | 16 ++++++++--------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/scripts/us_eia/opendata/process.py b/scripts/us_eia/opendata/process.py
index fe06786560..c6142c4059 100644
--- a/scripts/us_eia/opendata/process.py
+++ b/scripts/us_eia/opendata/process.py
@@ -31,7 +31,7 @@
 from absl import app
 from absl import logging
 
-from process import common, coal, elec, intl, ng, nuclear, pet, seds, total
+from .process import common, coal, elec, intl, ng, nuclear, pet, seds, total
 
 MANIFEST_URL = "https://api.eia.gov/bulk/manifest.txt"
 
diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py
index 019f027a06..042927a6af 100644
--- a/scripts/us_eia/opendata/process/common_test.py
+++ b/scripts/us_eia/opendata/process/common_test.py
@@ -24,7 +24,7 @@
     os.path.dirname(
         os.path.dirname(
             os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
-from .us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total
+from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total
 
 # module_dir_ is the path to where this test is running from.
 module_dir_ = os.path.dirname(__file__)
diff --git a/scripts/us_eia/opendata/process/test_data/pet.csv b/scripts/us_eia/opendata/process/test_data/pet.csv
index 894f1122a8..156723593c 100644
--- a/scripts/us_eia/opendata/process/test_data/pet.csv
+++ b/scripts/us_eia/opendata/process/test_data/pet.csv
@@ -1,8 +1,8 @@
-place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod
-dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2019,40989000.0,USGallon,,PET.KDLVISSCO1.A,
-dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2018,37954000.0,USGallon,,PET.KDLVISSCO1.A,
-dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2019,3,MillionsBarrels,,PET.RCRR06SLA_1.A,
-dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2018,6,MillionsBarrels,,PET.RCRR06SLA_1.A,
-dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2017,29,MillionsBarrels,,PET.RCRR06SLA_1.A,
-dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-11-30,32.2,Percent,,PET.M_EPC0_SPT_NUS_PER.W,
-dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-12-06,33.1,Percent,,PET.M_EPC0_SPT_NUS_PER.W,
+place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod
+dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2019,40989000.0,USGallon,,PET.KDLVISSCO1.A,
+dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2018,37954000.0,USGallon,,PET.KDLVISSCO1.A,
+dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2019,3,MillionsBarrels,,PET.RCRR06SLA_1.A,
+dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2018,6,MillionsBarrels,,PET.RCRR06SLA_1.A,
+dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2017,29,MillionsBarrels,,PET.RCRR06SLA_1.A,
+dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-11-30,32.2,Percent,,PET.M_EPC0_SPT_NUS_PER.W,
+dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-12-06,33.1,Percent,,PET.M_EPC0_SPT_NUS_PER.W,

From e255e888f3d391f84313beda530d377f1de83f9a Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Fri, 31 Jan 2025 09:28:25 +0000
Subject: [PATCH 05/18] fixed test data

---
 scripts/us_eia/opendata/process/common_test.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py
index 042927a6af..11337ff8bc 100644
--- a/scripts/us_eia/opendata/process/common_test.py
+++ b/scripts/us_eia/opendata/process/common_test.py
@@ -17,14 +17,15 @@
 import sys
 import tempfile
 import unittest
+from absl import logging
 
-# Allows the following module imports to work when running as a script
+# Allows the following module imports to work when running as a script.
 # relative to scripts/
 sys.path.append(
     os.path.dirname(
         os.path.dirname(
             os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
-from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total
+from .us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total
 
 # module_dir_ is the path to where this test is running from.
 module_dir_ = os.path.dirname(__file__)
@@ -54,7 +55,7 @@ def test_process(self):
         for (dataset, dataset_name, test_fname, extract_fn,
              schema_fn) in _TEST_CASES:
             with tempfile.TemporaryDirectory() as tmp_dir:
-                print('Processing', dataset)
+                logging.info(f"Processing {dataset}")
                 in_file = os.path.join(module_dir_, 'test_data',
                                        f'{test_fname}.txt')
 
@@ -111,4 +112,5 @@ def test_cleanup_name(self):
 
 
 if __name__ == '__main__':
+    logging.set_verbosity(logging.INFO)
     unittest.main()

From 3ba71c819fe46d437a335701f0c72361d0ab0835 Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Fri, 31 Jan 2025 10:47:19 +0000
Subject: [PATCH 06/18] fixed test

---
 scripts/us_eia/opendata/process/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/us_eia/opendata/process/main.py b/scripts/us_eia/opendata/process/main.py
index 82a15c01b7..4aab227f2d 100644
--- a/scripts/us_eia/opendata/process/main.py
+++ b/scripts/us_eia/opendata/process/main.py
@@ -25,7 +25,7 @@
     os.path.dirname(
         os.path.dirname(
             os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
-from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total
+from .us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total
 
 FLAGS = flags.FLAGS
 flags.DEFINE_string('data_dir', 'tmp_raw_data', 'Raw data dir')

From 2360e86d45159ed5b06f45d7989cbf4a176f2b44 Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Fri, 31 Jan 2025 11:07:54 +0000
Subject: [PATCH 07/18] fixed test

---
 scripts/us_eia/opendata/process/common_test.py | 3 ++-
 scripts/us_eia/opendata/process/main.py        | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py
index 11337ff8bc..e2f0f040bd 100644
--- a/scripts/us_eia/opendata/process/common_test.py
+++ b/scripts/us_eia/opendata/process/common_test.py
@@ -25,7 +25,8 @@
     os.path.dirname(
         os.path.dirname(
             os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
-from .us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total
+#from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total
+from main import *
 
 # module_dir_ is the path to where this test is running from.
 module_dir_ = os.path.dirname(__file__)
diff --git a/scripts/us_eia/opendata/process/main.py b/scripts/us_eia/opendata/process/main.py
index 4aab227f2d..82a15c01b7 100644
--- a/scripts/us_eia/opendata/process/main.py
+++ b/scripts/us_eia/opendata/process/main.py
@@ -25,7 +25,7 @@
     os.path.dirname(
         os.path.dirname(
             os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
-from .us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total
+from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total
 
 FLAGS = flags.FLAGS
 flags.DEFINE_string('data_dir', 'tmp_raw_data', 'Raw data dir')

From 809cc65b75b2224ce6c6ac7e743412a803dbcce5 Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Fri, 31 Jan 2025 11:29:01 +0000
Subject: [PATCH 08/18] fixed test

---
 scripts/us_eia/opendata/process/common_test.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py
index e2f0f040bd..1690ba5fe2 100644
--- a/scripts/us_eia/opendata/process/common_test.py
+++ b/scripts/us_eia/opendata/process/common_test.py
@@ -21,15 +21,13 @@
 
 # Allows the following module imports to work when running as a script.
 # relative to scripts/
-sys.path.append(
-    os.path.dirname(
-        os.path.dirname(
-            os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
+
 #from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total
-from main import *
 
-# module_dir_ is the path to where this test is running from.
 module_dir_ = os.path.dirname(__file__)
+sys.path.insert(0, module_dir_)
+from main import *
+# module_dir_ is the path to where this test is running from.
 
 _TEST_CASES = [
     # dataset-code, dataset-name, test-case-filename,

From 0a36a2e983d6ab38e79f28f34fe4a4fd1ee95840 Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Mon, 3 Feb 2025 06:26:34 +0000
Subject: [PATCH 09/18] updated scripts

---
 scripts/us_eia/opendata/process/common.py      | 10 ++++++++--
 scripts/us_eia/opendata/process/common_test.py |  3 +++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/scripts/us_eia/opendata/process/common.py b/scripts/us_eia/opendata/process/common.py
index c5cf2b8664..2ca667c5ee 100644
--- a/scripts/us_eia/opendata/process/common.py
+++ b/scripts/us_eia/opendata/process/common.py
@@ -16,8 +16,9 @@
 import sys
 import csv
 import json
-from absl import logging
+import logging
 import re
+import inspect
 from collections import defaultdict
 from sys import path
 
@@ -237,11 +238,13 @@ def _parse_date(d):
         m_or_q = d[4:]
 
         if m_or_q.startswith('Q'):
+            #print("withQ",yr + '-' + _QUARTER_MAP[m_or_q])
             # Quarterly
             if m_or_q in _QUARTER_MAP:
                 return yr + '-' + _QUARTER_MAP[m_or_q]
         else:
             # Monthly
+            #print("withOutQ",yr + '-' + m_or_q)
             return yr + '-' + m_or_q
 
     if len(d) == 8:
@@ -471,7 +474,7 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf,
             if not line.startswith('{'):
                 continue
             data = json.loads(line)
-            logging.info(f"Loaded data: {data}")
+            #logging.info(f"Loaded data: {data}")
 
             # Preliminary checks
             series_id = data.get('series_id', None)
@@ -486,6 +489,9 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf,
             if not time_series:
                 counters.add_counter('error_missing_time_series', 1)
                 continue
+            logging.info(
+                f"extract_place_statvar_fn {inspect.getmodule(extract_place_statvar_fn)}"
+            )
 
             # Extract raw place and stat-var from series_id.
             (raw_place, raw_sv,
diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py
index 1690ba5fe2..bb91d41287 100644
--- a/scripts/us_eia/opendata/process/common_test.py
+++ b/scripts/us_eia/opendata/process/common_test.py
@@ -17,6 +17,7 @@
 import sys
 import tempfile
 import unittest
+import inspect
 from absl import logging
 
 # Allows the following module imports to work when running as a script.
@@ -67,6 +68,8 @@ def test_process(self):
                 act_mcf = os.path.join(tmp_dir, exp_mcf)
                 act_svg_mcf = os.path.join(tmp_dir, exp_svg_mcf)
                 act_tmcf = os.path.join(tmp_dir, exp_tmcf)
+                logging.info(f"extract_fn {inspect.getmodule(extract_fn)}")
+
                 common.process(dataset, dataset_name, in_file, act_csv, act_mcf,
                                act_svg_mcf, act_tmcf, extract_fn, schema_fn)
 

From bbff4cf1787d182d405df48a8c18f391b2381b26 Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Mon, 3 Feb 2025 06:43:20 +0000
Subject: [PATCH 10/18] fixing test scripts

---
 scripts/us_eia/opendata/process/common_test.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py
index bb91d41287..c12f7fd727 100644
--- a/scripts/us_eia/opendata/process/common_test.py
+++ b/scripts/us_eia/opendata/process/common_test.py
@@ -20,6 +20,7 @@
 import inspect
 from absl import logging
 
+
 # Allows the following module imports to work when running as a script.
 # relative to scripts/
 
@@ -30,6 +31,7 @@
 from main import *
 # module_dir_ is the path to where this test is running from.
 
+
 _TEST_CASES = [
     # dataset-code, dataset-name, test-case-filename,
     #   extract-fn, schema-fn
@@ -68,7 +70,7 @@ def test_process(self):
                 act_mcf = os.path.join(tmp_dir, exp_mcf)
                 act_svg_mcf = os.path.join(tmp_dir, exp_svg_mcf)
                 act_tmcf = os.path.join(tmp_dir, exp_tmcf)
-                logging.info(f"extract_fn {inspect.getmodule(extract_fn)}")
+                logging.debug(f"extract_fn {inspect.getmodule(extract_fn)}")
 
                 common.process(dataset, dataset_name, in_file, act_csv, act_mcf,
                                act_svg_mcf, act_tmcf, extract_fn, schema_fn)
@@ -114,5 +116,5 @@ def test_cleanup_name(self):
 
 
 if __name__ == '__main__':
-    logging.set_verbosity(logging.INFO)
+    logging.set_verbosity(logging.DEBUG)
     unittest.main()

From 192553bce8f341f630f6d21fe723556ac6243bca Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Mon, 3 Feb 2025 06:51:40 +0000
Subject: [PATCH 11/18] fixing test scripts

---
 scripts/us_eia/opendata/process/common_test.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py
index c12f7fd727..3eb0ba22e9 100644
--- a/scripts/us_eia/opendata/process/common_test.py
+++ b/scripts/us_eia/opendata/process/common_test.py
@@ -20,7 +20,6 @@
 import inspect
 from absl import logging
 
-
 # Allows the following module imports to work when running as a script.
 # relative to scripts/
 
@@ -31,7 +30,6 @@
 from main import *
 # module_dir_ is the path to where this test is running from.
 
-
 _TEST_CASES = [
     # dataset-code, dataset-name, test-case-filename,
     #   extract-fn, schema-fn

From 63b3a9b2a9ad18ef14cac881bce3b43bb456bb76 Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Mon, 3 Feb 2025 07:22:36 +0000
Subject: [PATCH 12/18] added counters

---
 scripts/us_eia/opendata/process/coal.py    | 5 +++--
 scripts/us_eia/opendata/process/elec.py    | 3 ++-
 scripts/us_eia/opendata/process/intl.py    | 1 +
 scripts/us_eia/opendata/process/ng.py      | 1 +
 scripts/us_eia/opendata/process/nuclear.py | 5 +++--
 scripts/us_eia/opendata/process/pet.py     | 3 ++-
 scripts/us_eia/opendata/process/seds.py    | 1 +
 scripts/us_eia/opendata/process/total.py   | 1 +
 8 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/scripts/us_eia/opendata/process/coal.py b/scripts/us_eia/opendata/process/coal.py
index 3c9c2a3165..3a1ce60a1f 100644
--- a/scripts/us_eia/opendata/process/coal.py
+++ b/scripts/us_eia/opendata/process/coal.py
@@ -13,13 +13,13 @@
 # limitations under the License.
 """EIA Coal Dataset specific functions."""
 
-from absl import logging
+import logging
 import re
 
 from . import common
 
 
-def extract_place_statvar(series_id):
+def extract_place_statvar(series_id, counters):
     """Given the series_id, extract the raw place and stat-var ID.
 
     Args:
@@ -33,6 +33,7 @@ def extract_place_statvar(series_id):
     m = re.match(r"^COAL\.([^._]+_?[^._]+)\.([A-Z]+)-([0-9]+)\.([AQM])$",
                  series_id)
     if m:
+        counters.add_counter('info_coal_record_count', 1)
         measure = m.group(1)
         place = m.group(2)
         code = m.group(3)
diff --git a/scripts/us_eia/opendata/process/elec.py b/scripts/us_eia/opendata/process/elec.py
index 22968c7fe7..b3f309d9b6 100644
--- a/scripts/us_eia/opendata/process/elec.py
+++ b/scripts/us_eia/opendata/process/elec.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 """EIA Electricity Dataset specific functions."""
 
-from absl import logging
+import logging
 import re
 
 from . import common
@@ -40,6 +40,7 @@ def extract_place_statvar(series_id, counters):
                  series_id)
 
     if m:
+        counters.add_counter('info_elec_record_count', 1)
         measure = m.group(1)
         fuel_type = m.group(2)
         place = m.group(3)
diff --git a/scripts/us_eia/opendata/process/intl.py b/scripts/us_eia/opendata/process/intl.py
index c94244746a..94517b3fb5 100644
--- a/scripts/us_eia/opendata/process/intl.py
+++ b/scripts/us_eia/opendata/process/intl.py
@@ -16,6 +16,7 @@ def extract_place_statvar(series_id, counters):
     # INTL.{MEASURE1}-{MEASURE2}-{PLACE}-{MEASURE3}.{PERIOD}
     m = re.match(r"^(INTL\.[^-]+-[^-]+)-([^-]+)-([^-]+\.[A-Z])$", series_id)
     if m:
+        counters.add_counter('info_intl_record_count', 1)
         sv_part1 = m.group(1)
         place = m.group(2)
         sv_part2 = m.group(3)
diff --git a/scripts/us_eia/opendata/process/ng.py b/scripts/us_eia/opendata/process/ng.py
index 780108e55e..a6ae1e07c3 100644
--- a/scripts/us_eia/opendata/process/ng.py
+++ b/scripts/us_eia/opendata/process/ng.py
@@ -61,6 +61,7 @@ def extract_place_statvar(series_id, counters):
     # Pattern #1: NG.N{MEASURE1}{PLACE}{MEASURE2}.{PERIOD}
     m = re.match(r"^(NG\.N[^_]+)([A-Z][A-Z])([0-9]\.[A-Z])$", series_id)
     if m:
+        counters.add_counter('info_ng_record_count', 1)
         sv_part1 = m.group(1)
         sv_part2 = m.group(3)
         sv_id = f'{sv_part1}_{sv_part2}'
diff --git a/scripts/us_eia/opendata/process/nuclear.py b/scripts/us_eia/opendata/process/nuclear.py
index 2e70d6e6e8..b03da32d1a 100644
--- a/scripts/us_eia/opendata/process/nuclear.py
+++ b/scripts/us_eia/opendata/process/nuclear.py
@@ -13,13 +13,13 @@
 # limitations under the License.
 """EIA Nuclear Status Dataset specific functions."""
 
-from absl import logging
+import logging
 import re
 
 from . import common
 
 
-def extract_place_statvar(series_id):
+def extract_place_statvar(series_id, counters):
     """Given the series_id, extract the raw place and stat-var ID.
 
     Args:
@@ -30,6 +30,7 @@ def extract_place_statvar(series_id):
     """
     m = re.match(r"^NUC_STATUS\.([^.]+)\.([^.]+)\.(D)$", series_id)
     if m:
+        counters.add_counter('info_nuclear_record_count', 1)
         measure = m.group(1)
         place = m.group(2)
         if not place == 'US':
diff --git a/scripts/us_eia/opendata/process/pet.py b/scripts/us_eia/opendata/process/pet.py
index 818ba46a7b..361cf9dd6b 100644
--- a/scripts/us_eia/opendata/process/pet.py
+++ b/scripts/us_eia/opendata/process/pet.py
@@ -27,7 +27,7 @@ def _parse_with_place_prefix(m):
     return (place, sv_id, in_us)
 
 
-def extract_place_statvar(series_id):
+def extract_place_statvar(series_id, counters):
     """Given the series_id, extract the raw place and stat-var ID.
 
     Args:
@@ -40,6 +40,7 @@ def extract_place_statvar(series_id):
     # Pattern #1: PET.K{MEASURE1}[SN]{PLACE}{MEASURE2}.{PERIOD}
     m = re.match(r"^(PET\.K[^_]+)([NS][A-Z][A-Z])([0-9]\.[A-Z])$", series_id)
     if m:
+        counters.add_counter('info_pet_record_count', 1)
         return _parse_with_place_prefix(m)
 
     # Pattern #2: PET.{MEASURE1}[SN]{PLACE}_{MEASURE2}.{PERIOD}
diff --git a/scripts/us_eia/opendata/process/seds.py b/scripts/us_eia/opendata/process/seds.py
index 6f55ab8b1b..c12c7f2b92 100644
--- a/scripts/us_eia/opendata/process/seds.py
+++ b/scripts/us_eia/opendata/process/seds.py
@@ -20,6 +20,7 @@ def extract_place_statvar(series_id, counters):
     # (https://user-images.githubusercontent.com/4375037/117168919-74618f00-ad7d-11eb-8306-bb4db3f52e03.png)
     m = re.match(r"^(SEDS\.[^.]+)\.([A-Z][A-Z])\.([A-Z])$", series_id)
     if m:
+        counters.add_counter('info_seds_record_count', 1)
         sv_part1 = m.group(1)
         place = m.group(2)
         sv_part2 = m.group(3)
diff --git a/scripts/us_eia/opendata/process/total.py b/scripts/us_eia/opendata/process/total.py
index ce255f258b..fe148f6bbf 100644
--- a/scripts/us_eia/opendata/process/total.py
+++ b/scripts/us_eia/opendata/process/total.py
@@ -22,6 +22,7 @@ def extract_place_statvar(series_id, counters):
     # them for now.
     m = re.match(r"^(TOTAL\..*)US\.([A-Z])$", series_id)
     if m:
+        counters.add_counter('info_total_record_count', 1)
         sv_part1 = m.group(1)
         sv_part2 = m.group(2)
         sv_id = f'{sv_part1}.{sv_part2}'

From 147fd4b8efcabe3f72e8f43602933c00e45b32a0 Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Mon, 3 Feb 2025 07:41:54 +0000
Subject: [PATCH 13/18] updated eia 8 imports scripts

---
 scripts/us_eia/opendata/process/common_test.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py
index 3eb0ba22e9..919d399a7e 100644
--- a/scripts/us_eia/opendata/process/common_test.py
+++ b/scripts/us_eia/opendata/process/common_test.py
@@ -17,14 +17,10 @@
 import sys
 import tempfile
 import unittest
-import inspect
 from absl import logging
 
 # Allows the following module imports to work when running as a script.
 # relative to scripts/
-
-#from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total
-
 module_dir_ = os.path.dirname(__file__)
 sys.path.insert(0, module_dir_)
 from main import *
@@ -68,8 +64,6 @@ def test_process(self):
                 act_mcf = os.path.join(tmp_dir, exp_mcf)
                 act_svg_mcf = os.path.join(tmp_dir, exp_svg_mcf)
                 act_tmcf = os.path.join(tmp_dir, exp_tmcf)
-                logging.debug(f"extract_fn {inspect.getmodule(extract_fn)}")
-
                 common.process(dataset, dataset_name, in_file, act_csv, act_mcf,
                                act_svg_mcf, act_tmcf, extract_fn, schema_fn)
 

From d7fb1910240c2eb936a8ee52e2c56301e9257734 Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Mon, 3 Feb 2025 07:54:57 +0000
Subject: [PATCH 14/18] fixed scripts and lint test

---
 scripts/us_eia/opendata/process/common.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/scripts/us_eia/opendata/process/common.py b/scripts/us_eia/opendata/process/common.py
index 2ca667c5ee..0b4bf49a1a 100644
--- a/scripts/us_eia/opendata/process/common.py
+++ b/scripts/us_eia/opendata/process/common.py
@@ -18,7 +18,6 @@
 import json
 import logging
 import re
-import inspect
 from collections import defaultdict
 from sys import path
 
@@ -489,9 +488,6 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf,
             if not time_series:
                 counters.add_counter('error_missing_time_series', 1)
                 continue
-            logging.info(
-                f"extract_place_statvar_fn {inspect.getmodule(extract_place_statvar_fn)}"
-            )
 
             # Extract raw place and stat-var from series_id.
             (raw_place, raw_sv,

From 6070463f5365c356f3d721576c5ad09e168629bd Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Mon, 3 Feb 2025 09:42:23 +0000
Subject: [PATCH 15/18] updated manifest.json file

---
 scripts/us_eia/opendata/manifest.json | 40 +++++++++++++++++++++------
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/scripts/us_eia/opendata/manifest.json b/scripts/us_eia/opendata/manifest.json
index 1144ac4090..5c30c7ecf7 100644
--- a/scripts/us_eia/opendata/manifest.json
+++ b/scripts/us_eia/opendata/manifest.json
@@ -13,7 +13,10 @@
       "import_inputs": [
         {
           "template_mcf": "tmp_raw_data/COAL/COAL.tmcf",
-          "cleaned_csv": "tmp_raw_data/COAL/COAL.csv"
+          "cleaned_csv": "tmp_raw_data/COAL/COAL.csv",
+          "source_files": [
+            "tmp_raw_data/COAL/COAL.txt"
+          ]
         }
       ],
       "cron_schedule": "0 6 5,20 * *"
@@ -34,7 +37,10 @@
           "cleaned_csv": "tmp_raw_data/ELEC/ELEC.csv"
         }
       ],
-      "cron_schedule": "0 7 5,20 * *"
+      "cron_schedule": "0 7 5,20 * *",
+      "source_files": [
+        "tmp_raw_data/ELEC/ELEC.txt"
+      ]
     },
     {
       "import_name": "EIA_NaturalGas",
@@ -52,7 +58,10 @@
           "cleaned_csv": "tmp_raw_data/NG/NG.csv"
         }
       ],
-      "cron_schedule": "0 8 5,20 * *"
+      "cron_schedule": "0 8 5,20 * *",
+      "source_files": [
+        "tmp_raw_data/NG/NG.txt"
+      ]
     },
     {
       "import_name": "EIA_NuclearOutages",
@@ -70,7 +79,10 @@
           "cleaned_csv": "tmp_raw_data/NUC_STATUS/NUC_STATUS.csv"
         }
       ],
-      "cron_schedule": "0 9 5,20 * *"
+      "cron_schedule": "0 9 5,20 * *",
+      "source_files": [
+        "tmp_raw_data/NUC_STATUS/NUC_STATUS.txt"
+      ]
     },
     {
       "import_name": "EIA_Petroleum",
@@ -88,7 +100,10 @@
           "cleaned_csv": "tmp_raw_data/PET/PET.csv"
         }
       ],
-      "cron_schedule": "0 10 5,20 * *"
+      "cron_schedule": "0 10 5,20 * *",
+      "source_files": [
+        "tmp_raw_data/PET/PET.txt"
+      ]
     },
     {
       "import_name": "EIA_International",
@@ -106,7 +121,10 @@
           "cleaned_csv": "tmp_raw_data/INTL/INTL.csv"
         }
       ],
-      "cron_schedule": "0 11 5,20 * * *"
+      "cron_schedule": "0 11 5,20 * * *",
+      "source_files": [
+        "tmp_raw_data/INTL/INTL.txt"
+      ]
     },
     {
       "import_name": "EIA_SEDS",
@@ -124,7 +142,10 @@
           "cleaned_csv": "tmp_raw_data/SEDS/SEDS.csv"
         }
       ],
-      "cron_schedule": "0 12 5,20 * *"
+      "cron_schedule": "0 12 5,20 * *",
+      "source_files": [
+        "tmp_raw_data/SEDS/SEDS.txt"
+      ]
     },
     {
       "import_name": "EIA_TotalEnergy",
@@ -142,7 +163,10 @@
           "cleaned_csv": "tmp_raw_data/TOTAL/TOTAL.csv"
         }
       ],
-      "cron_schedule": "0 13 5,20 * *"
+      "cron_schedule": "0 13 5,20 * *",
+      "source_files": [
+        "tmp_raw_data/TOTAL/TOTAL.txt"
+      ]
     }
   ]
 }
\ No newline at end of file

From 67b235aa950009ed9c5b79d16b2fb3b37c319e76 Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Mon, 3 Feb 2025 10:41:30 +0000
Subject: [PATCH 16/18] Resolved PR comments

---
 scripts/us_eia/opendata/manifest.json | 70 +++++++++++++--------------
 1 file changed, 34 insertions(+), 36 deletions(-)

diff --git a/scripts/us_eia/opendata/manifest.json b/scripts/us_eia/opendata/manifest.json
index 5c30c7ecf7..ca33146656 100644
--- a/scripts/us_eia/opendata/manifest.json
+++ b/scripts/us_eia/opendata/manifest.json
@@ -2,24 +2,22 @@
   "import_specifications": [
     {
       "import_name": "EIA_Coal",
-      "curator_emails": [
-        "support@datacommons.org"
-      ],
+      "curator_emails": [],
       "provenance_url": "https://www.eia.gov/opendata/qb.php?category=717234",
       "provenance_description": "Coal dataset has country, state-level level information .",
       "scripts": [
         "process.py --dataset=COAL"
       ],
+      "source_files": [
+        "tmp_raw_data/COAL/COAL.txt"
+      ],
       "import_inputs": [
         {
           "template_mcf": "tmp_raw_data/COAL/COAL.tmcf",
-          "cleaned_csv": "tmp_raw_data/COAL/COAL.csv",
-          "source_files": [
-            "tmp_raw_data/COAL/COAL.txt"
-          ]
+          "cleaned_csv": "tmp_raw_data/COAL/COAL.csv"
         }
       ],
-      "cron_schedule": "0 6 5,20 * *"
+      "cron_schedule": "0 6 1 2 *"
     },
     {
       "import_name": "EIA_Electricity",
@@ -31,16 +29,16 @@
       "scripts": [
         "process.py --dataset=ELEC"
       ],
+      "source_files": [
+        "tmp_raw_data/ELEC/ELEC.txt"
+      ],
       "import_inputs": [
         {
           "template_mcf": "tmp_raw_data/ELEC/ELEC.tmcf",
           "cleaned_csv": "tmp_raw_data/ELEC/ELEC.csv"
         }
       ],
-      "cron_schedule": "0 7 5,20 * *",
-      "source_files": [
-        "tmp_raw_data/ELEC/ELEC.txt"
-      ]
+      "cron_schedule": "0 7 5,20 * *"
     },
     {
       "import_name": "EIA_NaturalGas",
@@ -52,16 +50,16 @@
       "scripts": [
         "process.py --dataset=NG"
       ],
+      "source_files": [
+        "tmp_raw_data/NG/NG.txt"
+      ],
       "import_inputs": [
         {
           "template_mcf": "tmp_raw_data/NG/NG.tmcf",
           "cleaned_csv": "tmp_raw_data/NG/NG.csv"
         }
       ],
-      "cron_schedule": "0 8 5,20 * *",
-      "source_files": [
-        "tmp_raw_data/NG/NG.txt"
-      ]
+      "cron_schedule": "0 8 5,20 * *"
     },
     {
       "import_name": "EIA_NuclearOutages",
@@ -73,16 +71,16 @@
       "scripts": [
         "process.py --dataset=NUC_STATUS"
       ],
+      "source_files": [
+        "tmp_raw_data/NUC_STATUS/NUC_STATUS.txt"
+      ],
       "import_inputs": [
         {
           "template_mcf": "tmp_raw_data/NUC_STATUS/NUC_STATUS.tmcf",
           "cleaned_csv": "tmp_raw_data/NUC_STATUS/NUC_STATUS.csv"
         }
       ],
-      "cron_schedule": "0 9 5,20 * *",
-      "source_files": [
-        "tmp_raw_data/NUC_STATUS/NUC_STATUS.txt"
-      ]
+      "cron_schedule": "0 9 5,20 * *"
     },
     {
       "import_name": "EIA_Petroleum",
@@ -94,16 +92,16 @@
       "scripts": [
         "process.py --dataset=PET"
       ],
+      "source_files": [
+        "tmp_raw_data/PET/PET.txt"
+      ],
       "import_inputs": [
         {
           "template_mcf": "tmp_raw_data/PET/PET.tmcf",
           "cleaned_csv": "tmp_raw_data/PET/PET.csv"
         }
       ],
-      "cron_schedule": "0 10 5,20 * *",
-      "source_files": [
-        "tmp_raw_data/PET/PET.txt"
-      ]
+      "cron_schedule": "0 10 5,20 * *"
     },
     {
       "import_name": "EIA_International",
@@ -115,16 +113,16 @@
       "scripts": [
         "process.py --dataset=INTL"
       ],
+      "source_files": [
+        "tmp_raw_data/INTL/INTL.txt"
+      ],
       "import_inputs": [
         {
           "template_mcf": "tmp_raw_data/INTL/INTL.tmcf",
           "cleaned_csv": "tmp_raw_data/INTL/INTL.csv"
         }
       ],
-      "cron_schedule": "0 11 5,20 * * *",
-      "source_files": [
-        "tmp_raw_data/INTL/INTL.txt"
-      ]
+      "cron_schedule": "0 11 5,20 * *"
     },
     {
       "import_name": "EIA_SEDS",
@@ -136,16 +134,16 @@
       "scripts": [
         "process.py --dataset=SEDS"
       ],
+      "source_files": [
+        "tmp_raw_data/SEDS/SEDS.txt"
+      ],
       "import_inputs": [
         {
           "template_mcf": "tmp_raw_data/SEDS/SEDS.tmcf",
           "cleaned_csv": "tmp_raw_data/SEDS/SEDS.csv"
         }
       ],
-      "cron_schedule": "0 12 5,20 * *",
-      "source_files": [
-        "tmp_raw_data/SEDS/SEDS.txt"
-      ]
+      "cron_schedule": "0 12 5,20 * *"
     },
     {
       "import_name": "EIA_TotalEnergy",
@@ -157,16 +155,16 @@
       "scripts": [
         "process.py --dataset=TOTAL"
       ],
+      "source_files": [
+        "tmp_raw_data/TOTAL/TOTAL.txt"
+      ],
       "import_inputs": [
         {
           "template_mcf": "tmp_raw_data/TOTAL/TOTAL.tmcf",
           "cleaned_csv": "tmp_raw_data/TOTAL/TOTAL.csv"
         }
       ],
-      "cron_schedule": "0 13 5,20 * *",
-      "source_files": [
-        "tmp_raw_data/TOTAL/TOTAL.txt"
-      ]
+      "cron_schedule": "0 13 5,20 * *"
     }
   ]
 }
\ No newline at end of file

From 74e54be81318eec7763bb0a651a65b31e2b816bb Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Mon, 3 Feb 2025 11:16:49 +0000
Subject: [PATCH 17/18] Resolved PR comments

---
 scripts/us_eia/opendata/manifest.json | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/us_eia/opendata/manifest.json b/scripts/us_eia/opendata/manifest.json
index ca33146656..5ab552e647 100644
--- a/scripts/us_eia/opendata/manifest.json
+++ b/scripts/us_eia/opendata/manifest.json
@@ -2,7 +2,9 @@
   "import_specifications": [
     {
       "import_name": "EIA_Coal",
-      "curator_emails": [],
+      "curator_emails": [
+        "support@datacommons.org"
+      ],
       "provenance_url": "https://www.eia.gov/opendata/qb.php?category=717234",
       "provenance_description": "Coal dataset has country, state-level level information .",
       "scripts": [

From 4d097289656d58ae6610389145ca31e9e52bf986 Mon Sep 17 00:00:00 2001
From: Harsha Vardhan Chandaluri <chandaluri@google.com>
Date: Mon, 3 Feb 2025 12:10:17 +0000
Subject: [PATCH 18/18] Resolved PR comments

---
 scripts/us_eia/opendata/manifest.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/us_eia/opendata/manifest.json b/scripts/us_eia/opendata/manifest.json
index 5ab552e647..ff50f690d7 100644
--- a/scripts/us_eia/opendata/manifest.json
+++ b/scripts/us_eia/opendata/manifest.json
@@ -19,7 +19,7 @@
           "cleaned_csv": "tmp_raw_data/COAL/COAL.csv"
         }
       ],
-      "cron_schedule": "0 6 1 2 *"
+      "cron_schedule": "0 14 5,20 * *"
     },
     {
       "import_name": "EIA_Electricity",
@@ -169,4 +169,4 @@
       "cron_schedule": "0 13 5,20 * *"
     }
   ]
-}
\ No newline at end of file
+}