From fc4bee8ee9ad1a632c43d2995578bd00f731d9b1 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Wed, 29 Jan 2025 09:33:24 +0000 Subject: [PATCH 01/18] eia_opendata_imports --- scripts/us_eia/opendata/README.md | 52 +++- scripts/us_eia/opendata/download_bulk.py | 61 ---- .../us_eia/opendata/generate_jsonl_for_bq.py | 33 +-- scripts/us_eia/opendata/manifest.json | 132 +++++++++ scripts/us_eia/opendata/process.py | 108 +++++++ scripts/us_eia/opendata/process/README.md | 11 +- scripts/us_eia/opendata/process/category.py | 6 +- scripts/us_eia/opendata/process/coal.py | 14 +- scripts/us_eia/opendata/process/common.py | 271 +++++++++++++++--- scripts/us_eia/opendata/process/elec.py | 23 +- scripts/us_eia/opendata/process/nuclear.py | 10 +- scripts/us_eia/opendata/process/pet.py | 2 +- .../opendata/process/test_data/categories.csv | 10 +- .../process/test_data/categories.tmcf | 1 + .../opendata/process/test_data/coal.csv | 24 +- .../opendata/process/test_data/coal.tmcf | 1 + .../opendata/process/test_data/elec.csv | 20 +- .../opendata/process/test_data/elec.tmcf | 1 + .../opendata/process/test_data/intl.csv | 14 +- .../opendata/process/test_data/intl.tmcf | 1 + .../us_eia/opendata/process/test_data/ng.csv | 24 +- .../us_eia/opendata/process/test_data/ng.tmcf | 1 + .../opendata/process/test_data/nuc_status.csv | 26 +- .../process/test_data/nuc_status.tmcf | 1 + .../us_eia/opendata/process/test_data/pet.csv | 16 +- .../opendata/process/test_data/pet.tmcf | 1 + .../opendata/process/test_data/seds.csv | 10 +- .../opendata/process/test_data/seds.tmcf | 1 + .../opendata/process/test_data/total.csv | 6 +- .../opendata/process/test_data/total.tmcf | 1 + 30 files changed, 644 insertions(+), 238 deletions(-) delete mode 100644 scripts/us_eia/opendata/download_bulk.py create mode 100644 scripts/us_eia/opendata/manifest.json create mode 100644 scripts/us_eia/opendata/process.py diff --git a/scripts/us_eia/opendata/README.md b/scripts/us_eia/opendata/README.md index 
f61b576b76..88dc7f38f7 100644 --- a/scripts/us_eia/opendata/README.md +++ b/scripts/us_eia/opendata/README.md @@ -6,12 +6,6 @@ Each dataset available as a Zip-file of JSONL content. See [here](https://www.eia.gov/opendata/bulkfiles.php) for more details. -To download the latest versions of ALL datasets available, run the following command. Files will be downloaded and extracted to a tmp_raw_data folder. - -```bash -python3 download_bulk.py -``` - ### Data Exploration To ease analysis of the datasets, see [`generate_jsonl_for_bq.py`](generate_jsonl_for_bq.py) for instructions to convert and import the data into BigQuery. @@ -20,11 +14,47 @@ To ease analysis of the datasets, see [`generate_jsonl_for_bq.py`](generate_json This dataset is available for public use, license is available at https://www.eia.gov/about/copyrights_reuse.php -### Import procedure -- Download data +- Run the [processor](process/README.md) + +### Downloading and Processing Data + + + If you want to perform "only download", run the below command: + + python3 process.py --dataset=INTL --mode=download + python3 process.py --dataset=ELEC --mode=download + python3 process.py --dataset=COAL --mode=download + python3 process.py --dataset=PET --mode=download + python3 process.py --dataset=NG --mode=download + python3 process.py --dataset=SEDS --mode=download + python3 process.py --dataset=NUC_STATUS --mode=download + python3 process.py --dataset=TOTAL --mode=download + + + + If you want to perform "only process", run the below command: + + Running this command generates input_fles and csv, mcf, tmcf, svg.mcf files. 
+ + python3 process.py --dataset=INTL --mode=process + python3 process.py --dataset=ELEC --mode=process + python3 process.py --dataset=COAL --mode=process + python3 process.py --dataset=PET --mode=process + python3 process.py --dataset=NG --mode=process + python3 process.py --dataset=SEDS --mode=process + python3 process.py --dataset=NUC_STATUS --mode=process + python3 process.py --dataset=TOTAL --mode=process + + To Download and process the data together, run the below command: ```bash - python3 download_bulk.py - ``` + python3 process.py --dataset=TOTAL + python3 process.py --dataset=INTL + python3 process.py --dataset=ELEC + python3 process.py --dataset=COAL + python3 process.py --dataset=NG + python3 process.py --dataset=PET + python3 process.py --dataset=SEDS + python3 process.py --dataset=NUC_STATUS -- Run the [processor](process/README.md) \ No newline at end of file + ``` diff --git a/scripts/us_eia/opendata/download_bulk.py b/scripts/us_eia/opendata/download_bulk.py deleted file mode 100644 index 554187e9b6..0000000000 --- a/scripts/us_eia/opendata/download_bulk.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Utility to download all EIA data from https://api.eia.gov/bulk/manifest.txt -Files are stored in raw_data. 
- -Run this script in this folder: -python3 download_bulk.py -""" - -import io -import zipfile - -import requests - -from absl import flags -from absl import app - -MANIFEST_URL = "https://api.eia.gov/bulk/manifest.txt" - -FLAGS = flags.FLAGS -flags.DEFINE_string('data_dir', 'tmp_raw_data', 'Data dir to download into') -flags.DEFINE_list('datasets', [], 'Datasets to download. Everything, if empty.') - - -def download_file(url: str, save_path: str): - print(f'Downloading {url} to {save_path}') - r = requests.get(url, stream=True) - z = zipfile.ZipFile(io.BytesIO(r.content)) - z.extractall(save_path) - - -def download_manifest(): - return requests.get(MANIFEST_URL).json() - - -def main(_): - assert FLAGS.data_dir - manifest_json = download_manifest() - datasets = manifest_json.get('dataset', {}) - for dataset_name in datasets: - if FLAGS.datasets and dataset_name not in FLAGS.datasets: - continue - print(dataset_name) - dataset = datasets[dataset_name] - download_file(dataset['accessURL'], f'{FLAGS.data_dir}/{dataset_name}') - - -if __name__ == '__main__': - app.run(main) diff --git a/scripts/us_eia/opendata/generate_jsonl_for_bq.py b/scripts/us_eia/opendata/generate_jsonl_for_bq.py index 27227de946..af1579277c 100644 --- a/scripts/us_eia/opendata/generate_jsonl_for_bq.py +++ b/scripts/us_eia/opendata/generate_jsonl_for_bq.py @@ -40,10 +40,9 @@ IN_DATA_PATH = 'tmp_raw_data' OUT_DATA_PATH = 'tmp_bq_import' DATASETS = [ - 'AEO.2014', 'AEO.2015', 'AEO.2016', 'AEO.2017', 'AEO.2018', 'AEO.2019', - 'AEO.2020', 'AEO.2021', 'COAL', 'EBA', 'ELEC', 'EMISS', 'IEO.2017', - 'IEO.2019', 'INTL', 'NG', 'NUC_STATUS', 'PET', 'PET_IMPORTS', 'SEDS', - 'STEO', 'TOTAL' + 'AEO.2020', 'AEO.2021', 'AEO.2022', 'AEO.2023', 'AEO.IEO2', 'COAL', 'EBA', + 'ELEC', 'EMISS', 'IEO', 'INTL', 'NG', 'NUC_STATUS', 'PET', 'PET_IMPORTS', + 'SEDS', 'STEO', 'TOTAL' ] @@ -77,17 +76,18 @@ def process_dataset(dataset, in_file_path, out_file_path): with open(out_file_path + '.series.jsonl', 'w+') as series_fp: 
with open(out_file_path + '.categories.jsonl', 'w+') as category_fp: for line in data_fp: - data = json.loads(line) - series_id = data.get('series_id', None) - if series_id: - jsonl = extract_series_to_jsonl(line, dataset) - series_fp.write(json.dumps(jsonl)) - series_fp.write('\n') - category_id = data.get('category_id', None) - if category_id: - jsonl = extract_category_to_jsonl(line, dataset) - category_fp.write(json.dumps(jsonl)) - category_fp.write('\n') + if line.startswith('{'): + data = json.loads(line) + series_id = data.get('series_id', None) + if series_id: + jsonl = extract_series_to_jsonl(line, dataset) + series_fp.write(json.dumps(jsonl)) + series_fp.write('\n') + category_id = data.get('category_id', None) + if category_id: + jsonl = extract_category_to_jsonl(line, dataset) + category_fp.write(json.dumps(jsonl)) + category_fp.write('\n') def process_single(subdir, file): @@ -103,7 +103,8 @@ def process_all(): for file in sorted(files): if not file.endswith('.txt'): continue - print(f'Processing {subdir}/{file}') + print(f'Processing1 {subdir}/{file}') + process_single(subdir, file) diff --git a/scripts/us_eia/opendata/manifest.json b/scripts/us_eia/opendata/manifest.json new file mode 100644 index 0000000000..221c340c1f --- /dev/null +++ b/scripts/us_eia/opendata/manifest.json @@ -0,0 +1,132 @@ +{ + "import_specifications": [ + { + "import_name": "EIA_Coal", + "curator_emails": [], + "provenance_url": "https://www.eia.gov/opendata/qb.php?category=717234", + "provenance_description": "Coal dataset has country, state-level level information .", + "scripts": [ + "process.py --dataset=COAL" + ], + "import_inputs": [ + { + "template_mcf": "tmp_raw_data/COAL/COAL.tmcf", + "cleaned_csv": "tmp_raw_data/COAL/COAL.csv" + } + ], + "cron_schedule": "0 6 1 2 *" + }, + { + "import_name": "EIA_Electricity", + "curator_emails": [], + "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0", + "provenance_description": "Electricity dataset has country, 
state-level and plant-level information on electricity generation, consumption, sales etc by energy source and “sectors” (like residential, commercial, etc.).", + "scripts": [ + "process.py --dataset=ELEC" + ], + "import_inputs": [ + { + "template_mcf": "tmp_raw_data/ELEC/ELEC.tmcf", + "cleaned_csv": "tmp_raw_data/ELEC/ELEC.csv" + } + ], + "cron_schedule": "0 8 1 2 *" + }, + { + "import_name": "EIA_NaturalGas", + "curator_emails": [], + "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0", + "provenance_description": "Natural gas dataset has country and state-level data.", + "scripts": [ + "process.py --dataset=NG" + ], + "import_inputs": [ + { + "template_mcf": "tmp_raw_data/NG/NG.tmcf", + "cleaned_csv": "tmp_raw_data/NG/NG.csv" + } + ], + "cron_schedule": "05 10 * * *" + }, + { + "import_name": "EIA_NuclearOutages", + "curator_emails": [], + "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0", + "provenance_description": "Nuclear outage dataset has nuclear-plant and national data about Nuclear energy generation capacity and planned outages.", + "scripts": [ + "process.py --dataset=NUC_STATUS" + ], + "import_inputs": [ + { + "template_mcf": "tmp_raw_data/NUC_STATUS/NUC_STATUS.tmcf", + "cleaned_csv": "tmp_raw_data/NUC_STATUS/NUC_STATUS.csv" + } + ], + "cron_schedule": "01 9 * * *" + }, + { + "import_name": "EIA_Petroleum", + "curator_emails": [], + "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0", + "provenance_description": "EIA Petroleum dataset has country and state-level data.", + "scripts": [ + "process.py --dataset=PET" + ], + "import_inputs": [ + { + "template_mcf": "tmp_raw_data/PET/PET.tmcf", + "cleaned_csv": "tmp_raw_data/PET/PET.csv" + } + ], + "cron_schedule": "5 9 2 2 *" + }, + { + "import_name": "EIA_International", + "curator_emails": [], + "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0", + "provenance_description": "EIA International Energy dataset has country, continent 
and world-level data.", + "scripts": [ + "process.py --dataset=INTL" + ], + "import_inputs": [ + { + "template_mcf": "tmp_raw_data/INTL/INTL.tmcf", + "cleaned_csv": "tmp_raw_data/INTL/INTL.csv" + } + ], + "cron_schedule": "1 7 * 1,4,7,10 *" + }, + { + "import_name": "EIA_SEDS", + "curator_emails": [], + "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0", + "provenance_description": "EIA SEDS International Energy dataset has US country-level and state-level data.", + "scripts": [ + "process.py --dataset=SEDS" + ], + "import_inputs": [ + { + "template_mcf": "tmp_raw_data/SEDS/SEDS.tmcf", + "cleaned_csv": "tmp_raw_data/SEDS/SEDS.csv" + } + ], + "cron_schedule": "0 0 1 1 *" + }, + { + "import_name": "EIA_TotalEnergy", + "curator_emails": [], + "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0", + "provenance_description": "Total Energy dataset has US country-level data.", + "scripts": [ + "process.py --dataset=TOTAL" + ], + "import_inputs": [ + { + "template_mcf": "tmp_raw_data/TOTAL/TOTAL.tmcf", + "cleaned_csv": "tmp_raw_data/TOTAL/TOTAL.csv" + } + ], + "cron_schedule": "0 0 1 * *" + } + ] +} \ No newline at end of file diff --git a/scripts/us_eia/opendata/process.py b/scripts/us_eia/opendata/process.py new file mode 100644 index 0000000000..3d67164ba8 --- /dev/null +++ b/scripts/us_eia/opendata/process.py @@ -0,0 +1,108 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +Utility to download all EIA data from https://api.eia.gov/bulk/manifest.txt +Files are stored in raw_data. + +Run this script in this folder: +python3 process.py --dataset=INTL --mode=download + +Replace `INTL` with any of the other dataset codes +""" + +import io +import os +import sys +import zipfile +import requests + +from absl import flags +from absl import app +from absl import logging + +from process import common, coal, elec, intl, ng, nuclear, pet, seds, total + +MANIFEST_URL = "https://api.eia.gov/bulk/manifest.txt" + +FLAGS = flags.FLAGS +flags.DEFINE_string('data_dir', 'tmp_raw_data', 'Data dir to download into') +flags.DEFINE_string('dataset', '', + 'Datasets to download. Everything, if empty.') +flags.DEFINE_string('mode', '', 'Options: download or process') + +## Value: (name, extract_fn, schema_fn) +_DATASETS = { + 'COAL': ('Coal', coal.extract_place_statvar, coal.generate_statvar_schema), + 'ELEC': ('Electricity', elec.extract_place_statvar, + elec.generate_statvar_schema), + 'INTL': ('Energy Overview (INTL)', intl.extract_place_statvar, None), + 'PET': ('Petroleum', pet.extract_place_statvar, None), + 'NG': ('Natural Gas', ng.extract_place_statvar, None), + 'NUC_STATUS': ('Nuclear Outages', nuclear.extract_place_statvar, + nuclear.generate_statvar_schema), + 'SEDS': ('Consumption, Production, Prices and Expenditure (SEDS)', + seds.extract_place_statvar, None), + 'TOTAL': ('Energy Overview (TOTAL)', total.extract_place_statvar, None) +} + + +def download_file(url: str, save_path: str): + try: + r = requests.get(url, stream=True) + z = zipfile.ZipFile(io.BytesIO(r.content)) + z.extractall(save_path) + except Exception as e: + logging.fatal(f"error while downloading the file,{url} -{e}") + + +def download_manifest(): + try: + return requests.get(MANIFEST_URL).json() + except Exception as e: + logging.fatal( + f"error while downloading the manifest,{MANIFEST_URL} -{e}") + + +def main(_): + mode = FLAGS.mode + assert FLAGS.data_dir + 
manifest_json = download_manifest() + datasets = manifest_json.get('dataset', {}) + logging.info("================Calling main method") + for dataset_name in datasets: + if FLAGS.dataset and dataset_name not in FLAGS.dataset: + continue + dataset = datasets[dataset_name] + if mode == "" or mode == "download": + download_file(dataset['accessURL'], + f'{FLAGS.data_dir}/{dataset_name}') + if mode == "" or mode == "process": + file_prefix = os.path.join(f'{FLAGS.data_dir}/{dataset_name}', + FLAGS.dataset) + logging.info("================Calling process method") + common.process( + dataset=FLAGS.dataset, + dataset_name=_DATASETS[FLAGS.dataset], + in_json=file_prefix + '.txt', + out_csv=file_prefix + '.csv', + out_sv_mcf=file_prefix + '.mcf', + out_svg_mcf=file_prefix + '.svg.mcf', + out_tmcf=file_prefix + '.tmcf', + extract_place_statvar_fn=_DATASETS[FLAGS.dataset][1], + generate_statvar_schema_fn=_DATASETS[FLAGS.dataset][2]) + logging.info("================process completed") + + +if __name__ == '__main__': + app.run(main) diff --git a/scripts/us_eia/opendata/process/README.md b/scripts/us_eia/opendata/process/README.md index 1e3a543ace..ad5c8d8bbe 100644 --- a/scripts/us_eia/opendata/process/README.md +++ b/scripts/us_eia/opendata/process/README.md @@ -58,16 +58,21 @@ takes a raw stat-var and generates a fully defined stat-var for it. Download and unzip the data files based on the [manifest](https://api.eia.gov/bulk/manifest.txt) by running the -[`download_bulk.py`](https://github.com/datacommonsorg/data/blob/master/scripts/us_eia/opendata/download_bulk.py) +[`python3 process.py --dataset=TOTAL`](https://github.com/datacommonsorg/data/blob/master/scripts/us_eia/opendata/process.py) script. 
To generate CSV, TMCF and stat-var MCF for a supported dataset: ```bash -python3 main.py --data_dir=tmp_raw_data/ELEC --dataset=ELEC +python3 process.py --dataset=INTL --mode=process + python3 process.py --dataset=ELEC --mode=process + python3 process.py --dataset=PET --mode=process + python3 process.py --dataset=NG --mode=process + python3 process.py --dataset=SEDS --mode=process + python3 process.py --dataset=NUC_STATUS --mode=process + python3 process.py --dataset=TOTAL --mode=process ``` -Replace `ELEC` with any of the other dataset codes listed above. To run tests: diff --git a/scripts/us_eia/opendata/process/category.py b/scripts/us_eia/opendata/process/category.py index 54720fb327..449c55b1a8 100644 --- a/scripts/us_eia/opendata/process/category.py +++ b/scripts/us_eia/opendata/process/category.py @@ -97,7 +97,7 @@ def trim_area_categories(svg_info, counters): # Delete "area" categories. for svg, (_, name) in list(svg_info.items()): if name and name.lower() == 'by area': - counters['info_deleted_area_categories'] += 1 + counters.add_counter('info_deleted_area_categories', 1) del svg_info[svg] # Trim orphans, except for dataset_root. 
@@ -107,7 +107,7 @@ def trim_area_categories(svg_info, counters): for svg, (parent, _) in list(svg_info.items()): if parent != dataset_root and parent not in svg_info: run_again = True - counters['info_deleted_orphan_categories'] += 1 + counters.add_counter('info_deleted_orphan_categories', 1) del svg_info[svg] @@ -139,7 +139,7 @@ def process_category(dataset, data, extract_place_statvar_fn, svg_info, for series in child_series: (_, raw_sv, _) = extract_place_statvar_fn(series, counters) if not raw_sv: - counters['error_extract_place_sv_for_category'] += 1 + counters.add_counter('error_extract_place_sv_for_category', 1) continue if raw_sv not in sv_membership_map: diff --git a/scripts/us_eia/opendata/process/coal.py b/scripts/us_eia/opendata/process/coal.py index 6b11850b83..c19d90990f 100644 --- a/scripts/us_eia/opendata/process/coal.py +++ b/scripts/us_eia/opendata/process/coal.py @@ -184,8 +184,7 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters): Returns schema-ful stat-var ID if schema was generated, None otherwise. """ - counters['generate_statvar_schema'] += 1 - + counters.add_counter('generate_statvar_schema', 1) # COAL.{Measure}.{ConsumingSector}.{Period} m = re.match(r"^COAL\.([^._]+_?[^._]+)\.([0-9]+)\.([AQM])$", raw_sv) if m: @@ -193,14 +192,14 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters): consuming_sector = m.group(2) period = m.group(3) else: - counters['error_unparsable_raw_statvar'] += 1 + counters.add_counter('error_unparsable_raw_statvar', 1) return None - counters[f'measure-{measure}'] += 1 + counters.add_counter(f'measure-{measure}', 1) # Get popType and mprop based on measure. 
measure_pvs = _MEASURE_MAP.get(measure, None) if not measure_pvs: - counters[f'error_missing_measure-{measure}'] += 1 + counters.add_counter(f'error_missing_measure-{measure}', 1) return None sv_id_parts = [common.PERIOD_MAP[period], measure_pvs[0]] @@ -213,13 +212,14 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters): if consuming_sector: cs = _CONSUMING_SECTOR.get(consuming_sector, None) if not cs: - counters[f'error_missing_consuming_sector-{consumingSector}'] += 1 + counters.add_counter( + f'error_missing_consuming_sector-{consuming_sector}', 1) return None sv_id_parts.append(cs) sv_pvs.append(f'consumingSector: dcs:{cs}') if measure not in _UNIT_MAP: - counters[f'error_missing_unit-{measure}'] += 1 + counters.add_counter(f'error_missing_unit-{measure}', 1) return None (unit, sfactor) = _UNIT_MAP[measure] diff --git a/scripts/us_eia/opendata/process/common.py b/scripts/us_eia/opendata/process/common.py index 6624da1f2f..8954d80487 100644 --- a/scripts/us_eia/opendata/process/common.py +++ b/scripts/us_eia/opendata/process/common.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """Process EIA datasets to produce TMCF and CSV.""" - +import os +import sys import csv import json import logging @@ -21,10 +22,17 @@ from sys import path # For import util.alpha2_to_dcid -path.insert(1, '../../../../') -import util.alpha2_to_dcid as alpha2_to_dcid -import util.name_to_alpha2 as name_to_alpha2 - +# Setup path for import from data/util +_MODULE_DIR = os.path.dirname(os.path.abspath(__file__)) +_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(_SCRIPT_DIR) +_DATA_DIR = _SCRIPT_DIR.split('/data/')[0] +sys.path.append(os.path.join(_DATA_DIR, 'data/util')) +import alpha2_to_dcid as alpha2_to_dcid +import name_to_alpha2 as name_to_alpha2 + +import file_util +from counters import Counters from . 
import category PERIOD_MAP = { @@ -34,9 +42,155 @@ 'Q': 'Quarterly', } +MMETHOD_MAPPING_DICT = { + # input source unit wise mapping to measurmentMethod + 'Index1982-1984=100': 'BasePeriod1982_1984', + '2017=1.00000': 'BaseYear2017', + 'Real(1982-1984)CentsPerKilowatthour': 'BasePeriod1982_1984', + 'Real(1982-1984)DollarsPerGallon': 'BasePeriod1982_1984', + 'Real(1982-1984)DollarsPerMillionBtu': 'BasePeriod1982_1984', + 'Real(1982-1984)DollarsPerThousandCubicFeet': 'BasePeriod1982_1984', + 'ThousandBtuPerChained(2017)Dollar': 'BasePeriod2017', + 'BillionChained(2017)Dollars': 'BasePeriod2017', + 'MetricTonsCarbonDioxidePerMillionChained(2017)Dollars': 'BasePeriod2017' +} + +UNIT_MAPPING_DICT = { + # input source unit : DC unit + 'Days': + 'Day', + 'ThousandsOfRegisteredVehicles': + '', + 'RegisteredVehicle': + '', + 'NumberOfDays': + 'Day', + '$/ShortTon': + 'USDollarPerShortTon', + 'Dollars': + 'USDollar', + 'MillionBarrels': + 'MillionsBarrels', + 'ThousandBarrels': + 'Barrel', + 'ThousandDollars': + 'USDollar', + '1000MetricTons': + 'ThousandMetricTons', + 'BillionKilowatthours': + 'BillionKilowattHours', + 'Terajoules': + 'Terajoule', + 'DollarsPerMillionBtu': + 'USDollarPerMillionBtu', + 'DollarsPerThousandCubicFeet': + 'USDollarPerThousandCubicFeet', + 'CentsPerKilowatthour': + 'CentsPerKilowattHour', + 'MillionKilowatthours': + 'MillionKilowattHours', + 'DollarsPerGallon': + 'USDollarPerGallon', + 'Kilowatthours': + 'KilowattHour', + 'Barrels': + 'Barrel', + 'MillionDollars': + 'USDollar', + 'BillionDollars': + 'USDollar', + 'DollarsPerPoundUraniumOxide': + 'USDollarPerPoundUraniumOxide', + 'ThousandKilowatts': + 'Kilowatt', + 'DollarsPerBarrel': + 'USDollarPerBarrel', + 'NumberOfCustomers': + '', + 'NumberOfElements': + '', + 'Thousand': + "", + 'ThousandGallons': + 'USGallon', + 'MillionPounds': + 'GBP', + 'DollarsPerFoot': + 'USDollarPerFoot', + 'ThousandDollarsPerWell': + 'ThousandUSDollarsPerWell', + 'ThousandFeet': + 'Foot', + 'FeetPerWell': + 'Foot', + 
'Cost': + 'USDollar', + 'Index1982-1984=100': + '', + '2017=1.00000': + '', + 'NumberOfRigs': + '', + 'Number': + '', + 'Real(1982-1984)DollarsPerGallon': + 'USDollarPerGallon', + 'Real(1982-1984)DollarsPerMillionBtu': + 'USDollarPerMillionBtu', + 'DollarsPerMillionBtu': + 'USDollarPerMillionBtu', + 'Real(1982-1984)CentsPerKilowatthour': + 'USCentPerKilowattHour', + 'Real(1982-1984)DollarsPerThousandCubicFeet': + 'USDollarPerThousandCubicFeet', + 'MetricTonsCarbonDioxidePerMillionChained(2017)Dollars': + 'MetricTonsCarbonDioxidePerMillionChainedUSDollars', + 'ThousandBtuPerChained(2017)Dollar': + 'BtuPerChainedUSDollar', + 'BillionChained(2017)Dollars': + 'ChainedUSDollar', + 'CentsPerKilowatthour,IncludingTaxes': + 'CentsPerKilowattHour', + 'TrillionBtu': + 'Btu', + 'MillionGallons': + 'USGallon', + 'MillionPeople': + '', + 'MillionNominalDollars': + 'NominalUSDollar', + 'NominalDollars': + 'NominalUSDollar', + 'DollarsPerGallonIncludingTaxes': + 'USDollarPerGallon', + 'DollarsPerGallonExcludingTaxes': + 'USDollarPerGallon', + 'DollarsPerMillionBtu,IncludingTaxes': + 'USDollarPerMillionBtu' +} + +UNIT_CONVERT_DICT = { + 'ThousandCubicFeet': 1000, + 'ThousandBtuPerChained(2017)Dollar': 1000, + 'Thousand': 1000, + 'ThousandFeet': 1000, + 'ThousandDollars': 1000, + 'ThousandGallons': 1000, + 'ThousandBarrels': 1000, + 'ThousandsOfRegisteredVehicles': 1000, + 'MillionDollars': 1000000, + 'MillionPeople': 1000000, + 'MillionNominalDollars': 1000000, + 'MillionGallons': 1000000, + 'ThousandKilowatts': 1000, + 'MillionPounds': 1000000, + 'BillionDollars': 10000000000, + 'BillionChained(2017)Dollars': 10000000000, + 'TrillionBtu': 1000000000000 +} _COLUMNS = [ 'place', 'stat_var', 'date', 'value', 'unit', 'scaling_factor', - 'eia_series_id' + 'eia_series_id', 'measurementMethod' ] _TMCF_STRING = """ @@ -49,6 +203,7 @@ unit: C:EIATable->unit scalingFactor: C:EIATable->scaling_factor eiaSeriesId: C:EIATable->eia_series_id +measurementMethod: C:EIATable->measurementMethod 
""" _DATE_RE = re.compile('[0-9WMQ]') @@ -82,11 +237,13 @@ def _parse_date(d): m_or_q = d[4:] if m_or_q.startswith('Q'): + #print("withQ",yr + '-' + _QUARTER_MAP[m_or_q]) # Quarterly if m_or_q in _QUARTER_MAP: return yr + '-' + _QUARTER_MAP[m_or_q] else: # Monthly + #print("withOutQ",yr + '-' + m_or_q) return yr + '-' + m_or_q if len(d) == 8: @@ -106,15 +263,28 @@ def _sv_dcid(raw_sv): return 'eia/' + raw_sv -def _enumify(in_str): - return in_str.title().replace(' ', '') +def _check_unit_with_mapping(in_str): + if in_str in UNIT_MAPPING_DICT: + in_str = UNIT_MAPPING_DICT[in_str] + return in_str -def _print_counters(counters): - print('\nSTATS:') - for k in sorted(counters): - print(f"\t{k} = {counters[k]}") - print('') +def _check_mMethod_with_mapping(in_str): + if in_str in MMETHOD_MAPPING_DICT: + in_str = MMETHOD_MAPPING_DICT[in_str] + else: + in_str = "" + return in_str + + +def _unitConvert(unit, value): + if unit in UNIT_CONVERT_DICT: + value = float(value) * UNIT_CONVERT_DICT[unit] + return value + + +def _enumify(in_str): + return in_str.title().replace(' ', '') def _find_dc_place(raw_place, is_us_place, counters): @@ -147,7 +317,7 @@ def _find_dc_place(raw_place, is_us_place, counters): return 'Earth' # logging.error('ERROR: unsupported place %s %r', raw_place, is_us_place) - counters[f'error_unsupported_places_{raw_place}'] += 1 + counters.add_counter(f'error_unsupported_places_{raw_place}', 1) return None @@ -216,28 +386,31 @@ def _maybe_parse_name(name, raw_place, is_us_place, counters): # If we didn't find the name for the place, likely the name doesn't include # the place (e.g., TOTAL). 
- counters['info_unmodified_names'] += 1 + counters.add_counter('info_unmodified_names', 1) return cleanup_name(name) def _generate_sv_nodes(dataset, sv_map, sv_name_map, sv_membership_map, sv_schemaful2raw, svg_info): nodes = [] - for sv, mcf in sv_map.items(): - raw_sv = sv_schemaful2raw[sv] if sv in sv_schemaful2raw else sv + try: + for sv, mcf in sv_map.items(): + raw_sv = sv_schemaful2raw[sv] if sv in sv_schemaful2raw else sv - pvs = [mcf] - if raw_sv in sv_name_map: - pvs.append(f'name: "{sv_name_map[raw_sv]}"') + pvs = [mcf] + if raw_sv in sv_name_map: + pvs.append(f'name: "{sv_name_map[raw_sv]}"') - if dataset == 'NUC_STATUS': - pvs.append(f'memberOf: dcid:{category.NUC_STATUS_ROOT}') - if raw_sv in sv_membership_map: - for svg in sorted(sv_membership_map[raw_sv]): - if svg in svg_info: - pvs.append(f'memberOf: dcid:{svg}') + if dataset == 'NUC_STATUS': + pvs.append(f'memberOf: dcid:{category.NUC_STATUS_ROOT}') + if raw_sv in sv_membership_map: + for svg in sorted(sv_membership_map[raw_sv]): + if svg in svg_info: + pvs.append(f'memberOf: dcid:{svg}') - nodes.append('\n'.join(pvs)) + nodes.append('\n'.join(pvs)) + except Exception as e: + logging.fatal(f"error while generating the SV nodes,{sv_name_map} -{e}") return nodes @@ -286,16 +459,21 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf, counters = defaultdict(lambda: 0) sv_map = {} sv_name_map = {} - with open(in_json) as in_fp, open(out_csv, 'w', newline='') as csv_fp: + counters = Counters() + counters.add_counter('total', file_util.file_estimate_num_rows(in_json)) + with file_util.FileIO(in_json) as in_fp, open(out_csv, 'w', + newline='') as csv_fp: + #with open(in_json) as in_fp, open(out_csv, 'w', newline='') as csv_fp: csvwriter = csv.DictWriter(csv_fp, fieldnames=_COLUMNS) csvwriter.writeheader() for line in in_fp: - counters['info_lines_processed'] += 1 - if counters['info_lines_processed'] % 100000 == 99999: - _print_counters(counters) + 
counters.add_counter('processed', 1) + if not line.startswith('{'): + continue data = json.loads(line) + logging.info(f"Loaded data: {data}") # Preliminary checks series_id = data.get('series_id', None) @@ -303,28 +481,30 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf, category.process_category(dataset, data, extract_place_statvar_fn, svg_info, sv_membership_map, counters) - counters['info_categories_processed'] += 1 + continue time_series = data.get('data', None) if not time_series: - counters['error_missing_time_series'] += 1 + counters.add_counter('error_missing_time_series', 1) continue # Extract raw place and stat-var from series_id. (raw_place, raw_sv, is_us_place) = extract_place_statvar_fn(series_id, counters) if not raw_place or not raw_sv: - counters['error_extract_place_sv'] += 1 + counters.add_counter('error_extract_place_sv', 1) continue # Map raw place to DC place dc_place = _find_dc_place(raw_place, is_us_place, counters) if not dc_place: - counters['error_place_mapping'] += 1 + counters.add_counter('error_place_mapping', 1) continue raw_unit = _enumify(data.get('units', '')) + dc_unit = _check_unit_with_mapping(raw_unit) + m_method = _check_mMethod_with_mapping(raw_unit) if raw_sv not in sv_name_map: name = _maybe_parse_name(data.get('name', ''), raw_place, @@ -352,26 +532,27 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf, # TODO: Handle some these better. 
_ = float(v) except Exception: - counters['error_non_numeric_values'] += 1 + counters.add_counter('error_non_numeric_values', 1) continue dt = _parse_date(k) if not dt: logging.error('ERROR: failed to parse date "%s"', k) - counters['error_date_parsing'] += 1 + counters.add_counter('error_date_parsing', 1) continue rows.append({ 'place': f"dcid:{dc_place}", 'stat_var': f"dcid:{_sv_dcid(raw_sv)}", 'date': dt, - 'value': v, + 'value': _unitConvert(raw_unit, v), 'eia_series_id': series_id, - 'unit': raw_unit, + 'unit': dc_unit, + 'measurementMethod': m_method }) if not rows: - counters['error_empty_series'] += 1 + counters.add_counter('error_empty_series', 1) continue schema_sv = None @@ -380,14 +561,13 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf, counters) if schema_sv: sv_schemaful2raw[schema_sv] = raw_sv - counters['info_schemaful_series'] += 1 + counters.add_counter('info_schemaful_series', 1) else: - counters['info_schemaless_series'] += 1 + counters.add_counter('info_schemaless_series', 1) _generate_default_statvar(raw_sv, sv_map) csvwriter.writerows(rows) - counters['info_rows_output'] += len(rows) - + counters.add_counter('info_rows_output', len(rows)) category.trim_area_categories(svg_info, counters) with open(out_sv_mcf, 'w') as out_fp: @@ -407,5 +587,4 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf, with open(out_tmcf, 'w') as out_fp: out_fp.write(_TMCF_STRING) - print('=== FINAL COUNTERS ===') - _print_counters(counters) + logging.info(f"FINAL COUNTERS ") diff --git a/scripts/us_eia/opendata/process/elec.py b/scripts/us_eia/opendata/process/elec.py index fade060fd3..1258b0344a 100644 --- a/scripts/us_eia/opendata/process/elec.py +++ b/scripts/us_eia/opendata/process/elec.py @@ -30,12 +30,15 @@ def extract_place_statvar(series_id, counters): """ if series_id.startswith('ELEC.PLANT.'): - counters['error_unimplemented_plant_series'] += 1 + 
counters.add_counter('error_unimplemented_plant_series', 1) return (None, None, None) # ELEC.{MEASURE}.{FUEL_TYPE}-{PLACE}-{PRODUCER_SECTOR}.{PERIOD} + #m = re.match(r"^ELEC\.([^.]+)\.([^-]+)-([^-]+)-([^.]+)\.([AQM])$", + # series_id) m = re.match(r"^ELEC\.([^.]+)\.([^-]+)-([^-]+)-([^.]+)\.([AQM])$", series_id) + if m: measure = m.group(1) fuel_type = m.group(2) @@ -47,7 +50,7 @@ def extract_place_statvar(series_id, counters): # ELEC.{MEASURE}.{PLACE}-{CONSUMER_SECTOR}.{PERIOD} m = re.match(r"^ELEC\.([^.]+)\.([^-]+)-([^.]+)\.([AQM])$", series_id) if not m: - counters['error_unparsable_series'] += 1 + counters.add_counter('error_unparsable_series', 1) return (None, None) measure = m.group(1) @@ -222,7 +225,7 @@ def extract_place_statvar(series_id, counters): 'CONS_EG': (_PLACEHOLDER_FUEL_UNIT, '', 1000), 'CONS_EG_BTU': ('MMBtu', '', 1000000), 'COST': (_PLACEHOLDER_FUEL_UNIT, '', 1), - 'COST_BTU': ('MMBtu', '', 1), + 'COST_BTU': ('USDollarPerMMBtu', '', 1), 'CUSTOMERS': ('', '', 1), 'GEN': ('GigawattHour', '', 1), 'PRICE': ('USCentPerKilowattHour', '', 1), @@ -274,7 +277,7 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters): # ELEC.{MEASURE}.{CONSUMER_SECTOR}.{PERIOD} m = re.match(r"^ELEC\.([^.]+)\.([^.]+)\.([AQM])$", raw_sv) if not m: - counters['error_unparsable_raw_statvar'] += 1 + counters.add_counter('error_unparsable_raw_statvar', 1) return None measure = m.group(1) consuming_sector = m.group(2) @@ -285,7 +288,7 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters): # Get popType and mprop based on measure. 
measure_pvs = _MEASURE_MAP.get(measure, None) if not measure_pvs: - counters['error_missing_measure'] += 1 + counters.add_counter('error_missing_measure', 1) return None sv_id_parts = [common.PERIOD_MAP[period], measure_pvs[0]] @@ -300,7 +303,7 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters): if not es: logging.error('Missing energy source: %s from %s', fuel_type, raw_sv) - counters['error_missing_fuel_type'] += 1 + counters.add_counter('error_missing_fuel_type', 1) return None if es != 'ALL': sv_id_parts.append(es) @@ -312,7 +315,7 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters): if producing_sector: ps = _PRODUCING_SECTOR.get(producing_sector, None) if not ps: - counters['error_missing_producing_sector'] += 1 + counters.add_counter('error_missing_producing_sector', 1) return None if ps != 'ALL': sv_id_parts.append(ps) @@ -324,20 +327,20 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters): if consuming_sector: cs = _CONSUMING_SECTOR.get(consuming_sector, None) if not cs: - counters['error_missing_consuming_sector'] += 1 + counters.add_counter('error_missing_consuming_sector', 1) return None if cs != 'ALL': sv_id_parts.append(cs) sv_pvs.append(f'consumingSector: dcs:{cs}') if measure not in _UNIT_MAP: - counters['error_missing_unit'] += 1 + counters.add_counter('error_missing_unit', 1) return None (unit, sfactor, multiplier) = _UNIT_MAP[measure] if unit == _PLACEHOLDER_FUEL_UNIT: if not fuel_type: - counters['error_missing_unit_fuel_type'] += 1 + counters.add_counter('error_missing_unit_fuel_type', 1) return None unit = _get_fuel_unit(fuel_type) if measure == 'COST': diff --git a/scripts/us_eia/opendata/process/nuclear.py b/scripts/us_eia/opendata/process/nuclear.py index c21d253fe9..8ad2f0b291 100644 --- a/scripts/us_eia/opendata/process/nuclear.py +++ b/scripts/us_eia/opendata/process/nuclear.py @@ -102,7 +102,7 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters): Returns schema-ful stat-var ID if schema was 
generated, None otherwise. """ - counters['generate_statvar_schema'] += 1 + counters.add_counter('generate_statvar_schema', 1) # NUC_STATUS.{Measure}.{Period} m = re.match(r"^NUC_STATUS\.([^.]+)\.(D)$", raw_sv) @@ -110,21 +110,21 @@ def generate_statvar_schema(raw_sv, rows, sv_map, counters): measure = m.group(1) period = m.group(2) else: - counters['error_unparsable_raw_statvar'] += 1 + counters.add_counter('error_unparsable_raw_statvar', 1) return None - counters[f'measure-{measure}'] += 1 + counters.add_counter(f'measure-{measure}', 1) # Get popType and mprop based on measure. measure_pvs = _SV_MAP.get(measure, None) if not measure_pvs: - counters[f'error_missing_measure-{measure}'] += 1 + counters.add_counter(f'error_missing_measure-{measure}', 1) return None sv_id = measure_pvs[0] sv_pvs = measure_pvs[1:] if measure not in _UNIT_MAP: - counters[f'error_missing_unit-{measure}'] += 1 + counters.add_counter(f'error_missing_unit-{measure}', 1) return None (unit, sfactor) = _UNIT_MAP[measure] diff --git a/scripts/us_eia/opendata/process/pet.py b/scripts/us_eia/opendata/process/pet.py index b795fb4e1c..9740a7bda7 100644 --- a/scripts/us_eia/opendata/process/pet.py +++ b/scripts/us_eia/opendata/process/pet.py @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/scripts/us_eia/opendata/process/test_data/categories.csv b/scripts/us_eia/opendata/process/test_data/categories.csv index 2678990eda..11d61d34c0 100644 --- a/scripts/us_eia/opendata/process/test_data/categories.csv +++ b/scripts/us_eia/opendata/process/test_data/categories.csv @@ -1,5 +1,5 @@ -place,stat_var,date,value,unit,scaling_factor,eia_series_id -dcid:country/USA,dcid:eia/NG.N9140_2.A,2020,30482049,MillionCubicFeet,,NG.N9140US2.A -dcid:country/USA,dcid:eia/NG.N9140_2.A,2019,31099061,MillionCubicFeet,,NG.N9140US2.A -dcid:country/USA,dcid:eia/NG.N9140_2.M,2021-02,3036972,MillionCubicFeet,,NG.N9140US2.M -dcid:country/USA,dcid:eia/NG.N9140_2.M,2021-01,3286266,MillionCubicFeet,,NG.N9140US2.M +place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod +dcid:country/USA,dcid:eia/NG.N9140_2.A,2020,30482049,MillionCubicFeet,,NG.N9140US2.A, +dcid:country/USA,dcid:eia/NG.N9140_2.A,2019,31099061,MillionCubicFeet,,NG.N9140US2.A, +dcid:country/USA,dcid:eia/NG.N9140_2.M,2021-02,3036972,MillionCubicFeet,,NG.N9140US2.M, +dcid:country/USA,dcid:eia/NG.N9140_2.M,2021-01,3286266,MillionCubicFeet,,NG.N9140US2.M, diff --git a/scripts/us_eia/opendata/process/test_data/categories.tmcf b/scripts/us_eia/opendata/process/test_data/categories.tmcf index e1ef4499a7..f198290a7a 100644 --- a/scripts/us_eia/opendata/process/test_data/categories.tmcf +++ b/scripts/us_eia/opendata/process/test_data/categories.tmcf @@ -8,3 +8,4 @@ value: C:EIATable->value unit: C:EIATable->unit scalingFactor: C:EIATable->scaling_factor eiaSeriesId: C:EIATable->eia_series_id +measurementMethod: C:EIATable->measurementMethod diff --git a/scripts/us_eia/opendata/process/test_data/coal.csv b/scripts/us_eia/opendata/process/test_data/coal.csv index 6b31796c42..c29aa9febf 100644 --- a/scripts/us_eia/opendata/process/test_data/coal.csv +++ b/scripts/us_eia/opendata/process/test_data/coal.csv @@ -1,12 +1,12 @@ -place,stat_var,date,value,unit,scaling_factor,eia_series_id 
-dcid:geoId/01,dcid:Quarterly_Average_AshContent_Coal_For_ElectricUtility,2020-12,6.744021229492053,,100,COAL.ASH_CONTENT.AL-1.Q -dcid:geoId/01,dcid:Quarterly_Average_AshContent_Coal_For_ElectricUtility,2020-09,6.767757786979022,,100,COAL.ASH_CONTENT.AL-1.Q -dcid:geoId/21,dcid:Annual_Average_AshContent_Coal_For_CommercialAndInstitutional,2002,0,,100,COAL.ASH_CONTENT.KY-8.A -dcid:geoId/21,dcid:Annual_Average_AshContent_Coal_For_CommercialAndInstitutional,2001,0,,100,COAL.ASH_CONTENT.KY-8.A -dcid:geoId/21,dcid:Annual_Average_AshContent_Coal_For_CommercialAndInstitutional,2000,0,,100,COAL.ASH_CONTENT.KY-8.A -dcid:geoId/17,dcid:Quarterly_Average_HeatContent_Coal_For_CommercialAndInstitutional,2000-09,0,dcid:BtuPerPound,,COAL.HEAT_CONTENT.IL-8.Q -dcid:geoId/17,dcid:Quarterly_Average_HeatContent_Coal_For_CommercialAndInstitutional,2000-06,0,dcid:BtuPerPound,,COAL.HEAT_CONTENT.IL-8.Q -dcid:geoId/17,dcid:Quarterly_Average_HeatContent_Coal_For_CommercialAndInstitutional,2000-03,0,dcid:BtuPerPound,,COAL.HEAT_CONTENT.IL-8.Q -dcid:geoId/36,dcid:Annual_Receipt_Coal_ElectricUtilityNonCogen,2008,8236048,dcid:ShortTon,,COAL.RECEIPTS.NY-2.A -dcid:geoId/12,dcid:Quarterly_Stock_Coal_ElectricUtility,2008-03,4067084,dcid:ShortTon,,COAL.STOCKS.FL-1.Q -dcid:geoId/46,dcid:Annual_Average_SulfurContent_Coal_For_ElectricUtility,2008,0.31,,100,COAL.SULFUR_CONTENT.SD-1.A +place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod +dcid:geoId/01,dcid:Quarterly_Average_AshContent_Coal_For_ElectricUtility,2020-12,6.744021229492053,,100,COAL.ASH_CONTENT.AL-1.Q, +dcid:geoId/01,dcid:Quarterly_Average_AshContent_Coal_For_ElectricUtility,2020-09,6.767757786979022,,100,COAL.ASH_CONTENT.AL-1.Q, +dcid:geoId/21,dcid:Annual_Average_AshContent_Coal_For_CommercialAndInstitutional,2002,0,,100,COAL.ASH_CONTENT.KY-8.A, +dcid:geoId/21,dcid:Annual_Average_AshContent_Coal_For_CommercialAndInstitutional,2001,0,,100,COAL.ASH_CONTENT.KY-8.A, 
+dcid:geoId/21,dcid:Annual_Average_AshContent_Coal_For_CommercialAndInstitutional,2000,0,,100,COAL.ASH_CONTENT.KY-8.A, +dcid:geoId/17,dcid:Quarterly_Average_HeatContent_Coal_For_CommercialAndInstitutional,2000-09,0,dcid:BtuPerPound,,COAL.HEAT_CONTENT.IL-8.Q, +dcid:geoId/17,dcid:Quarterly_Average_HeatContent_Coal_For_CommercialAndInstitutional,2000-06,0,dcid:BtuPerPound,,COAL.HEAT_CONTENT.IL-8.Q, +dcid:geoId/17,dcid:Quarterly_Average_HeatContent_Coal_For_CommercialAndInstitutional,2000-03,0,dcid:BtuPerPound,,COAL.HEAT_CONTENT.IL-8.Q, +dcid:geoId/36,dcid:Annual_Receipt_Coal_ElectricUtilityNonCogen,2008,8236048,dcid:ShortTon,,COAL.RECEIPTS.NY-2.A, +dcid:geoId/12,dcid:Quarterly_Stock_Coal_ElectricUtility,2008-03,4067084,dcid:ShortTon,,COAL.STOCKS.FL-1.Q, +dcid:geoId/46,dcid:Annual_Average_SulfurContent_Coal_For_ElectricUtility,2008,0.31,,100,COAL.SULFUR_CONTENT.SD-1.A, diff --git a/scripts/us_eia/opendata/process/test_data/coal.tmcf b/scripts/us_eia/opendata/process/test_data/coal.tmcf index e1ef4499a7..f198290a7a 100644 --- a/scripts/us_eia/opendata/process/test_data/coal.tmcf +++ b/scripts/us_eia/opendata/process/test_data/coal.tmcf @@ -8,3 +8,4 @@ value: C:EIATable->value unit: C:EIATable->unit scalingFactor: C:EIATable->scaling_factor eiaSeriesId: C:EIATable->eia_series_id +measurementMethod: C:EIATable->measurementMethod diff --git a/scripts/us_eia/opendata/process/test_data/elec.csv b/scripts/us_eia/opendata/process/test_data/elec.csv index 55071994a6..47c39f5acd 100644 --- a/scripts/us_eia/opendata/process/test_data/elec.csv +++ b/scripts/us_eia/opendata/process/test_data/elec.csv @@ -1,10 +1,10 @@ -place,stat_var,date,value,unit,scaling_factor,eia_series_id -dcid:geoId/24,dcid:Monthly_Generation_Electricity_Solar_IndependentPowerProducers,2021-02,33.52617,dcid:GigawattHour,,ELEC.GEN.TSN-MD-94.M -dcid:geoId/24,dcid:Monthly_Generation_Electricity_Solar_IndependentPowerProducers,2021-01,33.77782,dcid:GigawattHour,,ELEC.GEN.TSN-MD-94.M 
-dcid:geoId/24,dcid:Monthly_Generation_Electricity_Solar_IndependentPowerProducers,2021-03,0.0,dcid:GigawattHour,,ELEC.GEN.TSN-MD-94.M -dcid:geoId/25,dcid:Quarterly_RetailSales_Electricity_Residential,2021-03,1809.70299,dcid:GigawattHour,,ELEC.SALES.MA-RES.Q -dcid:geoId/25,dcid:Quarterly_RetailSales_Electricity_Residential,2021-06,1956.15091,dcid:GigawattHour,,ELEC.SALES.MA-RES.Q -dcid:geoId/05,dcid:Quarterly_Consumption_Fuel_ForElectricityGeneration_Coal_ElectricUtilityNonCogen,2021-06,6586120.0,dcid:MMBtu,,ELEC.CONS_EG_BTU.COW-AR-2.Q -dcid:geoId/05,dcid:Quarterly_Consumption_Fuel_ForElectricityGeneration_Coal_ElectricUtilityNonCogen,2021-03,10431100.0,dcid:MMBtu,,ELEC.CONS_EG_BTU.COW-AR-2.Q -dcid:geoId/06,dcid:Monthly_SalesRevenue_Electricity,2021-06,4523307770.0,dcid:USDollar,,ELEC.REV.CA-ALL.M -dcid:geoId/06,dcid:Monthly_SalesRevenue_Electricity,2021-05,3461923010.0,dcid:USDollar,,ELEC.REV.CA-ALL.M +place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod +dcid:geoId/24,dcid:Monthly_Generation_Electricity_Solar_IndependentPowerProducers,2021-02,33.52617,dcid:GigawattHour,,ELEC.GEN.TSN-MD-94.M, +dcid:geoId/24,dcid:Monthly_Generation_Electricity_Solar_IndependentPowerProducers,2021-01,33.77782,dcid:GigawattHour,,ELEC.GEN.TSN-MD-94.M, +dcid:geoId/24,dcid:Monthly_Generation_Electricity_Solar_IndependentPowerProducers,2021-03,0.0,dcid:GigawattHour,,ELEC.GEN.TSN-MD-94.M, +dcid:geoId/25,dcid:Quarterly_RetailSales_Electricity_Residential,2021-03,1809.70299,dcid:GigawattHour,,ELEC.SALES.MA-RES.Q, +dcid:geoId/25,dcid:Quarterly_RetailSales_Electricity_Residential,2021-06,1956.15091,dcid:GigawattHour,,ELEC.SALES.MA-RES.Q, +dcid:geoId/05,dcid:Quarterly_Consumption_Fuel_ForElectricityGeneration_Coal_ElectricUtilityNonCogen,2021-06,6586120.0,dcid:MMBtu,,ELEC.CONS_EG_BTU.COW-AR-2.Q, +dcid:geoId/05,dcid:Quarterly_Consumption_Fuel_ForElectricityGeneration_Coal_ElectricUtilityNonCogen,2021-03,10431100.0,dcid:MMBtu,,ELEC.CONS_EG_BTU.COW-AR-2.Q, 
+dcid:geoId/06,dcid:Monthly_SalesRevenue_Electricity,2021-06,4523307770000000.0,dcid:USDollar,,ELEC.REV.CA-ALL.M, +dcid:geoId/06,dcid:Monthly_SalesRevenue_Electricity,2021-05,3461923010000000.0,dcid:USDollar,,ELEC.REV.CA-ALL.M, diff --git a/scripts/us_eia/opendata/process/test_data/elec.tmcf b/scripts/us_eia/opendata/process/test_data/elec.tmcf index e1ef4499a7..f198290a7a 100644 --- a/scripts/us_eia/opendata/process/test_data/elec.tmcf +++ b/scripts/us_eia/opendata/process/test_data/elec.tmcf @@ -8,3 +8,4 @@ value: C:EIATable->value unit: C:EIATable->unit scalingFactor: C:EIATable->scaling_factor eiaSeriesId: C:EIATable->eia_series_id +measurementMethod: C:EIATable->measurementMethod diff --git a/scripts/us_eia/opendata/process/test_data/intl.csv b/scripts/us_eia/opendata/process/test_data/intl.csv index fda1cd461a..6afb7ca9f9 100644 --- a/scripts/us_eia/opendata/process/test_data/intl.csv +++ b/scripts/us_eia/opendata/process/test_data/intl.csv @@ -1,7 +1,7 @@ -place,stat_var,date,value,unit,scaling_factor,eia_series_id -dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2020,924.4588369430336,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A -dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2019,986.3134487671233,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A -dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2018,1017.5592096438356,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A -dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2017,1017.3772797808219,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A -dcid:Earth,dcid:eia/INTL.55-1-TBPD.A,2020,91753.99016207967,ThousandBarrelsPerDay,,INTL.55-1-WORL-TBPD.A -dcid:Earth,dcid:eia/INTL.55-1-TBPD.A,2019,97993.61794135909,ThousandBarrelsPerDay,,INTL.55-1-WORL-TBPD.A +place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod +dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2020,924.4588369430336,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A, +dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2019,986.3134487671233,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A, 
+dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2018,1017.5592096438356,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A, +dcid:country/IND,dcid:eia/INTL.53-1-TBPD.A,2017,1017.3772797808219,ThousandBarrelsPerDay,,INTL.53-1-IND-TBPD.A, +dcid:Earth,dcid:eia/INTL.55-1-TBPD.A,2020,91753.99016207967,ThousandBarrelsPerDay,,INTL.55-1-WORL-TBPD.A, +dcid:Earth,dcid:eia/INTL.55-1-TBPD.A,2019,97993.61794135909,ThousandBarrelsPerDay,,INTL.55-1-WORL-TBPD.A, diff --git a/scripts/us_eia/opendata/process/test_data/intl.tmcf b/scripts/us_eia/opendata/process/test_data/intl.tmcf index e1ef4499a7..f198290a7a 100644 --- a/scripts/us_eia/opendata/process/test_data/intl.tmcf +++ b/scripts/us_eia/opendata/process/test_data/intl.tmcf @@ -8,3 +8,4 @@ value: C:EIATable->value unit: C:EIATable->unit scalingFactor: C:EIATable->scaling_factor eiaSeriesId: C:EIATable->eia_series_id +measurementMethod: C:EIATable->measurementMethod diff --git a/scripts/us_eia/opendata/process/test_data/ng.csv b/scripts/us_eia/opendata/process/test_data/ng.csv index a1722b09cf..96af54c914 100644 --- a/scripts/us_eia/opendata/process/test_data/ng.csv +++ b/scripts/us_eia/opendata/process/test_data/ng.csv @@ -1,12 +1,12 @@ -place,stat_var,date,value,unit,scaling_factor,eia_series_id -dcid:geoId/01,dcid:eia/NG.N3035_4.M,2021-02,26.1,Percent,,NG.N3035AL4.M -dcid:geoId/01,dcid:eia/NG.N3035_4.M,2021-01,25.2,Percent,,NG.N3035AL4.M -dcid:geoId/01,dcid:eia/NG.N3035_4.M,2020-12,24.7,Percent,,NG.N3035AL4.M -dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2008,6,MillionBarrels,,NG.RL2R02SOK_1.A -dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2007,-4,MillionBarrels,,NG.RL2R02SOK_1.A -dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2006,13,MillionBarrels,,NG.RL2R02SOK_1.A -dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2005,16,MillionBarrels,,NG.RL2R02SOK_1.A -dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2004,40,MillionBarrels,,NG.RL2R02SOK_1.A -dcid:country/USA,dcid:eia/NG.NA1350_2.A,2019,58084,MillionCubicFeet,,NG.NA1350_NUS_2.A 
-dcid:country/USA,dcid:eia/NG.NA1350_2.A,2018,9248,MillionCubicFeet,,NG.NA1350_NUS_2.A -dcid:country/USA,dcid:eia/NG.NA1350_2.A,2017,-256,MillionCubicFeet,,NG.NA1350_NUS_2.A +place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod +dcid:geoId/01,dcid:eia/NG.N3035_4.M,2021-02,26.1,Percent,,NG.N3035AL4.M, +dcid:geoId/01,dcid:eia/NG.N3035_4.M,2021-01,25.2,Percent,,NG.N3035AL4.M, +dcid:geoId/01,dcid:eia/NG.N3035_4.M,2020-12,24.7,Percent,,NG.N3035AL4.M, +dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2008,6,MillionsBarrels,,NG.RL2R02SOK_1.A, +dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2007,-4,MillionsBarrels,,NG.RL2R02SOK_1.A, +dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2006,13,MillionsBarrels,,NG.RL2R02SOK_1.A, +dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2005,16,MillionsBarrels,,NG.RL2R02SOK_1.A, +dcid:geoId/40,dcid:eia/NG.RL2R02_1.A,2004,40,MillionsBarrels,,NG.RL2R02SOK_1.A, +dcid:country/USA,dcid:eia/NG.NA1350_2.A,2019,58084,MillionCubicFeet,,NG.NA1350_NUS_2.A, +dcid:country/USA,dcid:eia/NG.NA1350_2.A,2018,9248,MillionCubicFeet,,NG.NA1350_NUS_2.A, +dcid:country/USA,dcid:eia/NG.NA1350_2.A,2017,-256,MillionCubicFeet,,NG.NA1350_NUS_2.A, diff --git a/scripts/us_eia/opendata/process/test_data/ng.tmcf b/scripts/us_eia/opendata/process/test_data/ng.tmcf index e1ef4499a7..f198290a7a 100644 --- a/scripts/us_eia/opendata/process/test_data/ng.tmcf +++ b/scripts/us_eia/opendata/process/test_data/ng.tmcf @@ -8,3 +8,4 @@ value: C:EIATable->value unit: C:EIATable->unit scalingFactor: C:EIATable->scaling_factor eiaSeriesId: C:EIATable->eia_series_id +measurementMethod: C:EIATable->measurementMethod diff --git a/scripts/us_eia/opendata/process/test_data/nuc_status.csv b/scripts/us_eia/opendata/process/test_data/nuc_status.csv index d1304b4d04..bd3eb5649b 100644 --- a/scripts/us_eia/opendata/process/test_data/nuc_status.csv +++ b/scripts/us_eia/opendata/process/test_data/nuc_status.csv @@ -1,13 +1,13 @@ -place,stat_var,date,value,unit,scaling_factor,eia_series_id 
-dcid:eia/pp/4046,dcid:Daily_Capacity_Nuclear_ForEnergyGeneration,2021-05-11,1197.1,dcid:Megawatt,,NUC_STATUS.CAP.4046.D -dcid:eia/pp/4046,dcid:Daily_Capacity_Nuclear_ForEnergyGeneration,2021-05-10,1197.1,dcid:Megawatt,,NUC_STATUS.CAP.4046.D -dcid:eia/pp/4046,dcid:Daily_Capacity_Nuclear_ForEnergyGeneration,2007-01-05,1036,dcid:Megawatt,,NUC_STATUS.CAP.4046.D -dcid:eia/pp/621,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-11,0,dcid:Megawatt,,NUC_STATUS.OUT.621.D -dcid:eia/pp/621,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-10,0,dcid:Megawatt,,NUC_STATUS.OUT.621.D -dcid:eia/pp/621,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2007-01-01,0,dcid:Megawatt,,NUC_STATUS.OUT.621.D -dcid:eia/pp/869-2,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration_AsAFractionOf_Capacity,2021-05-11,73,,,NUC_STATUS.OUT_PCT.869-2.D -dcid:eia/pp/869-2,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration_AsAFractionOf_Capacity,2021-05-10,80,,,NUC_STATUS.OUT_PCT.869-2.D -dcid:eia/pp/869-2,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration_AsAFractionOf_Capacity,2021-05-09,75,,,NUC_STATUS.OUT_PCT.869-2.D -dcid:country/USA,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-11,16404.767,dcid:Megawatt,,NUC_STATUS.OUT.US.D -dcid:country/USA,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-10,16960.869,dcid:Megawatt,,NUC_STATUS.OUT.US.D -dcid:country/USA,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-09,17374.955,dcid:Megawatt,,NUC_STATUS.OUT.US.D +place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod +dcid:eia/pp/4046,dcid:Daily_Capacity_Nuclear_ForEnergyGeneration,2021-05-11,1197.1,dcid:Megawatt,,NUC_STATUS.CAP.4046.D, +dcid:eia/pp/4046,dcid:Daily_Capacity_Nuclear_ForEnergyGeneration,2021-05-10,1197.1,dcid:Megawatt,,NUC_STATUS.CAP.4046.D, +dcid:eia/pp/4046,dcid:Daily_Capacity_Nuclear_ForEnergyGeneration,2007-01-05,1036,dcid:Megawatt,,NUC_STATUS.CAP.4046.D, 
+dcid:eia/pp/621,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-11,0,dcid:Megawatt,,NUC_STATUS.OUT.621.D, +dcid:eia/pp/621,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-10,0,dcid:Megawatt,,NUC_STATUS.OUT.621.D, +dcid:eia/pp/621,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2007-01-01,0,dcid:Megawatt,,NUC_STATUS.OUT.621.D, +dcid:eia/pp/869-2,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration_AsAFractionOf_Capacity,2021-05-11,73,,,NUC_STATUS.OUT_PCT.869-2.D, +dcid:eia/pp/869-2,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration_AsAFractionOf_Capacity,2021-05-10,80,,,NUC_STATUS.OUT_PCT.869-2.D, +dcid:eia/pp/869-2,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration_AsAFractionOf_Capacity,2021-05-09,75,,,NUC_STATUS.OUT_PCT.869-2.D, +dcid:country/USA,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-11,16404.767,dcid:Megawatt,,NUC_STATUS.OUT.US.D, +dcid:country/USA,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-10,16960.869,dcid:Megawatt,,NUC_STATUS.OUT.US.D, +dcid:country/USA,dcid:Daily_CapacityOutage_Nuclear_ForEnergyGeneration,2021-05-09,17374.955,dcid:Megawatt,,NUC_STATUS.OUT.US.D, diff --git a/scripts/us_eia/opendata/process/test_data/nuc_status.tmcf b/scripts/us_eia/opendata/process/test_data/nuc_status.tmcf index e1ef4499a7..f198290a7a 100644 --- a/scripts/us_eia/opendata/process/test_data/nuc_status.tmcf +++ b/scripts/us_eia/opendata/process/test_data/nuc_status.tmcf @@ -8,3 +8,4 @@ value: C:EIATable->value unit: C:EIATable->unit scalingFactor: C:EIATable->scaling_factor eiaSeriesId: C:EIATable->eia_series_id +measurementMethod: C:EIATable->measurementMethod diff --git a/scripts/us_eia/opendata/process/test_data/pet.csv b/scripts/us_eia/opendata/process/test_data/pet.csv index 05d459da20..894f1122a8 100644 --- a/scripts/us_eia/opendata/process/test_data/pet.csv +++ b/scripts/us_eia/opendata/process/test_data/pet.csv @@ -1,8 +1,8 @@ -place,stat_var,date,value,unit,scaling_factor,eia_series_id 
-dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2019,40989,ThousandGallons,,PET.KDLVISSCO1.A -dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2018,37954,ThousandGallons,,PET.KDLVISSCO1.A -dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2019,3,MillionBarrels,,PET.RCRR06SLA_1.A -dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2018,6,MillionBarrels,,PET.RCRR06SLA_1.A -dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2017,29,MillionBarrels,,PET.RCRR06SLA_1.A -dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-11-30,32.2,Percent,,PET.M_EPC0_SPT_NUS_PER.W -dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-12-06,33.1,Percent,,PET.M_EPC0_SPT_NUS_PER.W +place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod +dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2019,40989000.0,USGallon,,PET.KDLVISSCO1.A, +dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2018,37954000.0,USGallon,,PET.KDLVISSCO1.A, +dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2019,3,MillionsBarrels,,PET.RCRR06SLA_1.A, +dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2018,6,MillionsBarrels,,PET.RCRR06SLA_1.A, +dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2017,29,MillionsBarrels,,PET.RCRR06SLA_1.A, +dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-11-30,32.2,Percent,,PET.M_EPC0_SPT_NUS_PER.W, +dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-12-06,33.1,Percent,,PET.M_EPC0_SPT_NUS_PER.W, diff --git a/scripts/us_eia/opendata/process/test_data/pet.tmcf b/scripts/us_eia/opendata/process/test_data/pet.tmcf index e1ef4499a7..f198290a7a 100644 --- a/scripts/us_eia/opendata/process/test_data/pet.tmcf +++ b/scripts/us_eia/opendata/process/test_data/pet.tmcf @@ -8,3 +8,4 @@ value: C:EIATable->value unit: C:EIATable->unit scalingFactor: C:EIATable->scaling_factor eiaSeriesId: C:EIATable->eia_series_id +measurementMethod: C:EIATable->measurementMethod diff --git a/scripts/us_eia/opendata/process/test_data/seds.csv b/scripts/us_eia/opendata/process/test_data/seds.csv index 25dd6e8f74..06b1a06881 100644 --- a/scripts/us_eia/opendata/process/test_data/seds.csv +++ 
b/scripts/us_eia/opendata/process/test_data/seds.csv @@ -1,5 +1,5 @@ -place,stat_var,date,value,unit,scaling_factor,eia_series_id -dcid:geoId/06,dcid:eia/SEDS.TNISB.A,2018,1053194,BillionBtu,,SEDS.TNISB.CA.A -dcid:geoId/06,dcid:eia/SEDS.TNISB.A,2017,1056142,BillionBtu,,SEDS.TNISB.CA.A -dcid:country/USA,dcid:eia/SEDS.WXICD.A,2019,29.79,DollarsPerMillionBtu,,SEDS.WXICD.US.A -dcid:country/USA,dcid:eia/SEDS.WXICD.A,2018,32.94,DollarsPerMillionBtu,,SEDS.WXICD.US.A +place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod +dcid:geoId/06,dcid:eia/SEDS.TNISB.A,2018,1053194,BillionBtu,,SEDS.TNISB.CA.A, +dcid:geoId/06,dcid:eia/SEDS.TNISB.A,2017,1056142,BillionBtu,,SEDS.TNISB.CA.A, +dcid:country/USA,dcid:eia/SEDS.WXICD.A,2019,29.79,USDollarPerMillionBtu,,SEDS.WXICD.US.A, +dcid:country/USA,dcid:eia/SEDS.WXICD.A,2018,32.94,USDollarPerMillionBtu,,SEDS.WXICD.US.A, diff --git a/scripts/us_eia/opendata/process/test_data/seds.tmcf b/scripts/us_eia/opendata/process/test_data/seds.tmcf index e1ef4499a7..f198290a7a 100644 --- a/scripts/us_eia/opendata/process/test_data/seds.tmcf +++ b/scripts/us_eia/opendata/process/test_data/seds.tmcf @@ -8,3 +8,4 @@ value: C:EIATable->value unit: C:EIATable->unit scalingFactor: C:EIATable->scaling_factor eiaSeriesId: C:EIATable->eia_series_id +measurementMethod: C:EIATable->measurementMethod diff --git a/scripts/us_eia/opendata/process/test_data/total.csv b/scripts/us_eia/opendata/process/test_data/total.csv index e7bc7babbb..8426b42903 100644 --- a/scripts/us_eia/opendata/process/test_data/total.csv +++ b/scripts/us_eia/opendata/process/test_data/total.csv @@ -1,3 +1,3 @@ -place,stat_var,date,value,unit,scaling_factor,eia_series_id -dcid:country/USA,dcid:eia/TOTAL.LUACP.A,2020,52.852,ThousandBarrelsPerDay,,TOTAL.LUACPUS.A -dcid:country/USA,dcid:eia/TOTAL.LUACP.A,2019,59.325,ThousandBarrelsPerDay,,TOTAL.LUACPUS.A +place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod 
+dcid:country/USA,dcid:eia/TOTAL.LUACP.A,2020,52.852,ThousandBarrelsPerDay,,TOTAL.LUACPUS.A, +dcid:country/USA,dcid:eia/TOTAL.LUACP.A,2019,59.325,ThousandBarrelsPerDay,,TOTAL.LUACPUS.A, diff --git a/scripts/us_eia/opendata/process/test_data/total.tmcf b/scripts/us_eia/opendata/process/test_data/total.tmcf index e1ef4499a7..f198290a7a 100644 --- a/scripts/us_eia/opendata/process/test_data/total.tmcf +++ b/scripts/us_eia/opendata/process/test_data/total.tmcf @@ -8,3 +8,4 @@ value: C:EIATable->value unit: C:EIATable->unit scalingFactor: C:EIATable->scaling_factor eiaSeriesId: C:EIATable->eia_series_id +measurementMethod: C:EIATable->measurementMethod From 80ac8dc44b3f1032f0a5feeb62249f2bf5a96ac4 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Thu, 30 Jan 2025 13:16:09 +0000 Subject: [PATCH 02/18] Resolved PR comments --- scripts/us_eia/opendata/generate_jsonl_for_bq.py | 4 ---- scripts/us_eia/opendata/process.py | 4 +++- scripts/us_eia/opendata/process/coal.py | 4 ++-- scripts/us_eia/opendata/process/common.py | 4 +--- scripts/us_eia/opendata/process/elec.py | 2 +- scripts/us_eia/opendata/process/nuclear.py | 4 ++-- scripts/us_eia/opendata/process/pet.py | 4 ++-- 7 files changed, 11 insertions(+), 15 deletions(-) diff --git a/scripts/us_eia/opendata/generate_jsonl_for_bq.py b/scripts/us_eia/opendata/generate_jsonl_for_bq.py index af1579277c..9eeb81796b 100644 --- a/scripts/us_eia/opendata/generate_jsonl_for_bq.py +++ b/scripts/us_eia/opendata/generate_jsonl_for_bq.py @@ -103,16 +103,12 @@ def process_all(): for file in sorted(files): if not file.endswith('.txt'): continue - print(f'Processing1 {subdir}/{file}') - process_single(subdir, file) if __name__ == '__main__': args = sys.argv[1:] if len(args) == 0: - print('Processing all files') process_all() else: - print(f'Processing {args[0]}/{args[1]}') process_single(args[0], args[1]) diff --git a/scripts/us_eia/opendata/process.py b/scripts/us_eia/opendata/process.py index 3d67164ba8..fe06786560 100644 
--- a/scripts/us_eia/opendata/process.py +++ b/scripts/us_eia/opendata/process.py @@ -26,7 +26,7 @@ import sys import zipfile import requests - +from retry import retry from absl import flags from absl import app from absl import logging @@ -57,6 +57,7 @@ } +@retry(tries=5, delay=3, backoff=2) def download_file(url: str, save_path: str): try: r = requests.get(url, stream=True) @@ -66,6 +67,7 @@ def download_file(url: str, save_path: str): logging.fatal(f"error while downloading the file,{url} -{e}") +@retry(tries=5, delay=3, backoff=2) def download_manifest(): try: return requests.get(MANIFEST_URL).json() diff --git a/scripts/us_eia/opendata/process/coal.py b/scripts/us_eia/opendata/process/coal.py index c19d90990f..3c9c2a3165 100644 --- a/scripts/us_eia/opendata/process/coal.py +++ b/scripts/us_eia/opendata/process/coal.py @@ -13,13 +13,13 @@ # limitations under the License. """EIA Coal Dataset specific functions.""" -import logging +from absl import logging import re from . import common -def extract_place_statvar(series_id, counters): +def extract_place_statvar(series_id): """Given the series_id, extract the raw place and stat-var ID. 
Args: diff --git a/scripts/us_eia/opendata/process/common.py b/scripts/us_eia/opendata/process/common.py index 8954d80487..c5cf2b8664 100644 --- a/scripts/us_eia/opendata/process/common.py +++ b/scripts/us_eia/opendata/process/common.py @@ -16,7 +16,7 @@ import sys import csv import json -import logging +from absl import logging import re from collections import defaultdict from sys import path @@ -237,13 +237,11 @@ def _parse_date(d): m_or_q = d[4:] if m_or_q.startswith('Q'): - #print("withQ",yr + '-' + _QUARTER_MAP[m_or_q]) # Quarterly if m_or_q in _QUARTER_MAP: return yr + '-' + _QUARTER_MAP[m_or_q] else: # Monthly - #print("withOutQ",yr + '-' + m_or_q) return yr + '-' + m_or_q if len(d) == 8: diff --git a/scripts/us_eia/opendata/process/elec.py b/scripts/us_eia/opendata/process/elec.py index 1258b0344a..22968c7fe7 100644 --- a/scripts/us_eia/opendata/process/elec.py +++ b/scripts/us_eia/opendata/process/elec.py @@ -13,7 +13,7 @@ # limitations under the License. """EIA Electricity Dataset specific functions.""" -import logging +from absl import logging import re from . import common diff --git a/scripts/us_eia/opendata/process/nuclear.py b/scripts/us_eia/opendata/process/nuclear.py index 8ad2f0b291..2e70d6e6e8 100644 --- a/scripts/us_eia/opendata/process/nuclear.py +++ b/scripts/us_eia/opendata/process/nuclear.py @@ -13,13 +13,13 @@ # limitations under the License. """EIA Nuclear Status Dataset specific functions.""" -import logging +from absl import logging import re from . import common -def extract_place_statvar(series_id, counters): +def extract_place_statvar(series_id): """Given the series_id, extract the raw place and stat-var ID. 
Args: diff --git a/scripts/us_eia/opendata/process/pet.py b/scripts/us_eia/opendata/process/pet.py index 9740a7bda7..818ba46a7b 100644 --- a/scripts/us_eia/opendata/process/pet.py +++ b/scripts/us_eia/opendata/process/pet.py @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -27,7 +27,7 @@ def _parse_with_place_prefix(m): return (place, sv_id, in_us) -def extract_place_statvar(series_id, counters): +def extract_place_statvar(series_id): """Given the series_id, extract the raw place and stat-var ID. Args: From cbeb28fa75cde0a16a4c5b88c08b896bf7f620c4 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Fri, 31 Jan 2025 06:03:44 +0000 Subject: [PATCH 03/18] fixed lint --- scripts/us_eia/opendata/manifest.json | 48 ++++++++++++------- .../us_eia/opendata/process/common_test.py | 2 +- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/scripts/us_eia/opendata/manifest.json b/scripts/us_eia/opendata/manifest.json index 221c340c1f..1144ac4090 100644 --- a/scripts/us_eia/opendata/manifest.json +++ b/scripts/us_eia/opendata/manifest.json @@ -2,7 +2,9 @@ "import_specifications": [ { "import_name": "EIA_Coal", - "curator_emails": [], + "curator_emails": [ + "support@datacommons.org" + ], "provenance_url": "https://www.eia.gov/opendata/qb.php?category=717234", "provenance_description": "Coal dataset has country, state-level level information .", "scripts": [ @@ -14,11 +16,13 @@ "cleaned_csv": "tmp_raw_data/COAL/COAL.csv" } ], - "cron_schedule": "0 6 1 2 *" + "cron_schedule": "0 6 5,20 * *" }, { "import_name": "EIA_Electricity", - "curator_emails": [], + "curator_emails": [ + "support@datacommons.org" + ], "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0", "provenance_description": "Electricity dataset has country, state-level and plant-level information on electricity 
generation, consumption, sales etc by energy source and “sectors” (like residential, commercial, etc.).", "scripts": [ @@ -30,11 +34,13 @@ "cleaned_csv": "tmp_raw_data/ELEC/ELEC.csv" } ], - "cron_schedule": "0 8 1 2 *" + "cron_schedule": "0 7 5,20 * *" }, { "import_name": "EIA_NaturalGas", - "curator_emails": [], + "curator_emails": [ + "support@datacommons.org" + ], "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0", "provenance_description": "Natural gas dataset has country and state-level data.", "scripts": [ @@ -46,11 +52,13 @@ "cleaned_csv": "tmp_raw_data/NG/NG.csv" } ], - "cron_schedule": "05 10 * * *" + "cron_schedule": "0 8 5,20 * *" }, { "import_name": "EIA_NuclearOutages", - "curator_emails": [], + "curator_emails": [ + "support@datacommons.org" + ], "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0", "provenance_description": "Nuclear outage dataset has nuclear-plant and national data about Nuclear energy generation capacity and planned outages.", "scripts": [ @@ -62,11 +70,13 @@ "cleaned_csv": "tmp_raw_data/NUC_STATUS/NUC_STATUS.csv" } ], - "cron_schedule": "01 9 * * *" + "cron_schedule": "0 9 5,20 * *" }, { "import_name": "EIA_Petroleum", - "curator_emails": [], + "curator_emails": [ + "support@datacommons.org" + ], "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0", "provenance_description": "EIA Petroleum dataset has country and state-level data.", "scripts": [ @@ -78,11 +88,13 @@ "cleaned_csv": "tmp_raw_data/PET/PET.csv" } ], - "cron_schedule": "5 9 2 2 *" + "cron_schedule": "0 10 5,20 * *" }, { "import_name": "EIA_International", - "curator_emails": [], + "curator_emails": [ + "support@datacommons.org" + ], "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0", "provenance_description": "EIA International Energy dataset has country, continent and world-level data.", "scripts": [ @@ -94,11 +106,13 @@ "cleaned_csv": "tmp_raw_data/INTL/INTL.csv" } ], - "cron_schedule": "1 7 * 
1,4,7,10 *" + "cron_schedule": "0 11 5,20 * * *" }, { "import_name": "EIA_SEDS", - "curator_emails": [], + "curator_emails": [ + "support@datacommons.org" + ], "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0", "provenance_description": "EIA SEDS International Energy dataset has US country-level and state-level data.", "scripts": [ @@ -110,11 +124,13 @@ "cleaned_csv": "tmp_raw_data/SEDS/SEDS.csv" } ], - "cron_schedule": "0 0 1 1 *" + "cron_schedule": "0 12 5,20 * *" }, { "import_name": "EIA_TotalEnergy", - "curator_emails": [], + "curator_emails": [ + "support@datacommons.org" + ], "provenance_url": "https://www.eia.gov/opendata/v1/qb.php?category=0", "provenance_description": "Total Energy dataset has US country-level data.", "scripts": [ @@ -126,7 +142,7 @@ "cleaned_csv": "tmp_raw_data/TOTAL/TOTAL.csv" } ], - "cron_schedule": "0 0 1 * *" + "cron_schedule": "0 13 5,20 * *" } ] } \ No newline at end of file diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py index 042927a6af..019f027a06 100644 --- a/scripts/us_eia/opendata/process/common_test.py +++ b/scripts/us_eia/opendata/process/common_test.py @@ -24,7 +24,7 @@ os.path.dirname( os.path.dirname( os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) -from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total +from .us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total # module_dir_ is the path to where this test is running from. 
module_dir_ = os.path.dirname(__file__) From df585fda31efa1e76418a27cabad1d577843d3e5 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Fri, 31 Jan 2025 08:51:21 +0000 Subject: [PATCH 04/18] fixed test data --- scripts/us_eia/opendata/process.py | 2 +- scripts/us_eia/opendata/process/common_test.py | 2 +- .../us_eia/opendata/process/test_data/pet.csv | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/scripts/us_eia/opendata/process.py b/scripts/us_eia/opendata/process.py index fe06786560..c6142c4059 100644 --- a/scripts/us_eia/opendata/process.py +++ b/scripts/us_eia/opendata/process.py @@ -31,7 +31,7 @@ from absl import app from absl import logging -from process import common, coal, elec, intl, ng, nuclear, pet, seds, total +from .process import common, coal, elec, intl, ng, nuclear, pet, seds, total MANIFEST_URL = "https://api.eia.gov/bulk/manifest.txt" diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py index 019f027a06..042927a6af 100644 --- a/scripts/us_eia/opendata/process/common_test.py +++ b/scripts/us_eia/opendata/process/common_test.py @@ -24,7 +24,7 @@ os.path.dirname( os.path.dirname( os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) -from .us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total +from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total # module_dir_ is the path to where this test is running from. 
module_dir_ = os.path.dirname(__file__) diff --git a/scripts/us_eia/opendata/process/test_data/pet.csv b/scripts/us_eia/opendata/process/test_data/pet.csv index 894f1122a8..156723593c 100644 --- a/scripts/us_eia/opendata/process/test_data/pet.csv +++ b/scripts/us_eia/opendata/process/test_data/pet.csv @@ -1,8 +1,8 @@ -place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod -dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2019,40989000.0,USGallon,,PET.KDLVISSCO1.A, -dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2018,37954000.0,USGallon,,PET.KDLVISSCO1.A, -dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2019,3,MillionsBarrels,,PET.RCRR06SLA_1.A, -dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2018,6,MillionsBarrels,,PET.RCRR06SLA_1.A, -dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2017,29,MillionsBarrels,,PET.RCRR06SLA_1.A, -dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-11-30,32.2,Percent,,PET.M_EPC0_SPT_NUS_PER.W, -dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-12-06,33.1,Percent,,PET.M_EPC0_SPT_NUS_PER.W, +place,stat_var,date,value,unit,scaling_factor,eia_series_id,measurementMethod +dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2019,40989000.0,USGallon,,PET.KDLVISSCO1.A, +dcid:geoId/08,dcid:eia/PET.KDLVIS_1.A,2018,37954000.0,USGallon,,PET.KDLVISSCO1.A, +dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2019,3,MillionsBarrels,,PET.RCRR06SLA_1.A, +dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2018,6,MillionsBarrels,,PET.RCRR06SLA_1.A, +dcid:geoId/22,dcid:eia/PET.RCRR06_1.A,2017,29,MillionsBarrels,,PET.RCRR06SLA_1.A, +dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-11-30,32.2,Percent,,PET.M_EPC0_SPT_NUS_PER.W, +dcid:country/USA,dcid:eia/PET.M_EPC0_SPT_PER.W,2020-12-06,33.1,Percent,,PET.M_EPC0_SPT_NUS_PER.W, From e255e888f3d391f84313beda530d377f1de83f9a Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Fri, 31 Jan 2025 09:28:25 +0000 Subject: [PATCH 05/18] fixed test data --- scripts/us_eia/opendata/process/common_test.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff 
--git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py index 042927a6af..11337ff8bc 100644 --- a/scripts/us_eia/opendata/process/common_test.py +++ b/scripts/us_eia/opendata/process/common_test.py @@ -17,14 +17,15 @@ import sys import tempfile import unittest +from absl import logging -# Allows the following module imports to work when running as a script +# Allows the following module imports to work when running as a script. # relative to scripts/ sys.path.append( os.path.dirname( os.path.dirname( os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) -from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total +from .us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total # module_dir_ is the path to where this test is running from. module_dir_ = os.path.dirname(__file__) @@ -54,7 +55,7 @@ def test_process(self): for (dataset, dataset_name, test_fname, extract_fn, schema_fn) in _TEST_CASES: with tempfile.TemporaryDirectory() as tmp_dir: - print('Processing', dataset) + logging.info(f"Processing {dataset}") in_file = os.path.join(module_dir_, 'test_data', f'{test_fname}.txt') @@ -111,4 +112,5 @@ def test_cleanup_name(self): if __name__ == '__main__': + logging.set_verbosity(logging.INFO) unittest.main() From 3ba71c819fe46d437a335701f0c72361d0ab0835 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Fri, 31 Jan 2025 10:47:19 +0000 Subject: [PATCH 06/18] fixed test --- scripts/us_eia/opendata/process/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/us_eia/opendata/process/main.py b/scripts/us_eia/opendata/process/main.py index 82a15c01b7..4aab227f2d 100644 --- a/scripts/us_eia/opendata/process/main.py +++ b/scripts/us_eia/opendata/process/main.py @@ -25,7 +25,7 @@ os.path.dirname( os.path.dirname( os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) -from us_eia.opendata.process import coal, 
common, elec, intl, ng, nuclear, pet, seds, total +from .us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total FLAGS = flags.FLAGS flags.DEFINE_string('data_dir', 'tmp_raw_data', 'Raw data dir') From 2360e86d45159ed5b06f45d7989cbf4a176f2b44 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Fri, 31 Jan 2025 11:07:54 +0000 Subject: [PATCH 07/18] fixed test --- scripts/us_eia/opendata/process/common_test.py | 3 ++- scripts/us_eia/opendata/process/main.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py index 11337ff8bc..e2f0f040bd 100644 --- a/scripts/us_eia/opendata/process/common_test.py +++ b/scripts/us_eia/opendata/process/common_test.py @@ -25,7 +25,8 @@ os.path.dirname( os.path.dirname( os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) -from .us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total +#from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total +from main import * # module_dir_ is the path to where this test is running from. 
module_dir_ = os.path.dirname(__file__) diff --git a/scripts/us_eia/opendata/process/main.py b/scripts/us_eia/opendata/process/main.py index 4aab227f2d..82a15c01b7 100644 --- a/scripts/us_eia/opendata/process/main.py +++ b/scripts/us_eia/opendata/process/main.py @@ -25,7 +25,7 @@ os.path.dirname( os.path.dirname( os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) -from .us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total +from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total FLAGS = flags.FLAGS flags.DEFINE_string('data_dir', 'tmp_raw_data', 'Raw data dir') From 809cc65b75b2224ce6c6ac7e743412a803dbcce5 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Fri, 31 Jan 2025 11:29:01 +0000 Subject: [PATCH 08/18] fixed test --- scripts/us_eia/opendata/process/common_test.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py index e2f0f040bd..1690ba5fe2 100644 --- a/scripts/us_eia/opendata/process/common_test.py +++ b/scripts/us_eia/opendata/process/common_test.py @@ -21,15 +21,13 @@ # Allows the following module imports to work when running as a script. # relative to scripts/ -sys.path.append( - os.path.dirname( - os.path.dirname( - os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) + #from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total -from main import * -# module_dir_ is the path to where this test is running from. module_dir_ = os.path.dirname(__file__) +sys.path.insert(0, module_dir_) +from main import * +# module_dir_ is the path to where this test is running from. 
_TEST_CASES = [ # dataset-code, dataset-name, test-case-filename, From 0a36a2e983d6ab38e79f28f34fe4a4fd1ee95840 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Mon, 3 Feb 2025 06:26:34 +0000 Subject: [PATCH 09/18] updated scripts --- scripts/us_eia/opendata/process/common.py | 10 ++++++++-- scripts/us_eia/opendata/process/common_test.py | 3 +++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/scripts/us_eia/opendata/process/common.py b/scripts/us_eia/opendata/process/common.py index c5cf2b8664..2ca667c5ee 100644 --- a/scripts/us_eia/opendata/process/common.py +++ b/scripts/us_eia/opendata/process/common.py @@ -16,8 +16,9 @@ import sys import csv import json -from absl import logging +import logging import re +import inspect from collections import defaultdict from sys import path @@ -237,11 +238,13 @@ def _parse_date(d): m_or_q = d[4:] if m_or_q.startswith('Q'): + #print("withQ",yr + '-' + _QUARTER_MAP[m_or_q]) # Quarterly if m_or_q in _QUARTER_MAP: return yr + '-' + _QUARTER_MAP[m_or_q] else: # Monthly + #print("withOutQ",yr + '-' + m_or_q) return yr + '-' + m_or_q if len(d) == 8: @@ -471,7 +474,7 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf, if not line.startswith('{'): continue data = json.loads(line) - logging.info(f"Loaded data: {data}") + #logging.info(f"Loaded data: {data}") # Preliminary checks series_id = data.get('series_id', None) @@ -486,6 +489,9 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf, if not time_series: counters.add_counter('error_missing_time_series', 1) continue + logging.info( + f"extract_place_statvar_fn {inspect.getmodule(extract_place_statvar_fn)}" + ) # Extract raw place and stat-var from series_id. 
(raw_place, raw_sv, diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py index 1690ba5fe2..bb91d41287 100644 --- a/scripts/us_eia/opendata/process/common_test.py +++ b/scripts/us_eia/opendata/process/common_test.py @@ -17,6 +17,7 @@ import sys import tempfile import unittest +import inspect from absl import logging # Allows the following module imports to work when running as a script. @@ -67,6 +68,8 @@ def test_process(self): act_mcf = os.path.join(tmp_dir, exp_mcf) act_svg_mcf = os.path.join(tmp_dir, exp_svg_mcf) act_tmcf = os.path.join(tmp_dir, exp_tmcf) + logging.info(f"extract_fn {inspect.getmodule(extract_fn)}") + common.process(dataset, dataset_name, in_file, act_csv, act_mcf, act_svg_mcf, act_tmcf, extract_fn, schema_fn) From bbff4cf1787d182d405df48a8c18f391b2381b26 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Mon, 3 Feb 2025 06:43:20 +0000 Subject: [PATCH 10/18] fixing test scripts --- scripts/us_eia/opendata/process/common_test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py index bb91d41287..c12f7fd727 100644 --- a/scripts/us_eia/opendata/process/common_test.py +++ b/scripts/us_eia/opendata/process/common_test.py @@ -20,6 +20,7 @@ import inspect from absl import logging + # Allows the following module imports to work when running as a script. # relative to scripts/ @@ -30,6 +31,7 @@ from main import * # module_dir_ is the path to where this test is running from. 
+ _TEST_CASES = [ # dataset-code, dataset-name, test-case-filename, # extract-fn, schema-fn @@ -68,7 +70,7 @@ def test_process(self): act_mcf = os.path.join(tmp_dir, exp_mcf) act_svg_mcf = os.path.join(tmp_dir, exp_svg_mcf) act_tmcf = os.path.join(tmp_dir, exp_tmcf) - logging.info(f"extract_fn {inspect.getmodule(extract_fn)}") + logging.debug(f"extract_fn {inspect.getmodule(extract_fn)}") common.process(dataset, dataset_name, in_file, act_csv, act_mcf, act_svg_mcf, act_tmcf, extract_fn, schema_fn) @@ -114,5 +116,5 @@ def test_cleanup_name(self): if __name__ == '__main__': - logging.set_verbosity(logging.INFO) + logging.set_verbosity(logging.DEBUG) unittest.main() From 192553bce8f341f630f6d21fe723556ac6243bca Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Mon, 3 Feb 2025 06:51:40 +0000 Subject: [PATCH 11/18] fixing test scripts --- scripts/us_eia/opendata/process/common_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py index c12f7fd727..3eb0ba22e9 100644 --- a/scripts/us_eia/opendata/process/common_test.py +++ b/scripts/us_eia/opendata/process/common_test.py @@ -20,7 +20,6 @@ import inspect from absl import logging - # Allows the following module imports to work when running as a script. # relative to scripts/ @@ -31,7 +30,6 @@ from main import * # module_dir_ is the path to where this test is running from. 
- _TEST_CASES = [ # dataset-code, dataset-name, test-case-filename, # extract-fn, schema-fn From 63b3a9b2a9ad18ef14cac881bce3b43bb456bb76 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Mon, 3 Feb 2025 07:22:36 +0000 Subject: [PATCH 12/18] added counters --- scripts/us_eia/opendata/process/coal.py | 5 +++-- scripts/us_eia/opendata/process/elec.py | 3 ++- scripts/us_eia/opendata/process/intl.py | 1 + scripts/us_eia/opendata/process/ng.py | 1 + scripts/us_eia/opendata/process/nuclear.py | 5 +++-- scripts/us_eia/opendata/process/pet.py | 3 ++- scripts/us_eia/opendata/process/seds.py | 1 + scripts/us_eia/opendata/process/total.py | 1 + 8 files changed, 14 insertions(+), 6 deletions(-) diff --git a/scripts/us_eia/opendata/process/coal.py b/scripts/us_eia/opendata/process/coal.py index 3c9c2a3165..3a1ce60a1f 100644 --- a/scripts/us_eia/opendata/process/coal.py +++ b/scripts/us_eia/opendata/process/coal.py @@ -13,13 +13,13 @@ # limitations under the License. """EIA Coal Dataset specific functions.""" -from absl import logging +import logging import re from . import common -def extract_place_statvar(series_id): +def extract_place_statvar(series_id, counters): """Given the series_id, extract the raw place and stat-var ID. Args: @@ -33,6 +33,7 @@ def extract_place_statvar(series_id): m = re.match(r"^COAL\.([^._]+_?[^._]+)\.([A-Z]+)-([0-9]+)\.([AQM])$", series_id) if m: + counters.add_counter('info_coal_record_count', 1) measure = m.group(1) place = m.group(2) code = m.group(3) diff --git a/scripts/us_eia/opendata/process/elec.py b/scripts/us_eia/opendata/process/elec.py index 22968c7fe7..b3f309d9b6 100644 --- a/scripts/us_eia/opendata/process/elec.py +++ b/scripts/us_eia/opendata/process/elec.py @@ -13,7 +13,7 @@ # limitations under the License. """EIA Electricity Dataset specific functions.""" -from absl import logging +import logging import re from . 
import common @@ -40,6 +40,7 @@ def extract_place_statvar(series_id, counters): series_id) if m: + counters.add_counter('info_elec_record_count', 1) measure = m.group(1) fuel_type = m.group(2) place = m.group(3) diff --git a/scripts/us_eia/opendata/process/intl.py b/scripts/us_eia/opendata/process/intl.py index c94244746a..94517b3fb5 100644 --- a/scripts/us_eia/opendata/process/intl.py +++ b/scripts/us_eia/opendata/process/intl.py @@ -16,6 +16,7 @@ def extract_place_statvar(series_id, counters): # INTL.{MEASURE1}-{MEASURE2}-{PLACE}-{MEASURE3}.{PERIOD} m = re.match(r"^(INTL\.[^-]+-[^-]+)-([^-]+)-([^-]+\.[A-Z])$", series_id) if m: + counters.add_counter('info_intl_record_count', 1) sv_part1 = m.group(1) place = m.group(2) sv_part2 = m.group(3) diff --git a/scripts/us_eia/opendata/process/ng.py b/scripts/us_eia/opendata/process/ng.py index 780108e55e..a6ae1e07c3 100644 --- a/scripts/us_eia/opendata/process/ng.py +++ b/scripts/us_eia/opendata/process/ng.py @@ -61,6 +61,7 @@ def extract_place_statvar(series_id, counters): # Pattern #1: NG.N{MEASURE1}{PLACE}{MEASURE2}.{PERIOD} m = re.match(r"^(NG\.N[^_]+)([A-Z][A-Z])([0-9]\.[A-Z])$", series_id) if m: + counters.add_counter('info_ng_record_count', 1) sv_part1 = m.group(1) sv_part2 = m.group(3) sv_id = f'{sv_part1}_{sv_part2}' diff --git a/scripts/us_eia/opendata/process/nuclear.py b/scripts/us_eia/opendata/process/nuclear.py index 2e70d6e6e8..b03da32d1a 100644 --- a/scripts/us_eia/opendata/process/nuclear.py +++ b/scripts/us_eia/opendata/process/nuclear.py @@ -13,13 +13,13 @@ # limitations under the License. """EIA Nuclear Status Dataset specific functions.""" -from absl import logging +import logging import re from . import common -def extract_place_statvar(series_id): +def extract_place_statvar(series_id, counters): """Given the series_id, extract the raw place and stat-var ID. 
Args: @@ -30,6 +30,7 @@ def extract_place_statvar(series_id): """ m = re.match(r"^NUC_STATUS\.([^.]+)\.([^.]+)\.(D)$", series_id) if m: + counters.add_counter('info_nuclear_record_count', 1) measure = m.group(1) place = m.group(2) if not place == 'US': diff --git a/scripts/us_eia/opendata/process/pet.py b/scripts/us_eia/opendata/process/pet.py index 818ba46a7b..361cf9dd6b 100644 --- a/scripts/us_eia/opendata/process/pet.py +++ b/scripts/us_eia/opendata/process/pet.py @@ -27,7 +27,7 @@ def _parse_with_place_prefix(m): return (place, sv_id, in_us) -def extract_place_statvar(series_id): +def extract_place_statvar(series_id, counters): """Given the series_id, extract the raw place and stat-var ID. Args: @@ -40,6 +40,7 @@ def extract_place_statvar(series_id): # Pattern #1: PET.K{MEASURE1}[SN]{PLACE}{MEASURE2}.{PERIOD} m = re.match(r"^(PET\.K[^_]+)([NS][A-Z][A-Z])([0-9]\.[A-Z])$", series_id) if m: + counters.add_counter('info_pet_record_count', 1) return _parse_with_place_prefix(m) # Pattern #2: PET.{MEASURE1}[SN]{PLACE}_{MEASURE2}.{PERIOD} diff --git a/scripts/us_eia/opendata/process/seds.py b/scripts/us_eia/opendata/process/seds.py index 6f55ab8b1b..c12c7f2b92 100644 --- a/scripts/us_eia/opendata/process/seds.py +++ b/scripts/us_eia/opendata/process/seds.py @@ -20,6 +20,7 @@ def extract_place_statvar(series_id, counters): # (https://user-images.githubusercontent.com/4375037/117168919-74618f00-ad7d-11eb-8306-bb4db3f52e03.png) m = re.match(r"^(SEDS\.[^.]+)\.([A-Z][A-Z])\.([A-Z])$", series_id) if m: + counters.add_counter('info_seds_record_count', 1) sv_part1 = m.group(1) place = m.group(2) sv_part2 = m.group(3) diff --git a/scripts/us_eia/opendata/process/total.py b/scripts/us_eia/opendata/process/total.py index ce255f258b..fe148f6bbf 100644 --- a/scripts/us_eia/opendata/process/total.py +++ b/scripts/us_eia/opendata/process/total.py @@ -22,6 +22,7 @@ def extract_place_statvar(series_id, counters): # them for now. 
m = re.match(r"^(TOTAL\..*)US\.([A-Z])$", series_id) if m: + counters.add_counter('info_total_record_count', 1) sv_part1 = m.group(1) sv_part2 = m.group(2) sv_id = f'{sv_part1}.{sv_part2}' From 147fd4b8efcabe3f72e8f43602933c00e45b32a0 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Mon, 3 Feb 2025 07:41:54 +0000 Subject: [PATCH 13/18] updated eia 8 imports scripts --- scripts/us_eia/opendata/process/common_test.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scripts/us_eia/opendata/process/common_test.py b/scripts/us_eia/opendata/process/common_test.py index 3eb0ba22e9..919d399a7e 100644 --- a/scripts/us_eia/opendata/process/common_test.py +++ b/scripts/us_eia/opendata/process/common_test.py @@ -17,14 +17,10 @@ import sys import tempfile import unittest -import inspect from absl import logging # Allows the following module imports to work when running as a script. # relative to scripts/ - -#from us_eia.opendata.process import coal, common, elec, intl, ng, nuclear, pet, seds, total - module_dir_ = os.path.dirname(__file__) sys.path.insert(0, module_dir_) from main import * @@ -68,8 +64,6 @@ def test_process(self): act_mcf = os.path.join(tmp_dir, exp_mcf) act_svg_mcf = os.path.join(tmp_dir, exp_svg_mcf) act_tmcf = os.path.join(tmp_dir, exp_tmcf) - logging.debug(f"extract_fn {inspect.getmodule(extract_fn)}") - common.process(dataset, dataset_name, in_file, act_csv, act_mcf, act_svg_mcf, act_tmcf, extract_fn, schema_fn) From d7fb1910240c2eb936a8ee52e2c56301e9257734 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Mon, 3 Feb 2025 07:54:57 +0000 Subject: [PATCH 14/18] fixed scripts and lint test --- scripts/us_eia/opendata/process/common.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/us_eia/opendata/process/common.py b/scripts/us_eia/opendata/process/common.py index 2ca667c5ee..0b4bf49a1a 100644 --- a/scripts/us_eia/opendata/process/common.py +++ b/scripts/us_eia/opendata/process/common.py @@ -18,7 +18,6 @@ import 
json import logging import re -import inspect from collections import defaultdict from sys import path @@ -489,9 +488,6 @@ def process(dataset, dataset_name, in_json, out_csv, out_sv_mcf, out_svg_mcf, if not time_series: counters.add_counter('error_missing_time_series', 1) continue - logging.info( - f"extract_place_statvar_fn {inspect.getmodule(extract_place_statvar_fn)}" - ) # Extract raw place and stat-var from series_id. (raw_place, raw_sv, From 6070463f5365c356f3d721576c5ad09e168629bd Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Mon, 3 Feb 2025 09:42:23 +0000 Subject: [PATCH 15/18] updated manifest.json file --- scripts/us_eia/opendata/manifest.json | 40 +++++++++++++++++++++------ 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/scripts/us_eia/opendata/manifest.json b/scripts/us_eia/opendata/manifest.json index 1144ac4090..5c30c7ecf7 100644 --- a/scripts/us_eia/opendata/manifest.json +++ b/scripts/us_eia/opendata/manifest.json @@ -13,7 +13,10 @@ "import_inputs": [ { "template_mcf": "tmp_raw_data/COAL/COAL.tmcf", - "cleaned_csv": "tmp_raw_data/COAL/COAL.csv" + "cleaned_csv": "tmp_raw_data/COAL/COAL.csv", + "source_files": [ + "tmp_raw_data/COAL/COAL.txt" + ] } ], "cron_schedule": "0 6 5,20 * *" @@ -34,7 +37,10 @@ "cleaned_csv": "tmp_raw_data/ELEC/ELEC.csv" } ], - "cron_schedule": "0 7 5,20 * *" + "cron_schedule": "0 7 5,20 * *", + "source_files": [ + "tmp_raw_data/ELEC/ELEC.txt" + ] }, { "import_name": "EIA_NaturalGas", @@ -52,7 +58,10 @@ "cleaned_csv": "tmp_raw_data/NG/NG.csv" } ], - "cron_schedule": "0 8 5,20 * *" + "cron_schedule": "0 8 5,20 * *", + "source_files": [ + "tmp_raw_data/NG/NG.txt" + ] }, { "import_name": "EIA_NuclearOutages", @@ -70,7 +79,10 @@ "cleaned_csv": "tmp_raw_data/NUC_STATUS/NUC_STATUS.csv" } ], - "cron_schedule": "0 9 5,20 * *" + "cron_schedule": "0 9 5,20 * *", + "source_files": [ + "tmp_raw_data/NUC_STATUS/NUC_STATUS.txt" + ] }, { "import_name": "EIA_Petroleum", @@ -88,7 +100,10 @@ "cleaned_csv": 
"tmp_raw_data/PET/PET.csv" } ], - "cron_schedule": "0 10 5,20 * *" + "cron_schedule": "0 10 5,20 * *", + "source_files": [ + "tmp_raw_data/PET/PET.txt" + ] }, { "import_name": "EIA_International", @@ -106,7 +121,10 @@ "cleaned_csv": "tmp_raw_data/INTL/INTL.csv" } ], - "cron_schedule": "0 11 5,20 * * *" + "cron_schedule": "0 11 5,20 * * *", + "source_files": [ + "tmp_raw_data/INTL/INTL.txt" + ] }, { "import_name": "EIA_SEDS", @@ -124,7 +142,10 @@ "cleaned_csv": "tmp_raw_data/SEDS/SEDS.csv" } ], - "cron_schedule": "0 12 5,20 * *" + "cron_schedule": "0 12 5,20 * *", + "source_files": [ + "tmp_raw_data/SEDS/SEDS.txt" + ] }, { "import_name": "EIA_TotalEnergy", @@ -142,7 +163,10 @@ "cleaned_csv": "tmp_raw_data/TOTAL/TOTAL.csv" } ], - "cron_schedule": "0 13 5,20 * *" + "cron_schedule": "0 13 5,20 * *", + "source_files": [ + "tmp_raw_data/TOTAL/TOTAL.txt" + ] } ] } \ No newline at end of file From 67b235aa950009ed9c5b79d16b2fb3b37c319e76 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Mon, 3 Feb 2025 10:41:30 +0000 Subject: [PATCH 16/18] Resolved PR comments --- scripts/us_eia/opendata/manifest.json | 70 +++++++++++++-------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/scripts/us_eia/opendata/manifest.json b/scripts/us_eia/opendata/manifest.json index 5c30c7ecf7..ca33146656 100644 --- a/scripts/us_eia/opendata/manifest.json +++ b/scripts/us_eia/opendata/manifest.json @@ -2,24 +2,22 @@ "import_specifications": [ { "import_name": "EIA_Coal", - "curator_emails": [ - "support@datacommons.org" - ], + "curator_emails": [], "provenance_url": "https://www.eia.gov/opendata/qb.php?category=717234", "provenance_description": "Coal dataset has country, state-level level information .", "scripts": [ "process.py --dataset=COAL" ], + "source_files": [ + "tmp_raw_data/COAL/COAL.txt" + ], "import_inputs": [ { "template_mcf": "tmp_raw_data/COAL/COAL.tmcf", - "cleaned_csv": "tmp_raw_data/COAL/COAL.csv", - "source_files": [ - 
"tmp_raw_data/COAL/COAL.txt" - ] + "cleaned_csv": "tmp_raw_data/COAL/COAL.csv" } ], - "cron_schedule": "0 6 5,20 * *" + "cron_schedule": "0 6 1 2 *" }, { "import_name": "EIA_Electricity", @@ -31,16 +29,16 @@ "scripts": [ "process.py --dataset=ELEC" ], + "source_files": [ + "tmp_raw_data/ELEC/ELEC.txt" + ], "import_inputs": [ { "template_mcf": "tmp_raw_data/ELEC/ELEC.tmcf", "cleaned_csv": "tmp_raw_data/ELEC/ELEC.csv" } ], - "cron_schedule": "0 7 5,20 * *", - "source_files": [ - "tmp_raw_data/ELEC/ELEC.txt" - ] + "cron_schedule": "0 7 5,20 * *" }, { "import_name": "EIA_NaturalGas", @@ -52,16 +50,16 @@ "scripts": [ "process.py --dataset=NG" ], + "source_files": [ + "tmp_raw_data/NG/NG.txt" + ], "import_inputs": [ { "template_mcf": "tmp_raw_data/NG/NG.tmcf", "cleaned_csv": "tmp_raw_data/NG/NG.csv" } ], - "cron_schedule": "0 8 5,20 * *", - "source_files": [ - "tmp_raw_data/NG/NG.txt" - ] + "cron_schedule": "0 8 5,20 * *" }, { "import_name": "EIA_NuclearOutages", @@ -73,16 +71,16 @@ "scripts": [ "process.py --dataset=NUC_STATUS" ], + "source_files": [ + "tmp_raw_data/NUC_STATUS/NUC_STATUS.txt" + ], "import_inputs": [ { "template_mcf": "tmp_raw_data/NUC_STATUS/NUC_STATUS.tmcf", "cleaned_csv": "tmp_raw_data/NUC_STATUS/NUC_STATUS.csv" } ], - "cron_schedule": "0 9 5,20 * *", - "source_files": [ - "tmp_raw_data/NUC_STATUS/NUC_STATUS.txt" - ] + "cron_schedule": "0 9 5,20 * *" }, { "import_name": "EIA_Petroleum", @@ -94,16 +92,16 @@ "scripts": [ "process.py --dataset=PET" ], + "source_files": [ + "tmp_raw_data/PET/PET.txt" + ], "import_inputs": [ { "template_mcf": "tmp_raw_data/PET/PET.tmcf", "cleaned_csv": "tmp_raw_data/PET/PET.csv" } ], - "cron_schedule": "0 10 5,20 * *", - "source_files": [ - "tmp_raw_data/PET/PET.txt" - ] + "cron_schedule": "0 10 5,20 * *" }, { "import_name": "EIA_International", @@ -115,16 +113,16 @@ "scripts": [ "process.py --dataset=INTL" ], + "source_files": [ + "tmp_raw_data/INTL/INTL.txt" + ], "import_inputs": [ { "template_mcf": 
"tmp_raw_data/INTL/INTL.tmcf", "cleaned_csv": "tmp_raw_data/INTL/INTL.csv" } ], - "cron_schedule": "0 11 5,20 * * *", - "source_files": [ - "tmp_raw_data/INTL/INTL.txt" - ] + "cron_schedule": "0 11 5,20 * * *" }, { "import_name": "EIA_SEDS", @@ -136,16 +134,16 @@ "scripts": [ "process.py --dataset=SEDS" ], + "source_files": [ + "tmp_raw_data/SEDS/SEDS.txt" + ], "import_inputs": [ { "template_mcf": "tmp_raw_data/SEDS/SEDS.tmcf", "cleaned_csv": "tmp_raw_data/SEDS/SEDS.csv" } ], - "cron_schedule": "0 12 5,20 * *", - "source_files": [ - "tmp_raw_data/SEDS/SEDS.txt" - ] + "cron_schedule": "0 12 5,20 * *" }, { "import_name": "EIA_TotalEnergy", @@ -157,16 +155,16 @@ "scripts": [ "process.py --dataset=TOTAL" ], + "source_files": [ + "tmp_raw_data/TOTAL/TOTAL.txt" + ], "import_inputs": [ { "template_mcf": "tmp_raw_data/TOTAL/TOTAL.tmcf", "cleaned_csv": "tmp_raw_data/TOTAL/TOTAL.csv" } ], - "cron_schedule": "0 13 5,20 * *", - "source_files": [ - "tmp_raw_data/TOTAL/TOTAL.txt" - ] + "cron_schedule": "0 13 5,20 * *" } ] } \ No newline at end of file From 74e54be81318eec7763bb0a651a65b31e2b816bb Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Mon, 3 Feb 2025 11:16:49 +0000 Subject: [PATCH 17/18] Resolved PR comments --- scripts/us_eia/opendata/manifest.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/us_eia/opendata/manifest.json b/scripts/us_eia/opendata/manifest.json index ca33146656..5ab552e647 100644 --- a/scripts/us_eia/opendata/manifest.json +++ b/scripts/us_eia/opendata/manifest.json @@ -2,7 +2,9 @@ "import_specifications": [ { "import_name": "EIA_Coal", - "curator_emails": [], + "curator_emails": [ + "support@datacommons.org" + ], "provenance_url": "https://www.eia.gov/opendata/qb.php?category=717234", "provenance_description": "Coal dataset has country, state-level level information .", "scripts": [ From 4d097289656d58ae6610389145ca31e9e52bf986 Mon Sep 17 00:00:00 2001 From: Harsha Vardhan Chandaluri Date: Mon, 3 Feb 2025 
12:10:17 +0000 Subject: [PATCH 18/18] Resolved PR comments --- scripts/us_eia/opendata/manifest.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/us_eia/opendata/manifest.json b/scripts/us_eia/opendata/manifest.json index 5ab552e647..ff50f690d7 100644 --- a/scripts/us_eia/opendata/manifest.json +++ b/scripts/us_eia/opendata/manifest.json @@ -19,7 +19,7 @@ "cleaned_csv": "tmp_raw_data/COAL/COAL.csv" } ], - "cron_schedule": "0 6 1 2 *" + "cron_schedule": "0 14 5,20 * *" }, { "import_name": "EIA_Electricity", @@ -169,4 +169,4 @@ "cron_schedule": "0 13 5,20 * *" } ] -} \ No newline at end of file +}