Skip to content

Commit

Permalink
Add get_srml iotools function; deprecate read_srml_month_from_solardat (
Browse files Browse the repository at this point in the history
#1779)

* Initial commit

* Add exception for non-existing file names

* Update error handling

* Deprecate read_srml_month_from_solardat

* Add fail_on_pvlib_version to tests

* Apply suggestions from code review

Co-authored-by: Kevin Anderson <[email protected]>

* Address code review

* Conform to code review

* Add warning message

* Use assert_frame_equal

* Update pvlib/tests/iotools/test_srml.py

Co-authored-by: Kevin Anderson <[email protected]>

---------

Co-authored-by: Kevin Anderson <[email protected]>
  • Loading branch information
AdamRJensen and kandersolar authored Jun 29, 2023
1 parent fa9dc9b commit 81e5593
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 12 deletions.
1 change: 1 addition & 0 deletions docs/sphinx/source/reference/iotools.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ of sources and file formats relevant to solar energy modeling.
iotools.parse_epw
iotools.read_srml
iotools.read_srml_month_from_solardat
iotools.get_srml
iotools.read_surfrad
iotools.read_midc
iotools.read_midc_raw_data_from_nrel
Expand Down
4 changes: 4 additions & 0 deletions docs/sphinx/source/whatsnew/v0.10.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ Enhancements
the same Python type as the `effective_irradiance` and `temp_cell` parameters. (:issue:`1626`, :pull:`1700`)
* Added `map_variables` parameter to :py:func:`pvlib.iotools.read_srml`
and :py:func:`pvlib.iotools.read_srml_month_from_solardat` (:pull:`1773`)
* Added :func:`pvlib.iotools.get_srml` that is similar to
:func:`pvlib.iotools.read_srml_month_from_solardat` but is able to fetch multiple months
of data using the `start` and `end` parameters.
(:pull:`1779`)
* Allow passing keyword arguments to :py:func:`scipy:scipy.optimize.brentq` and
:py:func:`scipy:scipy.optimize.newton` solvers in
:py:func:`~pvlib.singlediode.bishop88_mpp`,
Expand Down
4 changes: 3 additions & 1 deletion docs/sphinx/source/whatsnew/v0.9.6.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,13 @@ Deprecations
(data period 2003-2012). Instead, ECMWF recommends using CAMS global
reanalysis (EAC4) from the Atmosphere Data Store (ADS). See also :py:func:`pvlib.iotools.get_cams`.
(:issue:`1691`, :pull:`1654`)

* The ``recolumn`` parameter in :py:func:`pvlib.iotools.read_tmy3`, which maps
TMY3 column names to nonstandard alternatives, is now deprecated.
We encourage using ``map_variables`` (which produces standard pvlib names) instead.
(:issue:`1517`, :pull:`1623`)
* :py:func:`pvlib.iotools.read_srml_month_from_solardat` is deprecated and replaced by
:py:func:`pvlib.iotools.get_srml`. (:pull:`1779`)


Enhancements
~~~~~~~~~~~~
Expand Down
1 change: 1 addition & 0 deletions pvlib/iotools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pvlib.iotools.epw import read_epw, parse_epw # noqa: F401
from pvlib.iotools.srml import read_srml # noqa: F401
from pvlib.iotools.srml import read_srml_month_from_solardat # noqa: F401
from pvlib.iotools.srml import get_srml # noqa: F401
from pvlib.iotools.surfrad import read_surfrad # noqa: F401
from pvlib.iotools.midc import read_midc # noqa: F401
from pvlib.iotools.midc import read_midc_raw_data_from_nrel # noqa: F401
Expand Down
111 changes: 104 additions & 7 deletions pvlib/iotools/srml.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
"""
import numpy as np
import pandas as pd
import urllib
import warnings

from pvlib._deprecation import deprecated

# VARIABLE_MAP is a dictionary mapping SRML data element numbers to their
# pvlib names. For most variables, only the first three digits are used,
Expand All @@ -26,8 +29,9 @@

def read_srml(filename, map_variables=True):
"""
Read University of Oregon SRML 1min .tsv file into pandas dataframe. The
SRML is described in [1]_.
Read University of Oregon SRML 1min .tsv file into pandas dataframe.
The SRML is described in [1]_.
Parameters
----------
Expand All @@ -51,14 +55,14 @@ def read_srml(filename, map_variables=True):
the time of the row until the time of the next row. This is consistent
with pandas' default labeling behavior.
See SRML's `Archival Files`_ page for more information.
.. _Archival Files: http://solardat.uoregon.edu/ArchivalFiles.html
See [2]_ for more information concerning the file format.
References
----------
.. [1] University of Oregon Solar Radiation Monitoring Laboratory
`http://solardat.uoregon.edu/ <http://solardat.uoregon.edu/>`_
.. [2] `Archival (short interval) data files
<http://solardat.uoregon.edu/ArchivalFiles.html>`_
"""
tsv_data = pd.read_csv(filename, delimiter='\t')
data = _format_index(tsv_data)
Expand Down Expand Up @@ -168,10 +172,12 @@ def _format_index(df):
return df


@deprecated('0.10.0', alternative='pvlib.iotools.get_srml', removal='0.11.0')
def read_srml_month_from_solardat(station, year, month, filetype='PO',
map_variables=True):
"""Request a month of SRML data from solardat and read it into
a Dataframe. The SRML is described in [1]_.
"""Request a month of SRML data and read it into a Dataframe.
The SRML is described in [1]_.
Parameters
----------
Expand Down Expand Up @@ -222,3 +228,94 @@ def read_srml_month_from_solardat(station, year, month, filetype='PO',
url = "http://solardat.uoregon.edu/download/Archive/"
data = read_srml(url + file_name, map_variables=map_variables)
return data


def get_srml(station, start, end, filetype='PO', map_variables=True,
             url="http://solardat.uoregon.edu/download/Archive/"):
    """Request data from the UO SRML archive and read it into a DataFrame.

    The University of Oregon Solar Radiation Monitoring Laboratory (SRML) is
    described in [1]_. A list of stations can be found in [2]_.

    Data is returned for the entire months between and including start and
    end. One file is requested per calendar month and the monthly DataFrames
    are concatenated in chronological order.

    Parameters
    ----------
    station : str
        Two letter station abbreviation.
    start : datetime like
        First day of the requested period
    end : datetime like
        Last day of the requested period
    filetype : string, default: 'PO'
        SRML file type to gather. See notes for explanation.
    map_variables : bool, default: True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.
    url : str, default: 'http://solardat.uoregon.edu/download/Archive/'
        API endpoint URL. Each monthly file name is appended directly to
        this string, so it must end with a path separator.

    Returns
    -------
    data : pd.DataFrame
        Dataframe with data from SRML.
    meta : dict
        Metadata with the keys ``'filetype'``, ``'station'`` and
        ``'filenames'``. ``'filenames'`` lists every file that was
        requested, including files that could not be found.

    Raises
    ------
    ValueError
        If none of the requested monthly files could be retrieved (this
        includes the case where `end` lies in a month before `start`).

    Warns
    -----
    UserWarning
        For each monthly file for which the server returns an HTTP error.
        The remaining months are still returned.

    Notes
    -----
    File types designate the time interval of a file and if it contains
    raw or processed data. For instance, `RO` designates raw, one minute
    data and `PO` designates processed one minute data. The availability
    of file types varies between sites. Below is a table of file types
    and their time intervals. See [1]_ for site information.

    ============= ============ ==================
    time interval raw filetype processed filetype
    ============= ============ ==================
    1 minute      RO           PO
    5 minute      RF           PF
    15 minute     RQ           PQ
    hourly        RH           PH
    ============= ============ ==================

    Warning
    -------
    SRML data has nighttime data prefilled with 0s through the end of the
    current month (i.e., values are provided for data in the future).

    References
    ----------
    .. [1] University of Oregon Solar Radiation Monitoring Laboratory
       `http://solardat.uoregon.edu/ <http://solardat.uoregon.edu/>`_
    .. [2] Station ID codes - Solar Radiation Monitoring Laboratory
       `http://solardat.uoregon.edu/StationIDCodes.html
       <http://solardat.uoregon.edu/StationIDCodes.html>`_
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.error`` has been loaded.
    from urllib.error import HTTPError

    # Use pd.to_datetime so that strings (e.g. '2021-01-01') are accepted
    start = pd.to_datetime(start)
    end = pd.to_datetime(end)

    # One entry per calendar month touched by [start, end]. Anchoring at the
    # first day of the start month with the month-start alias avoids the
    # 'M' (month-end) alias, which is deprecated in recent pandas versions.
    months = pd.date_range(
        start.replace(day=1).normalize(), end, freq='MS')

    # SRML archive files are named <station><filetype><yymm>.txt
    filenames = [
        f"{station}{filetype}{yymm}.txt" for yymm in months.strftime('%y%m')
    ]

    dfs = []  # monthly dataframes that were successfully retrieved
    for f in filenames:
        try:
            dfi = read_srml(url + f, map_variables=map_variables)
        except HTTPError:
            # Best effort: a missing month should not discard the others
            warnings.warn(f"The following file was not found: {f}",
                          stacklevel=2)
        else:
            dfs.append(dfi)

    if not dfs:
        # pd.concat would raise an opaque "No objects to concatenate"
        raise ValueError(
            "No SRML data could be retrieved for station "
            f"{station!r} and filetype {filetype!r} between {start} and "
            f"{end}. Requested files: {filenames}")

    data = pd.concat(dfs, axis='rows')

    meta = {'filetype': filetype,
            'station': station,
            'filenames': filenames}

    return data, meta
63 changes: 59 additions & 4 deletions pvlib/tests/iotools/test_srml.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import pytest

from pvlib.iotools import srml
from ..conftest import DATA_DIR, RERUNS, RERUNS_DELAY
from ..conftest import (DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal,
assert_frame_equal, fail_on_pvlib_version)
from pvlib._deprecation import pvlibDeprecationWarning

srml_testfile = DATA_DIR / 'SRML-day-EUPO1801.txt'

Expand Down Expand Up @@ -74,19 +76,33 @@ def test__map_columns(column, expected):
assert srml._map_columns(column) == expected


@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_srml():
    """get_srml for a single month matches reading that month's file."""
    expected = srml.read_srml(
        'http://solardat.uoregon.edu/download/Archive/EUPO1801.txt')
    # Only the data is compared here; the metadata is ignored.
    actual, _ = srml.get_srml(
        station='EU', start='2018-01-01', end='2018-01-31')
    assert_frame_equal(expected, actual)


@fail_on_pvlib_version('0.11')
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_read_srml_month_from_solardat():
    """The deprecated reader warns and still returns the month's data."""
    expected = srml.read_srml(
        'http://solardat.uoregon.edu/download/Archive/EUPO1801.txt')
    # The call must emit the deprecation warning pointing to get_srml.
    with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'):
        actual = srml.read_srml_month_from_solardat('EU', 2018, 1)
    assert expected.equals(actual)


@fail_on_pvlib_version('0.11')
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_15_minute_dt_index():
data = srml.read_srml_month_from_solardat('TW', 2019, 4, 'RQ')
with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'):
data = srml.read_srml_month_from_solardat('TW', 2019, 4, 'RQ')
start = pd.Timestamp('20190401 00:00')
start = start.tz_localize('Etc/GMT+8')
end = pd.Timestamp('20190430 23:45')
Expand All @@ -96,14 +112,53 @@ def test_15_minute_dt_index():
assert (data.index[3::4].minute == 45).all()


@fail_on_pvlib_version('0.11')
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_hourly_dt_index():
    """Hourly (PH) data is indexed on the hour for the whole month."""
    with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'):
        hourly = srml.read_srml_month_from_solardat('CD', 1986, 4, 'PH')
    # Expected first and last timestamps of April 1986
    first = pd.Timestamp('19860401 00:00').tz_localize('Etc/GMT+8')
    last = pd.Timestamp('19860430 23:00').tz_localize('Etc/GMT+8')
    assert hourly.index[0] == first
    assert hourly.index[-1] == last
    assert (hourly.index.minute == 0).all()


@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_srml_hourly():
    """Two months of hourly (PH) data form one continuous hourly index."""
    # The metadata is not checked in this test, so it is discarded.
    data, _ = srml.get_srml(station='CD', start='1986-04-01',
                            end='1986-05-31', filetype='PH')
    expected_index = pd.date_range(start='1986-04-01', end='1986-05-31 23:59',
                                   freq='1h', tz='Etc/GMT+8')
    assert_index_equal(data.index, expected_index)


@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_srml_minute():
    """One-minute (PO) data for a month agrees with the local example file."""
    local = srml.read_srml(srml_testfile)
    remote, meta = srml.get_srml(
        station='EU', start='2018-01-01', end='2018-01-31', filetype='PO')
    full_month = pd.date_range(
        start='2018-01-01', end='2018-01-31 23:59', freq='1min',
        tz='Etc/GMT+8')
    assert_index_equal(remote.index, full_month)
    # Every column of the example file must exist in the remote data
    assert all([col in remote.columns for col in local.columns])
    # Check that all indices in example file are present in remote file
    assert local.index.isin(remote.index).all()
    assert meta['station'] == 'EU'
    assert meta['filetype'] == 'PO'
    assert meta['filenames'] == ['EUPO1801.txt']


@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_srml_nonexisting_month_warning():
    """A missing monthly file produces a warning naming that file."""
    with pytest.warns(UserWarning, match='file was not found: EUPO0912.txt'):
        # Request data for a period where not all files exist
        # Eugene (EU) station started reporting 1-minute data in January 2010
        # Only the warning is under test, so the return value is not kept.
        srml.get_srml(
            station='EU', start='2009-12-01', end='2010-01-31', filetype='PO')

0 comments on commit 81e5593

Please sign in to comment.