diff --git a/.buildkite/examples_build.yml b/.buildkite/examples_build.yml index b5be821e..9101af75 100644 --- a/.buildkite/examples_build.yml +++ b/.buildkite/examples_build.yml @@ -1,7 +1,7 @@ agents: queue: new-central slurm_mem: 8G - modules: climacommon/2024_05_27 + modules: climacommon/2024_10_10 slurm_time: 48:00:00 env: diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 5985a14d..dea3c20e 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -34,9 +34,9 @@ steps: - wait - label: "CPU JRA55 tests" - key: "cpu_jra55_tests" + key: "cpu_JRA55_tests" env: - TEST_GROUP: "jra55" + TEST_GROUP: "JRA55" commands: - "julia --project -e 'using Pkg; Pkg.test()'" agents: @@ -78,9 +78,9 @@ steps: slurm_ntasks: 1 - label: "GPU JRA55 tests" - key: "gpu_jra55_tests" + key: "gpu_JRA55_tests" env: - TEST_GROUP: "jra55" + TEST_GROUP: "JRA55" GPU_TEST: "true" commands: - "julia --project -e 'using Pkg; Pkg.test()'" diff --git a/examples/generate_atmos_dataset.jl b/examples/generate_atmos_dataset.jl index 8ba1d323..3568028a 100644 --- a/examples/generate_atmos_dataset.jl +++ b/examples/generate_atmos_dataset.jl @@ -4,9 +4,9 @@ using JLD2 time_indices = 1:1 -qt = ClimaOcean.JRA55.JRA55_field_time_series(:specific_humidity; time_indices) -Tt = ClimaOcean.JRA55.JRA55_field_time_series(:temperature; time_indices) -pt = ClimaOcean.JRA55.JRA55_field_time_series(:sea_level_pressure; time_indices) +qt = ClimaOcean.JRA55.JRA55FieldTimeSeries(:specific_humidity; time_indices) +Tt = ClimaOcean.JRA55.JRA55FieldTimeSeries(:temperature; time_indices) +pt = ClimaOcean.JRA55.JRA55FieldTimeSeries(:sea_level_pressure; time_indices) Nx, Ny, Nz = size(qt[1]) diff --git a/examples/inspect_JRA55_data.jl b/examples/inspect_JRA55_data.jl index 708bc1ca..058a1576 100644 --- a/examples/inspect_JRA55_data.jl +++ b/examples/inspect_JRA55_data.jl @@ -4,9 +4,8 @@ using Oceananigans using Oceananigans.Units using Printf -time_indices = Colon() -Qswt = ClimaOcean.JRA55.JRA55_field_time_series(:downwelling_shortwave_radiation; time_indices) -rht = ClimaOcean.JRA55.JRA55_field_time_series(:relative_humidity; time_indices) +Qswt = ClimaOcean.JRA55.JRA55FieldTimeSeries(:downwelling_shortwave_radiation) +rht = ClimaOcean.JRA55.JRA55FieldTimeSeries(:relative_humidity) function lonlat2xyz(lons::AbstractVector, lats::AbstractVector) x = [cosd(lat) * cosd(lon) for lon in lons, lat in lats] diff --git a/src/ClimaOcean.jl b/src/ClimaOcean.jl index 73a0ef31..e890d985 100644 --- a/src/ClimaOcean.jl +++ b/src/ClimaOcean.jl @@ -24,7 +24,7 @@ export exponential_z_faces, PowerLawStretching, LinearStretching, exponential_z_faces, - JRA55_field_time_series, + JRA55FieldTimeSeries, ECCO_field, ECCOMetadata, ECCORestoring, diff --git a/src/DataWrangling/DataWrangling.jl b/src/DataWrangling/DataWrangling.jl index 792c07b3..faaede15 100644 --- a/src/DataWrangling/DataWrangling.jl +++ b/src/DataWrangling/DataWrangling.jl @@ -1,5 +1,7 @@ module DataWrangling +export Metadata + using Oceananigans using Downloads using Printf @@ -13,6 +15,8 @@ using Oceananigans: pretty_filesize, location using Oceananigans.Utils: launch! using KernelAbstractions: @kernel, @index +using ClimaOcean.DistributedUtils + ##### ##### Downloading utilities ##### @@ -119,11 +123,12 @@ function save_field_time_series!(fts; path, name, overwrite_existing=false) return nothing end +include("metadata.jl") include("inpaint_mask.jl") -include("JRA55.jl") +include("JRA55/JRA55.jl") include("ECCO/ECCO.jl") -using .JRA55 using .ECCO +using .JRA55 end # module diff --git a/src/DataWrangling/ECCO/ECCO.jl b/src/DataWrangling/ECCO/ECCO.jl index 126f3b7a..fdc7cd47 100644 --- a/src/DataWrangling/ECCO/ECCO.jl +++ b/src/DataWrangling/ECCO/ECCO.jl @@ -5,7 +5,7 @@ export ECCO2Monthly, ECCO4Monthly, ECCO2Daily export ECCOFieldTimeSeries, ECCORestoring, LinearlyTaperedPolarMask using ClimaOcean -using ClimaOcean.DistributedUtils: @root +using ClimaOcean.DistributedUtils using ClimaOcean.DataWrangling using ClimaOcean.DataWrangling: inpaint_mask!, NearestNeighborInpainting, download_progress using ClimaOcean.InitialConditions: three_dimensional_regrid!, interpolate! diff --git a/src/DataWrangling/ECCO/ECCO_mask.jl b/src/DataWrangling/ECCO/ECCO_mask.jl index b8ad26cc..9c37de15 100644 --- a/src/DataWrangling/ECCO/ECCO_mask.jl +++ b/src/DataWrangling/ECCO/ECCO_mask.jl @@ -31,7 +31,7 @@ function ECCO_mask(metadata, architecture = CPU(); end # Default -ECCO_mask(arch::AbstractArchitecture=CPU()) = ECCO_mask(ECCOMetadata(:temperature), arch) +ECCO_mask(arch::AbstractArchitecture=CPU()) = ECCO_mask(Metadata(:temperature, version=ECCO4Monthly()), arch) @kernel function _set_ECCO2_mask!(mask, Tᵢ, minimum_value, maximum_value) i, j, k = @index(Global, NTuple) @@ -62,7 +62,7 @@ function ECCO_immersed_grid(metadata, architecture = CPU()) end # Default -ECCO_immersed_grid(arch::AbstractArchitecture=CPU()) = ECCO_immersed_grid(ECCOMetadata(:temperature), arch) +ECCO_immersed_grid(arch::AbstractArchitecture=CPU()) = ECCO_immersed_grid(Metadata(:temperature, version=ECCO4Monthly()), arch) @kernel function _set_height_from_mask!(bottom, grid, mask) i, j = @index(Global, NTuple) diff --git a/src/DataWrangling/ECCO/ECCO_metadata.jl b/src/DataWrangling/ECCO/ECCO_metadata.jl index ce54ea4c..eb43d1f3 100644 --- a/src/DataWrangling/ECCO/ECCO_metadata.jl +++ b/src/DataWrangling/ECCO/ECCO_metadata.jl @@ -1,42 +1,22 @@ using CFTime using Dates using ClimaOcean.DataWrangling -using ClimaOcean.DataWrangling: netrc_downloader +using ClimaOcean.DataWrangling: netrc_downloader, metadata_path import Dates: year, month, day - -using Base: @propagate_inbounds using Downloads import Oceananigans.Fields: set!, location import Base +import ClimaOcean.DataWrangling: all_dates, metadata_filename, default_download_folder struct ECCO2Monthly end struct ECCO2Daily end struct ECCO4Monthly end -""" - struct ECCOMetadata{D, V} +const ECCOMetadata{D, V} = Metadata{D, V} where {D, V<:Union{<:ECCO2Monthly, <:ECCO2Daily, <:ECCO4Monthly}} -Metadata information for an ECCO dataset: -- `name`: The name of the dataset. -- `dates`: The dates of the dataset, in an `AbstractCFDateTime` format. -- `version`: The version of the dataset, could be `ECCO2Monthly`, `ECCO2Daily`, or `ECCO4Monthly`. -- `dir`: The directory where the dataset is stored. -""" -struct ECCOMetadata{D, V} - name :: Symbol - dates :: D - version :: V - dir :: String -end - -Base.show(io::IO, metadata::ECCOMetadata) = - print(io, "ECCOMetadata:", '\n', - "├── name: $(metadata.name)", '\n', - "├── dates: $(metadata.dates)", '\n', - "├── version: $(metadata.version)", '\n', - "└── dir: $(metadata.dir)") +default_download_folder(::Union{<:ECCO2Monthly, <:ECCO2Daily, <:ECCO4Monthly}) = download_ECCO_cache Base.summary(md::ECCOMetadata{<:Any, <:ECCO2Daily}) = "ECCO2Daily $(md.name) metadata ($(first(md.dates))--$(last(md.dates)))" Base.summary(md::ECCOMetadata{<:Any, <:ECCO2Monthly}) = "ECCO2Monthly $(md.name) metadata ($(first(md.dates))--$(last(md.dates)))" @@ -46,78 +26,18 @@ Base.summary(md::ECCOMetadata{<:AbstractCFDateTime, <:ECCO2Daily}) = "ECCO2Dai Base.summary(md::ECCOMetadata{<:AbstractCFDateTime, <:ECCO2Monthly}) = "ECCO2Monthly $(md.name) metadata at $(md.dates)" Base.summary(md::ECCOMetadata{<:AbstractCFDateTime, <:ECCO4Monthly}) = "ECCO4Monthly $(md.name) metadata at $(md.dates)" -""" - ECCOMetadata(name::Symbol; - dates = DateTimeProlepticGregorian(1993, 1, 1), - version = ECCO4Monthly(), - dir = download_ECCO_cache) - -Construct an `ECCOMetadata` object with the specified parameters. - -Arguments -========= -- `name::Symbol`: The name of the metadata. - -Keyword Arguments -================= -- `dates`: The date(s) of the metadata. Note this can either be a single date, - representing a snapshot, or a range of dates, representing a time-series. - Default: `DateTimeProlepticGregorian(1993, 1, 1)`. - -- `version`: The data version. Supported versions are `ECCO2Monthly()`, `ECCO2Daily()`, - or `ECCO4Monthly()`. - -- `dir`: The directory of the data file. Default: `download_ECCO_cache`. -""" -function ECCOMetadata(name::Symbol; - dates = DateTimeProlepticGregorian(1993, 1, 1), - version = ECCO4Monthly(), - dir = download_ECCO_cache) - - return ECCOMetadata(name, dates, version, dir) -end - -ECCOMetadata(name::Symbol, date, version=ECCO4Monthly(); dir=download_ECCO_cache) = - ECCOMetadata(name, date, version, dir) - -# Treat ECCOMetadata as an array to allow iteration over the dates. -Base.eltype(metadata::ECCOMetadata) = Base.eltype(metadata.dates) - -@propagate_inbounds Base.getindex(m::ECCOMetadata, i::Int) = ECCOMetadata(m.name, m.dates[i], m.version, m.dir) -@propagate_inbounds Base.first(m::ECCOMetadata) = ECCOMetadata(m.name, m.dates[1], m.version, m.dir) -@propagate_inbounds Base.last(m::ECCOMetadata) = ECCOMetadata(m.name, m.dates[end], m.version, m.dir) - -@inline function Base.iterate(m::ECCOMetadata, i=1) - if (i % UInt) - 1 < length(m) - return ECCOMetadata(m.name, m.dates[i], m.version, m.dir), i + 1 - else - return nothing - end -end - -Base.axes(metadata::ECCOMetadata{<:AbstractCFDateTime}) = 1 -Base.first(metadata::ECCOMetadata{<:AbstractCFDateTime}) = metadata -Base.last(metadata::ECCOMetadata{<:AbstractCFDateTime}) = metadata -Base.iterate(metadata::ECCOMetadata{<:AbstractCFDateTime}) = (metadata, nothing) -Base.iterate(::ECCOMetadata{<:AbstractCFDateTime}, ::Any) = nothing - -Base.length(metadata::ECCOMetadata) = length(metadata.dates) Base.size(data::ECCOMetadata{<:Any, <:ECCO2Daily}) = (1440, 720, 50, length(data.dates)) Base.size(data::ECCOMetadata{<:Any, <:ECCO2Monthly}) = (1440, 720, 50, length(data.dates)) Base.size(data::ECCOMetadata{<:Any, <:ECCO4Monthly}) = (720, 360, 50, length(data.dates)) -Base.length(metadata::ECCOMetadata{<:AbstractCFDateTime}) = 1 Base.size(::ECCOMetadata{<:AbstractCFDateTime, <:ECCO2Daily}) = (1440, 720, 50, 1) Base.size(::ECCOMetadata{<:AbstractCFDateTime, <:ECCO2Monthly}) = (1440, 720, 50, 1) Base.size(::ECCOMetadata{<:AbstractCFDateTime, <:ECCO4Monthly}) = (720, 360, 50, 1) # The whole range of dates in the different dataset versions -all_ECCO_dates(::ECCO4Monthly) = DateTimeProlepticGregorian(1992, 1, 1) : Month(1) : DateTimeProlepticGregorian(2023, 12, 1) -all_ECCO_dates(::ECCO2Monthly) = DateTimeProlepticGregorian(1992, 1, 1) : Month(1) : DateTimeProlepticGregorian(2023, 12, 1) -all_ECCO_dates(::ECCO2Daily) = DateTimeProlepticGregorian(1992, 1, 4) : Day(1) : DateTimeProlepticGregorian(2023, 12, 31) - -# File names of metadata containing multiple dates -metadata_filename(metadata) = [metadata_filename(metadatum) for metadatum in metadata] +all_dates(::ECCO4Monthly) = DateTimeProlepticGregorian(1992, 1, 1) : Month(1) : DateTimeProlepticGregorian(2023, 12, 1) +all_dates(::ECCO2Monthly) = DateTimeProlepticGregorian(1992, 1, 1) : Month(1) : DateTimeProlepticGregorian(2023, 12, 1) +all_dates(::ECCO2Daily) = DateTimeProlepticGregorian(1992, 1, 4) : Day(3) : DateTimeProlepticGregorian(2023, 12, 31) # File name generation specific to each Dataset version function metadata_filename(metadata::ECCOMetadata{<:AbstractCFDateTime, <:ECCO4Monthly}) @@ -142,7 +62,6 @@ function metadata_filename(metadata::ECCOMetadata{<:AbstractCFDateTime}) end # Convenience functions -metadata_path(metadata) = joinpath(metadata.dir, metadata_filename(metadata)) short_name(data::ECCOMetadata{<:Any, <:ECCO2Daily}) = ECCO2_short_names[data.name] short_name(data::ECCOMetadata{<:Any, <:ECCO2Monthly}) = ECCO2_short_names[data.name] short_name(data::ECCOMetadata{<:Any, <:ECCO4Monthly}) = ECCO4_short_names[data.name] diff --git a/src/DataWrangling/ECCO/ECCO_restoring.jl b/src/DataWrangling/ECCO/ECCO_restoring.jl index a8b89ee7..3ea8792a 100644 --- a/src/DataWrangling/ECCO/ECCO_restoring.jl +++ b/src/DataWrangling/ECCO/ECCO_restoring.jl @@ -10,7 +10,7 @@ using JLD2 using Dates: Second using ClimaOcean: stateindex -using ClimaOcean.DataWrangling: NearestNeighborInpainting +using ClimaOcean.DataWrangling: NearestNeighborInpainting, native_times import Oceananigans.Fields: set! import Oceananigans.Forcings: regularize_forcing @@ -78,33 +78,6 @@ function set!(fts::ECCOFieldTimeSeries) return nothing end -""" - ECCO_times(metadata; start_time = first(metadata).dates) - -Extract the time values from the given metadata and calculates the time difference -from the start time. - -Arguments -========= -- `metadata`: The metadata containing the date information. -- `start_time`: The start time for calculating the time difference. Defaults to the first date in the metadata. - -Returns -======= -An array of time differences in seconds. -""" -function ECCO_times(metadata; start_time = first(metadata).dates) - times = zeros(length(metadata)) - for (t, data) in enumerate(metadata) - date = data.dates - time = date - start_time - time = Second(time).value - times[t] = time - end - - return times -end - """ ECCOFieldTimeSeries(metadata::ECCOMetadata [, arch_or_grid=CPU() ]; time_indices_in_memory = 2, @@ -156,7 +129,7 @@ function ECCOFieldTimeSeries(metadata::ECCOMetadata, grid::AbstractGrid; inpainting isa Int && (inpainting = NearestNeighborInpainting(inpainting)) backend = ECCONetCDFBackend(time_indices_in_memory, metadata; on_native_grid, inpainting, cache_inpainted_data) - times = ECCO_times(metadata) + times = native_times(metadata) loc = LX, LY, LZ = location(metadata) boundary_conditions = FieldBoundaryConditions(grid, loc) fts = FieldTimeSeries{LX, LY, LZ}(grid, times; backend, time_indexing, boundary_conditions) @@ -167,11 +140,11 @@ end function ECCOFieldTimeSeries(variable_name::Symbol, version=ECCO4Monthly(); architecture = CPU(), - dates = all_ECCO_dates(version), + dates = all_dates(version), dir = download_ECCO_cache, kw...) - metadata = ECCOMetadata(variable_name, dates, version, dir) + metadata = Metadata(variable_name, dates, version, dir) return ECCOFieldTimeSeries(metadata, architecture; kw...) end @@ -246,7 +219,7 @@ end """ ECCORestoring(variable_name::Symbol, [ arch_or_grid = CPU(), ]; version = ECCO4Monthly(), - dates = all_ECCO_dates(version), + dates = all_dates(version), time_indices_in_memory = 2, time_indexing = Cyclical(), mask = 1, @@ -292,7 +265,7 @@ Keyword Arguments - `version`: The version of the ECCO dataset. Default: `ECCO4Monthly()`. -- `dates`: The dates to use for the ECCO dataset. Default: `all_ECCO_dates(version)`. +- `dates`: The dates to use for the ECCO dataset. Default: `all_dates(version)`. - `time_indices_in_memory`: The number of time indices to keep in memory. The number is chosen based on a trade-off between increased performance (more indices in memory) and reduced @@ -315,11 +288,11 @@ Keyword Arguments function ECCORestoring(variable_name::Symbol, arch_or_grid = CPU(); version = ECCO4Monthly(), - dates = all_ECCO_dates(version), + dates = all_dates(version), dir = download_ECCO_cache, kw...) - metadata = ECCOMetadata(variable_name, dates, version, dir) + metadata = Metadata(variable_name, dates, version, dir) return ECCORestoring(metadata, arch_or_grid; kw...) end diff --git a/src/DataWrangling/JRA55.jl b/src/DataWrangling/JRA55.jl deleted file mode 100644 index 1fa655b7..00000000 --- a/src/DataWrangling/JRA55.jl +++ /dev/null @@ -1,724 +0,0 @@ -module JRA55 - -using Oceananigans -using Oceananigans.Units - -using Oceananigans: location -using Oceananigans.Architectures: arch_array -using Oceananigans.DistributedComputations -using Oceananigans.DistributedComputations: child_architecture -using Oceananigans.BoundaryConditions: fill_halo_regions! -using Oceananigans.Grids: λnodes, φnodes, on_architecture -using Oceananigans.Fields: interpolate! -using Oceananigans.OutputReaders: Cyclical, TotallyInMemory, AbstractInMemoryBackend, FlavorOfFTS, time_indices -using Oceananigans.TimeSteppers: Clock - -using ClimaOcean -using ClimaOcean.DistributedUtils: @root -using ClimaOcean.DataWrangling: download_progress - -using ClimaOcean.OceanSeaIceModels: - PrescribedAtmosphere, - PrescribedAtmosphereThermodynamicsParameters, - TwoBandDownwellingRadiation - -using CUDA: @allowscalar - -using NCDatasets -using JLD2 -using Dates -using Scratch - -import Oceananigans.Fields: set! -import Oceananigans.OutputReaders: new_backend, update_field_time_series! -using Downloads: download - -download_jra55_cache::String = "" - -function __init__() - global download_jra55_cache = @get_scratch!("JRA55") -end - -struct JRA55Data end -const JRA55PrescribedAtmosphere = PrescribedAtmosphere{<:Any, <:JRA55Data} - -# A list of all variables provided in the JRA55 dataset: -JRA55_variable_names = (:river_freshwater_flux, - :rain_freshwater_flux, - :snow_freshwater_flux, - :iceberg_freshwater_flux, - :specific_humidity, - :sea_level_pressure, - :relative_humidity, - :downwelling_longwave_radiation, - :downwelling_shortwave_radiation, - :temperature, - :eastward_velocity, - :northward_velocity) - -filenames = Dict( - :river_freshwater_flux => "RYF.friver.1990_1991.nc", # Freshwater fluxes from rivers - :rain_freshwater_flux => "RYF.prra.1990_1991.nc", # Freshwater flux from rainfall - :snow_freshwater_flux => "RYF.prsn.1990_1991.nc", # Freshwater flux from snowfall - :iceberg_freshwater_flux => "RYF.licalvf.1990_1991.nc", # Freshwater flux from calving icebergs - :specific_humidity => "RYF.huss.1990_1991.nc", # Surface specific humidity - :sea_level_pressure => "RYF.psl.1990_1991.nc", # Sea level pressure - :relative_humidity => "RYF.rhuss.1990_1991.nc", # Surface relative humidity - :downwelling_longwave_radiation => "RYF.rlds.1990_1991.nc", # Downwelling longwave radiation - :downwelling_shortwave_radiation => "RYF.rsds.1990_1991.nc", # Downwelling shortwave radiation - :temperature => "RYF.tas.1990_1991.nc", # Near-surface air temperature - :eastward_velocity => "RYF.uas.1990_1991.nc", # Eastward near-surface wind - :northward_velocity => "RYF.vas.1990_1991.nc", # Northward near-surface wind -) - -jra55_short_names = Dict( - :river_freshwater_flux => "friver", # Freshwater fluxes from rivers - :rain_freshwater_flux => "prra", # Freshwater flux from rainfall - :snow_freshwater_flux => "prsn", # Freshwater flux from snowfall - :iceberg_freshwater_flux => "licalvf", # Freshwater flux from calving icebergs - :specific_humidity => "huss", # Surface specific humidity - :sea_level_pressure => "psl", # Sea level pressure - :relative_humidity => "rhuss", # Surface relative humidity - :downwelling_longwave_radiation => "rlds", # Downwelling longwave radiation - :downwelling_shortwave_radiation => "rsds", # Downwelling shortwave radiation - :temperature => "tas", # Near-surface air temperature - :eastward_velocity => "uas", # Eastward near-surface wind - :northward_velocity => "vas", # Northward near-surface wind -) - -field_time_series_short_names = Dict( - :river_freshwater_flux => "Fri", # Freshwater fluxes from rivers - :rain_freshwater_flux => "Fra", # Freshwater flux from rainfall - :snow_freshwater_flux => "Fsn", # Freshwater flux from snowfall - :iceberg_freshwater_flux => "Fic", # Freshwater flux from calving icebergs - :specific_humidity => "qa", # Surface specific humidity - :sea_level_pressure => "pa", # Sea level pressure - :relative_humidity => "rh", # Surface relative humidity - :downwelling_longwave_radiation => "Ql", # Downwelling longwave radiation - :downwelling_shortwave_radiation => "Qs", # Downwelling shortwave radiation - :temperature => "Ta", # Near-surface air temperature - :eastward_velocity => "ua", # Eastward near-surface wind - :northward_velocity => "va", # Northward near-surface wind -) - -urls = Dict( - :shortwave_radiation => "https://www.dropbox.com/scl/fi/z6fkvmd9oe3ycmaxta131/" * - "RYF.rsds.1990_1991.nc?rlkey=r7q6zcbj6a4fxsq0f8th7c4tc&dl=0", - - :river_freshwater_flux => "https://www.dropbox.com/scl/fi/21ggl4p74k4zvbf04nb67/" * - "RYF.friver.1990_1991.nc?rlkey=ny2qcjkk1cfijmwyqxsfm68fz&dl=0", - - :rain_freshwater_flux => "https://www.dropbox.com/scl/fi/5icl1gbd7f5hvyn656kjq/" * - "RYF.prra.1990_1991.nc?rlkey=iifyjm4ppwyd8ztcek4dtx0k8&dl=0", - - :snow_freshwater_flux => "https://www.dropbox.com/scl/fi/1r4ajjzb3643z93ads4x4/" * - "RYF.prsn.1990_1991.nc?rlkey=auyqpwn060cvy4w01a2yskfah&dl=0", - - :iceberg_freshwater_flux => "https://www.dropbox.com/scl/fi/44nc5y27ohvif7lkvpyv0/" * - "RYF.licalvf.1990_1991.nc?rlkey=w7rqu48y2baw1efmgrnmym0jk&dl=0", - - :specific_humidity => "https://www.dropbox.com/scl/fi/66z6ymfr4ghkynizydc29/" * - "RYF.huss.1990_1991.nc?rlkey=107yq04aew8lrmfyorj68v4td&dl=0", - - :sea_level_pressure => "https://www.dropbox.com/scl/fi/0fk332027oru1iiseykgp/" * - "RYF.psl.1990_1991.nc?rlkey=4xpr9uah741483aukok6d7ctt&dl=0", - - :relative_humidity => "https://www.dropbox.com/scl/fi/1agwsp0lzvntuyf8bm9la/" * - "RYF.rhuss.1990_1991.nc?rlkey=8cd0vs7iy1rw58b9pc9t68gtz&dl=0", - - :downwelling_longwave_radiation => "https://www.dropbox.com/scl/fi/y6r62szkirrivua5nqq61/" * - "RYF.rlds.1990_1991.nc?rlkey=wt9yq3cyrvs2rbowoirf4nkum&dl=0", - - :downwelling_shortwave_radiation => "https://www.dropbox.com/scl/fi/z6fkvmd9oe3ycmaxta131/" * - "RYF.rsds.1990_1991.nc?rlkey=r7q6zcbj6a4fxsq0f8th7c4tc&dl=0", - - :temperature => "https://www.dropbox.com/scl/fi/fpl0npwi476w635g6lke9/" * - "RYF.tas.1990_1991.nc?rlkey=0skb9pe6lgbfbiaoybe7m945s&dl=0", - - :eastward_velocity => "https://www.dropbox.com/scl/fi/86wetpqla2x97isp8092g/" * - "RYF.uas.1990_1991.nc?rlkey=rcaf18sh1yz0v9g4hjm1249j0&dl=0", - - :northward_velocity => "https://www.dropbox.com/scl/fi/d38sflo9ddljstd5jwgml/" * - "RYF.vas.1990_1991.nc?rlkey=f9y3e57kx8xrb40gbstarf0x6&dl=0", -) - -compute_bounding_nodes(::Nothing, ::Nothing, LH, hnodes) = nothing -compute_bounding_nodes(bounds, ::Nothing, LH, hnodes) = bounds - -function compute_bounding_nodes(x::Number, ::Nothing, LH, hnodes) - ϵ = convert(typeof(x), 0.001) # arbitrary? - return (x - ϵ, x + ϵ) -end - -# TODO: remove the allowscalar -function compute_bounding_nodes(::Nothing, grid, LH, hnodes) - hg = hnodes(grid, LH()) - h₁ = @allowscalar minimum(hg) - h₂ = @allowscalar maximum(hg) - return h₁, h₂ -end - -function compute_bounding_indices(::Nothing, hc) - Nh = length(hc) - return 1, Nh -end - -function compute_bounding_indices(bounds::Tuple, hc) - h₁, h₂ = bounds - Nh = length(hc) - - # The following should work. If ᵒ are the extrema of nodes we want to - # interpolate to, and the following is a sketch of the JRA55 native grid, - # - # 1 2 3 4 5 - # | | | | | | - # | x ᵒ | x | x | x ᵒ | x | - # | | | | | | - # 1 2 3 4 5 6 - # - # then for example, we should find that (iᵢ, i₂) = (1, 5). - # So we want to reduce the first index by one, and limit them - # both by the available data. There could be some mismatch due - # to the use of different coordinate systems (ie whether λ ∈ (0, 360) - # which we may also need to handle separately. - i₁ = searchsortedfirst(hc, h₁) - i₂ = searchsortedfirst(hc, h₂) - i₁ = max(1, i₁ - 1) - i₂ = min(Nh, i₂) - - return i₁, i₂ -end - -infer_longitudinal_topology(::Nothing) = Periodic - -function infer_longitudinal_topology(λbounds) - λ₁, λ₂ = λbounds - TX = λ₂ - λ₁ ≈ 360 ? Periodic : Bounded - return TX -end - -function compute_bounding_indices(longitude, latitude, grid, LX, LY, λc, φc) - λbounds = compute_bounding_nodes(longitude, grid, LX, λnodes) - φbounds = compute_bounding_nodes(latitude, grid, LY, φnodes) - - i₁, i₂ = compute_bounding_indices(λbounds, λc) - j₁, j₂ = compute_bounding_indices(φbounds, φc) - TX = infer_longitudinal_topology(λbounds) - - return i₁, i₂, j₁, j₂, TX -end - -# Convert dates to range until Oceananigans supports dates natively -function jra55_times(native_times, start_time=native_times[1]) - - times = [] - for native_time in native_times - time = native_time - start_time - time = Second(time).value - push!(times, time) - end - - return times -end - -struct JRA55NetCDFBackend <: AbstractInMemoryBackend{Int} - start :: Int - length :: Int -end - -""" - JRA55NetCDFBackend(length) - -Represents a JRA55 FieldTimeSeries backed by JRA55 native .nc files. -""" -JRA55NetCDFBackend(length) = JRA55NetCDFBackend(1, length) - -Base.length(backend::JRA55NetCDFBackend) = backend.length -Base.summary(backend::JRA55NetCDFBackend) = string("JRA55NetCDFBackend(", backend.start, ", ", backend.length, ")") - -const JRA55NetCDFFTS = FlavorOfFTS{<:Any, <:Any, <:Any, <:Any, <:JRA55NetCDFBackend} - -function set!(fts::JRA55NetCDFFTS, path::String=fts.path, name::String=fts.name) - - ds = Dataset(path) - - # Note that each file should have the variables - # - ds["time"]: time coordinate - # - ds["lon"]: longitude at the location of the variable - # - ds["lat"]: latitude at the location of the variable - # - ds["lon_bnds"]: bounding longitudes between which variables are averaged - # - ds["lat_bnds"]: bounding latitudes between which variables are averaged - # - ds[shortname]: the variable data - - # Nodes at the variable location - λc = ds["lon"][:] - φc = ds["lat"][:] - LX, LY, LZ = location(fts) - i₁, i₂, j₁, j₂, TX = compute_bounding_indices(nothing, nothing, fts.grid, LX, LY, λc, φc) - - nn = time_indices(fts) - nn = collect(nn) - - if issorted(nn) - data = ds[name][i₁:i₂, j₁:j₂, nn] - else - # The time indices may be cycling past 1; eg ti = [6, 7, 8, 1]. - # However, DiskArrays does not seem to support loading data with unsorted - # indices. So to handle this, we load the data in chunks, where each chunk's - # indices are sorted, and then glue the data together. - m = findfirst(n -> n == 1, nn) - n1 = nn[1:m-1] - n2 = nn[m:end] - - data1 = ds[name][i₁:i₂, j₁:j₂, n1] - data2 = ds[name][i₁:i₂, j₁:j₂, n2] - data = cat(data1, data2, dims=3) - end - - close(ds) - - copyto!(interior(fts, :, :, 1, :), data) - fill_halo_regions!(fts) - - return nothing -end - -new_backend(::JRA55NetCDFBackend, start, length) = JRA55NetCDFBackend(start, length) - -""" - JRA55_field_time_series(variable_name; - architecture = CPU(), - grid = nothing, - location = nothing, - url = nothing, - dir = download_jra55_cache, - filename = nothing, - shortname = nothing, - latitude = nothing, - longitude = nothing, - backend = InMemory(), - time_indexing = Cyclical(), - preprocess_chunk_size = 10, - preprocess_architecture = CPU(), - time_indices = nothing) - -Return a `FieldTimeSeries` containing atmospheric reanalysis data for `variable_name`, -which describes one of the variables in the "repeat year forcing" dataset derived -from the Japanese 55-year atmospheric reanalysis for driving ocean-sea ice models (JRA55-do). -For more information about the derivation of the repeat-year forcing dataset, see - -> Stewart et al. (2020). JRA55-do-based repeat year forcing datasets for driving ocean–sea-ice models, _Ocean Modelling_, **147**, 101557, https://doi.org/10.1016/j.ocemod.2019.101557. - -The `variable_name`s (and their `shortname`s used in NetCDF files) available from the JRA55-do are: -- `:river_freshwater_flux` ("friver") -- `:rain_freshwater_flux` ("prra") -- `:snow_freshwater_flux` ("prsn") -- `:iceberg_freshwater_flux` ("licalvf") -- `:specific_humidity` ("huss") -- `:sea_level_pressure` ("psl") -- `:relative_humidity` ("rhuss") -- `:downwelling_longwave_radiation` ("rlds") -- `:downwelling_shortwave_radiation` ("rsds") -- `:temperature` ("ras") -- `:eastward_velocity` ("uas") -- `:northward_velocity` ("vas") - -Keyword arguments -================= - -- `architecture`: Architecture for the `FieldTimeSeries`. - Default: CPU() - -- `time_indices`: Indices of the timeseries to extract from file. - For example, `time_indices=1:3` returns a - `FieldTimeSeries` with the first three time snapshots - of `variable_name`. - -- `latitude`: Guiding latitude bounds for the resulting grid. - Used to slice the data when loading into memory. - Default: nothing, which retains the latitude range of the native grid. - -- `longitude`: Guiding longitude bounds for the resulting grid. - Used to slice the data when loading into memory. - Default: nothing, which retains the longitude range of the native grid. - -- `url`: The url accessed to download the data for `variable_name`. - Default: `ClimaOcean.JRA55.urls[variable_name]`. - -- `filename`: The name of the downloaded file. - Default: `ClimaOcean.JRA55.filenames[variable_name]`. - -- `shortname`: The "short name" of `variable_name` inside its NetCDF file. - Default: `ClimaOcean.JRA55.jra55_short_names[variable_name]`. - -- `interpolated_file`: file holding an Oceananigans compatible version of the JRA55 data. - If it does not exist it will be generated. - -- `time_chunks_in_memory`: number of fields held in memory. If `nothing` then the whole timeseries - is loaded (not recommended). -""" -function JRA55_field_time_series(variable_name; - architecture = CPU(), - grid = nothing, - location = nothing, - url = nothing, - dir = download_jra55_cache, - filename = nothing, - shortname = nothing, - latitude = nothing, - longitude = nothing, - backend = InMemory(), - time_indexing = Cyclical(), - preprocess_chunk_size = 10, - preprocess_architecture = CPU(), - time_indices = nothing) - - # OnDisk backends do not support time interpolation! - # Disallow OnDisk for JRA55 dataset loading - if backend isa OnDisk - msg = string("We cannot load the JRA55 dataset with an `OnDisk` backend") - throw(ArgumentError(msg)) - end - - if isnothing(filename) && !(variable_name ∈ JRA55_variable_names) - variable_strs = Tuple(" - :$name \n" for name in JRA55_variable_names) - variables_msg = prod(variable_strs) - - msg = string("The variable :$variable_name is not provided by the JRA55-do dataset!", '\n', - "The variables provided by the JRA55-do dataset are:", '\n', - variables_msg) - - throw(ArgumentError(msg)) - end - - filepath = isnothing(filename) ? joinpath(dir, filenames[variable_name]) : joinpath(dir, filename) - - if !isnothing(filename) && !isfile(filepath) && isnothing(url) - throw(ArgumentError("A filename was provided without a url, but the file does not exist.\n \ - If intended, please provide both the filename and url that should be used \n \ - to download the new file.")) - end - - isnothing(filename) && (filename = filenames[variable_name]) - isnothing(shortname) && (shortname = jra55_short_names[variable_name]) - isnothing(url) && (url = urls[variable_name]) - - # Record some important user decisions - totally_in_memory = backend isa TotallyInMemory - on_native_grid = isnothing(grid) - !on_native_grid && backend isa JRA55NetCDFBackend && error("Can't use custom grid with JRA55NetCDFBackend.") - - jld2_filepath = joinpath(dir, string("JRA55_repeat_year_", variable_name, ".jld2")) - fts_name = field_time_series_short_names[variable_name] - - # Note, we don't re-use existing jld2 files. - @root begin - isfile(filepath) || download(url, filepath; progress=download_progress) - isfile(jld2_filepath) && rm(jld2_filepath) - end - - # Determine default time indices - if totally_in_memory - # In this case, the whole time series is in memory. - # Either the time series is short, or we are doing a limited-area - # simulation, like in a single column. So, we conservatively - # set a default `time_indices = 1:2`. - isnothing(time_indices) && (time_indices = 1:2) - time_indices_in_memory = time_indices - native_fts_architecture = architecture - else - # In this case, part or all of the time series will be stored in a file. - # Note: if the user has provided a grid, we will have to preprocess the - # .nc JRA55 data into a .jld2 file. In this case, `time_indices` refers - # to the time_indices that we will preprocess; - # by default we choose all of them. The architecture is only the - # architecture used for preprocessing, which typically will be CPU() - # even if we would like the final FieldTimeSeries on the GPU. - isnothing(time_indices) && (time_indices = :) - - if backend isa JRA55NetCDFBackend - time_indices_in_memory = 1:length(backend) - native_fts_architecture = architecture - else # then `time_indices_in_memory` refers to preprocessing - maximum_index = min(preprocess_chunk_size, length(time_indices)) - time_indices_in_memory = 1:maximum_index - native_fts_architecture = preprocess_architecture - end - end - - # Set a default location. - if isnothing(location) - LX = LY = Center - else - LX, LY = location - end - - ds = Dataset(filepath) - - # Note that each file should have the variables - # - ds["time"]: time coordinate - # - ds["lon"]: longitude at the location of the variable - # - ds["lat"]: latitude at the location of the variable - # - ds["lon_bnds"]: bounding longitudes between which variables are averaged - # - ds["lat_bnds"]: bounding latitudes between which variables are averaged - # - ds[shortname]: the variable data - - # Nodes at the variable location - λc = ds["lon"][:] - φc = ds["lat"][:] - - # Interfaces for the "native" JRA55 grid - λn = ds["lon_bnds"][1, :] - φn = ds["lat_bnds"][1, :] - - # The .nc coordinates lon_bnds and lat_bnds do not include - # the last interface, so we push them here. - push!(φn, 90) - push!(λn, λn[1] + 360) - - # TODO: support loading just part of the JRA55 data. - # Probably with arguments that take latitude, longitude bounds. - i₁, i₂, j₁, j₂, TX = compute_bounding_indices(longitude, latitude, grid, LX, LY, λc, φc) - - native_times = ds["time"][time_indices] - data = ds[shortname][i₁:i₂, j₁:j₂, time_indices_in_memory] - λr = λn[i₁:i₂+1] - φr = φn[j₁:j₂+1] - Nrx, Nry, Nt = size(data) - close(ds) - - N = (Nrx, Nry) - H = min.(N, (3, 3)) - - JRA55_native_grid = LatitudeLongitudeGrid(native_fts_architecture, Float32; - halo = H, - size = N, - longitude = λr, - latitude = φr, - topology = (TX, Bounded, Flat)) - - boundary_conditions = FieldBoundaryConditions(JRA55_native_grid, (Center, Center, Nothing)) - times = jra55_times(native_times) - - if backend isa JRA55NetCDFBackend - fts = FieldTimeSeries{Center, Center, Nothing}(JRA55_native_grid, times; - backend, - time_indexing, - boundary_conditions, - path = filepath, - name = shortname) - - # Fill the data in a GPU-friendly manner - copyto!(interior(fts, :, :, 1, :), data) - fill_halo_regions!(fts) - - return fts - else - # Make times into an array for later preprocessing - if !totally_in_memory - times = collect(times) - end - - native_fts = FieldTimeSeries{Center, Center, Nothing}(JRA55_native_grid, times; - time_indexing, - boundary_conditions) - - # Fill the data in a GPU-friendly manner - copyto!(interior(native_fts, :, :, 1, :), data) - fill_halo_regions!(native_fts) - - if on_native_grid && totally_in_memory - return native_fts - - elseif totally_in_memory # but not on the native grid! - boundary_conditions = FieldBoundaryConditions(grid, (LX, LY, Nothing)) - fts = FieldTimeSeries{LX, LY, Nothing}(grid, times; time_indexing, boundary_conditions) - interpolate!(fts, native_fts) - return fts - end - end - - @info "Pre-processing JRA55 $variable_name data into a JLD2 file..." - - preprocessing_grid = on_native_grid ? JRA55_native_grid : grid - - # Re-open the dataset! - ds = Dataset(filepath) - all_datetimes = ds["time"][time_indices] - all_Nt = length(all_datetimes) - - all_times = jra55_times(all_datetimes) - - on_disk_fts = FieldTimeSeries{LX, LY, Nothing}(preprocessing_grid, all_times; - boundary_conditions, - backend = OnDisk(), - path = jld2_filepath, - name = fts_name) - - # Save data to disk, one field at a time - start_clock = time_ns() - n = 1 # on disk - m = 0 # in memory - - times_in_memory = all_times[time_indices_in_memory] - - fts = FieldTimeSeries{LX, LY, Nothing}(preprocessing_grid, times_in_memory; - boundary_conditions, - backend = InMemory(), - path = jld2_filepath, - name = fts_name) - - # Re-compute data - new_data = ds[shortname][i₁:i₂, j₁:j₂, time_indices_in_memory] - - if !on_native_grid - copyto!(interior(native_fts, :, :, 1, :), new_data[:, :, :]) - fill_halo_regions!(native_fts) - interpolate!(fts, native_fts) - else - copyto!(interior(fts, :, :, 1, :), new_data[:, :, :]) - end - - while n <= all_Nt - print(" ... processing time index $n of $all_Nt \r") - - if time_indices_in_memory isa Colon || n ∈ time_indices_in_memory - m += 1 - else # load new data - # Update time_indices - time_indices_in_memory = time_indices_in_memory .+ preprocess_chunk_size - n₁ = first(time_indices_in_memory) - - # Clip time_indices if they extend past the end of the dataset - if last(time_indices_in_memory) > all_Nt - time_indices_in_memory = UnitRange(n₁, all_Nt) - end - - # Re-compute times - new_times = jra55_times(all_times[time_indices_in_memory], all_times[n₁]) - native_fts.times = new_times - - # Re-compute data - new_data = ds[shortname][i₁:i₂, j₁:j₂, time_indices_in_memory] - fts.times = new_times - - if !on_native_grid - copyto!(interior(native_fts, :, :, 1, :), new_data[:, :, :]) - fill_halo_regions!(native_fts) - interpolate!(fts, native_fts) - else - copyto!(interior(fts, :, :, 1, :), new_data[:, :, :]) - end - - m = 1 # reset - end - - set!(on_disk_fts, fts[m], n, fts.times[m]) - - n += 1 - end - - elapsed = 1e-9 * (time_ns() - start_clock) - elapsed_str = prettytime(elapsed) - @info " ... done ($elapsed_str)" * repeat(" ", 20) - - close(ds) - - user_fts = FieldTimeSeries(jld2_filepath, fts_name; architecture, backend, time_indexing) - fill_halo_regions!(user_fts) - - return user_fts -end - -const AA = Oceananigans.Architectures.AbstractArchitecture - -JRA55PrescribedAtmosphere(time_indices=Colon(); kw...) = - JRA55PrescribedAtmosphere(CPU(), time_indices; kw...) - -JRA55PrescribedAtmosphere(arch::Distributed, time_indices=Colon(); kw...) = - JRA55PrescribedAtmosphere(child_architecture(arch), time_indices; kw...) - -# TODO: allow the user to pass dates -""" - JRA55PrescribedAtmosphere(architecture::AA, time_indices=Colon(); - backend = nothing, - time_indexing = Cyclical(), - reference_height = 10, # meters - include_rivers_and_icebergs = false, - other_kw...) - -Return a `PrescribedAtmosphere` representing JRA55 reanalysis data. -""" -function JRA55PrescribedAtmosphere(architecture::AA, time_indices=Colon(); - backend = nothing, - time_indexing = Cyclical(), - reference_height = 10, # meters - include_rivers_and_icebergs = false, - other_kw...) - - if isnothing(backend) # apply a default - Ni = try - length(time_indices) - catch - Inf - end - - # Manufacture a default for the number of fields to keep InMemory - Nf = min(24, Ni) - backend = JRA55NetCDFBackend(Nf) - end - - kw = (; time_indices, time_indexing, backend, architecture) - kw = merge(kw, other_kw) - - ua = JRA55_field_time_series(:eastward_velocity; kw...) - va = JRA55_field_time_series(:northward_velocity; kw...) - Ta = JRA55_field_time_series(:temperature; kw...) - qa = JRA55_field_time_series(:specific_humidity; kw...) - pa = JRA55_field_time_series(:sea_level_pressure; kw...) - Fra = JRA55_field_time_series(:rain_freshwater_flux; kw...) - Fsn = JRA55_field_time_series(:snow_freshwater_flux; kw...) - Ql = JRA55_field_time_series(:downwelling_longwave_radiation; kw...) - Qs = JRA55_field_time_series(:downwelling_shortwave_radiation; kw...) - - # In JRA55, rivers and icebergs are on a different grid and stored with - # a different frequency than the rest of the data. Here, we use the - # `PrescribedAtmosphere.auxiliary_freshwater_flux` feature to represent them. - if include_rivers_and_icebergs - Fri = JRA55_field_time_series(:river_freshwater_flux; kw...) - Fic = JRA55_field_time_series(:iceberg_freshwater_flux; kw...) - auxiliary_freshwater_flux = (rivers=Fri, icebergs=Fic) - else - auxiliary_freshwater_flux = nothing - end - - times = ua.times - freshwater_flux = (rain=Fra, snow=Fsn) - velocities = (u=ua, v=va) - tracers = (T=Ta, q=qa) - pressure = pa - downwelling_radiation = TwoBandDownwellingRadiation(shortwave=Qs, longwave=Ql) - - FT = eltype(ua) - boundary_layer_height = convert(FT, 600) - reference_height = convert(FT, reference_height) - thermodynamics_parameters = PrescribedAtmosphereThermodynamicsParameters(FT) - grid = ua.grid - metadata = JRA55Data() - - return PrescribedAtmosphere(grid, - Clock{Float64}(time = 0), - metadata, - velocities, - pressure, - tracers, - freshwater_flux, - auxiliary_freshwater_flux, - downwelling_radiation, - thermodynamics_parameters, - times, - reference_height, - boundary_layer_height) -end - -end # module diff --git a/src/DataWrangling/JRA55/JRA55.jl b/src/DataWrangling/JRA55/JRA55.jl new file mode 100644 index 00000000..80127d40 --- /dev/null +++ b/src/DataWrangling/JRA55/JRA55.jl @@ -0,0 +1,38 @@ +module JRA55 + +export JRA55FieldTimeSeries, JRA55PrescribedAtmosphere, JRA55RepeatYear + +using Oceananigans +using Oceananigans.Units + +using Oceananigans.Architectures: arch_array +using Oceananigans.DistributedComputations +using Oceananigans.DistributedComputations: child_architecture +using Oceananigans.BoundaryConditions: fill_halo_regions! +using Oceananigans.Grids: λnodes, φnodes, on_architecture +using Oceananigans.Fields: interpolate! +using Oceananigans.OutputReaders: Cyclical, TotallyInMemory, AbstractInMemoryBackend, FlavorOfFTS, time_indices + +using ClimaOcean +using ClimaOcean.DistributedUtils + +using ClimaOcean.OceanSeaIceModels: + PrescribedAtmosphere, + TwoBandDownwellingRadiation + +using CUDA: @allowscalar + +using NCDatasets +using JLD2 +using Dates +using Scratch + +import Oceananigans.Fields: set! +import Oceananigans.OutputReaders: new_backend, update_field_time_series! +using Downloads: download + +include("JRA55_metadata.jl") +include("JRA55_field_time_series.jl") +include("JRA55_prescribed_atmosphere.jl") + +end # module diff --git a/src/DataWrangling/JRA55/JRA55_field_time_series.jl b/src/DataWrangling/JRA55/JRA55_field_time_series.jl new file mode 100644 index 00000000..e0ba709e --- /dev/null +++ b/src/DataWrangling/JRA55/JRA55_field_time_series.jl @@ -0,0 +1,341 @@ +using ClimaOcean.DataWrangling: all_dates, native_times +using Oceananigans.Grids: AbstractGrid +using Oceananigans.OutputReaders: PartlyInMemory + +download_JRA55_cache::String = "" + +function __init__() + global download_JRA55_cache = @get_scratch!("JRA55") +end + +compute_bounding_nodes(::Nothing, ::Nothing, LH, hnodes) = nothing +compute_bounding_nodes(bounds, ::Nothing, LH, hnodes) = bounds + +function compute_bounding_nodes(x::Number, ::Nothing, LH, hnodes) + ϵ = convert(typeof(x), 0.001) # arbitrary? + return (x - ϵ, x + ϵ) +end + +# TODO: remove the allowscalar +function compute_bounding_nodes(::Nothing, grid, LH, hnodes) + hg = hnodes(grid, LH()) + h₁ = @allowscalar minimum(hg) + h₂ = @allowscalar maximum(hg) + return h₁, h₂ +end + +function compute_bounding_indices(::Nothing, hc) + Nh = length(hc) + return 1, Nh +end + +function compute_bounding_indices(bounds::Tuple, hc) + h₁, h₂ = bounds + Nh = length(hc) + + # The following should work. If ᵒ are the extrema of nodes we want to + # interpolate to, and the following is a sketch of the JRA55 native grid, + # + # 1 2 3 4 5 + # | | | | | | + # | x ᵒ | x | x | x ᵒ | x | + # | | | | | | + # 1 2 3 4 5 6 + # + # then for example, we should find that (iᵢ, i₂) = (1, 5). + # So we want to reduce the first index by one, and limit them + # both by the available data. There could be some mismatch due + # to the use of different coordinate systems (ie whether λ ∈ (0, 360) + # which we may also need to handle separately. + i₁ = searchsortedfirst(hc, h₁) + i₂ = searchsortedfirst(hc, h₂) + i₁ = max(1, i₁ - 1) + i₂ = min(Nh, i₂) + + return i₁, i₂ +end + +infer_longitudinal_topology(::Nothing) = Periodic + +function infer_longitudinal_topology(λbounds) + λ₁, λ₂ = λbounds + TX = λ₂ - λ₁ ≈ 360 ? Periodic : Bounded + return TX +end + +function compute_bounding_indices(longitude, latitude, grid, LX, LY, λc, φc) + λbounds = compute_bounding_nodes(longitude, grid, LX, λnodes) + φbounds = compute_bounding_nodes(latitude, grid, LY, φnodes) + + i₁, i₂ = compute_bounding_indices(λbounds, λc) + j₁, j₂ = compute_bounding_indices(φbounds, φc) + TX = infer_longitudinal_topology(λbounds) + + return i₁, i₂, j₁, j₂, TX +end + +struct JRA55NetCDFBackend <: AbstractInMemoryBackend{Int} + start :: Int + length :: Int +end + +""" + JRA55NetCDFBackend(length) + +Represents a JRA55 FieldTimeSeries backed by JRA55 native .nc files. +""" +JRA55NetCDFBackend(length) = JRA55NetCDFBackend(1, length) + +Base.length(backend::JRA55NetCDFBackend) = backend.length +Base.summary(backend::JRA55NetCDFBackend) = string("JRA55NetCDFBackend(", backend.start, ", ", backend.length, ")") + +const JRA55NetCDFFTS = FlavorOfFTS{<:Any, <:Any, <:Any, <:Any, <:JRA55NetCDFBackend} + +# TODO: This will need to change when we add a method for JRA55MultipleYears +function set!(fts::JRA55NetCDFFTS, path::String=fts.path, name::String=fts.name) + + ds = Dataset(path) + + # Note that each file should have the variables + # - ds["time"]: time coordinate + # - ds["lon"]: longitude at the location of the variable + # - ds["lat"]: latitude at the location of the variable + # - ds["lon_bnds"]: bounding longitudes between which variables are averaged + # - ds["lat_bnds"]: bounding latitudes between which variables are averaged + # - ds[shortname]: the variable data + + # Nodes at the variable location + λc = ds["lon"][:] + φc = ds["lat"][:] + LX, LY, LZ = location(fts) + i₁, i₂, j₁, j₂, TX = compute_bounding_indices(nothing, nothing, fts.grid, LX, LY, λc, φc) + + nn = time_indices(fts) + nn = collect(nn) + + if issorted(nn) + data = ds[name][i₁:i₂, j₁:j₂, nn] + else + # The time indices may be cycling past 1; eg ti = [6, 7, 8, 1]. + # However, DiskArrays does not seem to support loading data with unsorted + # indices. So to handle this, we load the data in chunks, where each chunk's + # indices are sorted, and then glue the data together. + m = findfirst(n -> n == 1, nn) + n1 = nn[1:m-1] + n2 = nn[m:end] + + data1 = ds[name][i₁:i₂, j₁:j₂, n1] + data2 = ds[name][i₁:i₂, j₁:j₂, n2] + data = cat(data1, data2, dims=3) + end + + close(ds) + + copyto!(interior(fts, :, :, 1, :), data) + fill_halo_regions!(fts) + + return nothing +end + +new_backend(::JRA55NetCDFBackend, start, length) = JRA55NetCDFBackend(start, length) + +""" + JRA55FieldTimeSeries(variable_name, [FT = Float32]; + version = JRA55RepeatYear(), + dates = all_JRA55_dates(version), + latitude = nothing, + longitude = nothing, + dir = download_JRA55_cache, + filename = nothing, + shortname = nothing, + backend = InMemory(), + time_indexing = Cyclical(), + preprocess_chunk_size = 10, + preprocess_architecture = CPU()) + +Return a `FieldTimeSeries` containing atmospheric reanalysis data for `variable_name`, +which describes one of the variables in the "repeat year forcing" dataset derived +from the Japanese 55-year atmospheric reanalysis for driving ocean-sea ice models (JRA55-do). +For more information about the derivation of the repeat-year forcing dataset, see + +> Stewart et al. (2020). JRA55-do-based repeat year forcing datasets for driving ocean–sea-ice models, _Ocean Modelling_, **147**, 101557, https://doi.org/10.1016/j.ocemod.2019.101557. + +The `variable_name`s (and their `shortname`s used in NetCDF files) available from the JRA55-do are: +- `:river_freshwater_flux` ("friver") +- `:rain_freshwater_flux` ("prra") +- `:snow_freshwater_flux` ("prsn") +- `:iceberg_freshwater_flux` ("licalvf") +- `:specific_humidity` ("huss") +- `:sea_level_pressure` ("psl") +- `:relative_humidity` ("rhuss") +- `:downwelling_longwave_radiation` ("rlds") +- `:downwelling_shortwave_radiation` ("rsds") +- `:temperature` ("ras") +- `:eastward_velocity` ("uas") +- `:northward_velocity` ("vas") + +Keyword arguments +================= + +- `architecture`: Architecture for the `FieldTimeSeries`. Default: CPU() + +- `dates`: The date(s) of the metadata. Note this can either be a single date, + representing a snapshot, or a range of dates, representing a time-series. + Default: `all_dates(version)` (see `all_dates`). + +- `version`: The data version. The only supported versions is `JRA55RepeatYear()` + +- `dir`: The directory of the data file. Default: `ClimaOcean.JRA55.download_JRA55_cache`. + +- `latitude`: Guiding latitude bounds for the resulting grid. + Used to slice the data when loading into memory. + Default: nothing, which retains the latitude range of the native grid. + +- `longitude`: Guiding longitude bounds for the resulting grid. + Used to slice the data when loading into memory. + Default: nothing, which retains the longitude range of the native grid. + +- `backend`: Backend for the `FieldTimeSeries`. The two options are + * `InMemory()`: the whole time series is loaded into memory. + * `JRA55NetCDFBackend(total_time_instances_in_memory)`: only a subset of the time series is loaded into memory. + Default: `InMemory()`. +""" +function JRA55FieldTimeSeries(variable_name::Symbol, architecture = CPU(), FT=Float32,; + version = JRA55RepeatYear(), + dates = all_dates(version), + dir = download_JRA55_cache, + kw...) + + metadata = Metadata(variable_name, dates, version, dir) + + return JRA55FieldTimeSeries(metadata, architecture, FT; kw...) +end + +function JRA55FieldTimeSeries(metadata::JRA55Metadata, architecture=CPU(), FT=Float32; + latitude = nothing, + longitude = nothing, + backend = InMemory(), + time_indexing = Cyclical()) + + # First thing: we download the dataset! + download_dataset(metadata) + + # Unpack metadata details + time_indices = JRA55_time_indices(metadata.version, metadata.dates) + shortname = short_name(metadata) + variable_name = metadata.name + + filepath = metadata_path(metadata) # Might be multiple paths!!! + filepath = filepath isa AbstractArray ? first(filepath) : filepath + + # OnDisk backends do not support time interpolation! + # Disallow OnDisk for JRA55 dataset loading + if ((backend isa InMemory) && !isnothing(backend.length)) || backend isa OnDisk + msg = string("We cannot load the JRA55 dataset with a $(backend) backend. Use `InMemory()` or `JRA55NetCDFBackend(N)` instead.") + throw(ArgumentError(msg)) + end + + if !(variable_name ∈ JRA55_variable_names) + variable_strs = Tuple(" - :$name \n" for name in JRA55_variable_names) + variables_msg = prod(variable_strs) + + msg = string("The variable :$variable_name is not provided by the JRA55-do dataset!", '\n', + "The variables provided by the JRA55-do dataset are:", '\n', + variables_msg) + + throw(ArgumentError(msg)) + end + + # Record some important user decisions + totally_in_memory = backend isa TotallyInMemory + + # Determine default time indices + if totally_in_memory + # In this case, the whole time series is in memory. + # Either the time series is short, or we are doing a limited-area + # simulation, like in a single column. So, we conservatively + # set a default `time_indices = 1:2`. + time_indices_in_memory = time_indices + native_fts_architecture = architecture + else + # In this case, part or all of the time series will be stored in a file. + # Note: if the user has provided a grid, we will have to preprocess the + # .nc JRA55 data into a .jld2 file. In this case, `time_indices` refers + # to the time_indices that we will preprocess; + # by default we choose all of them. The architecture is only the + # architecture used for preprocessing, which typically will be CPU() + # even if we would like the final FieldTimeSeries on the GPU. + time_indices_in_memory = 1:length(backend) + native_fts_architecture = architecture + end + + ds = Dataset(filepath) + + # Note that each file should have the variables + # - ds["time"]: time coordinate + # - ds["lon"]: longitude at the location of the variable + # - ds["lat"]: latitude at the location of the variable + # - ds["lon_bnds"]: bounding longitudes between which variables are averaged + # - ds["lat_bnds"]: bounding latitudes between which variables are averaged + # - ds[shortname]: the variable data + + # Nodes at the variable location + λc = ds["lon"][:] + φc = ds["lat"][:] + + # Interfaces for the "native" JRA55 grid + λn = ds["lon_bnds"][1, :] + φn = ds["lat_bnds"][1, :] + + # The .nc coordinates lon_bnds and lat_bnds do not include + # the last interface, so we push them here. + push!(φn, 90) + push!(λn, λn[1] + 360) + + i₁, i₂, j₁, j₂, TX = compute_bounding_indices(longitude, latitude, nothing, Center, Center, λc, φc) + + data = ds[shortname][i₁:i₂, j₁:j₂, time_indices_in_memory] + λr = λn[i₁:i₂+1] + φr = φn[j₁:j₂+1] + Nrx, Nry, Nt = size(data) + close(ds) + + N = (Nrx, Nry) + H = min.(N, (3, 3)) + + JRA55_native_grid = LatitudeLongitudeGrid(native_fts_architecture, FT; + halo = H, + size = N, + longitude = λr, + latitude = φr, + topology = (TX, Bounded, Flat)) + + boundary_conditions = FieldBoundaryConditions(JRA55_native_grid, (Center, Center, Nothing)) + times = native_times(metadata) + + if backend isa JRA55NetCDFBackend + fts = FieldTimeSeries{Center, Center, Nothing}(JRA55_native_grid, times; + backend, + time_indexing, + boundary_conditions, + path = filepath, + name = shortname) + + # Fill the data in a GPU-friendly manner + copyto!(interior(fts, :, :, 1, :), data) + fill_halo_regions!(fts) + + return fts + else + native_fts = FieldTimeSeries{Center, Center, Nothing}(JRA55_native_grid, times; + time_indexing, + backend, + boundary_conditions) + + # Fill the data in a GPU-friendly manner + copyto!(interior(native_fts, :, :, 1, :), data) + fill_halo_regions!(native_fts) + + return native_fts + end +end diff --git a/src/DataWrangling/JRA55/JRA55_metadata.jl b/src/DataWrangling/JRA55/JRA55_metadata.jl new file mode 100644 index 00000000..32853989 --- /dev/null +++ b/src/DataWrangling/JRA55/JRA55_metadata.jl @@ -0,0 +1,153 @@ +using CFTime +using Dates +using Downloads + +using ClimaOcean.DataWrangling +using ClimaOcean.DataWrangling: Metadata, metadata_path, download_progress + +import Dates: year, month, day +import Oceananigans.Fields: set! +import Base + +import Oceananigans.Fields: set!, location +import ClimaOcean.DataWrangling: all_dates, metadata_filename, default_download_folder + +struct JRA55MultipleYears end +struct JRA55RepeatYear end + +const JRA55Metadata{T, V} = Metadata{T, V} where {T, V<:Union{<:JRA55MultipleYears, <:JRA55RepeatYear}} + +default_download_folder(::Union{<:JRA55MultipleYears, <:JRA55RepeatYear}) = download_JRA55_cache + +Base.size(data::JRA55Metadata) = (640, 320, length(data.dates)) +Base.size(::JRA55Metadata{<:AbstractCFDateTime}) = (640, 320, 1) + +# The whole range of dates in the different dataset versions +all_dates(::JRA55RepeatYear) = DateTimeProlepticGregorian(1990, 1, 1) : Hour(3) : DateTimeProlepticGregorian(1990, 12, 31, 23, 59, 59) +all_dates(::JRA55MultipleYears) = DateTimeProlepticGregorian(1958, 1, 1) : Hour(3) : DateTimeProlepticGregorian(2021, 1, 1) + +function JRA55_time_indices(version, dates) + all_JRA55_dates = all_dates(version) + indices = Int[] + + for date in dates + index = findfirst(x -> x == date, all_JRA55_dates) + push!(indices, index) + end + + return indices +end + +# File name generation specific to each Dataset version +function metadata_filename(metadata::Metadata{<:Any, <:JRA55RepeatYear}) # No difference + shortname = short_name(metadata) + return "RYF." * shortname * ".1990_1991.nc" +end + +# TODO: Implement this function for JRA55MultipleYears +# function metadata_filename(metadata::Metadata{<:AbstractCFDateTime, <:JRA55MultipleYears}) +# # fix the filename +# shortname = short_name(metadata) +# year = Dates.year(metadata.dates) +# suffix = "_input4MIPs_atmosphericState_OMIP_MRI-JRA55-do-1-5-0_gr_" +# dates = "($year)01010130-($year)12312330.nc" +# return shortname * suffix * dates * ".nc" +# end + +# Convenience functions +short_name(data::JRA55Metadata) = JRA55_short_names[data.name] +location(data::JRA55Metadata) = (Center, Center, Center) + +# A list of all variables provided in the JRA55 dataset: +JRA55_variable_names = (:river_freshwater_flux, + :rain_freshwater_flux, + :snow_freshwater_flux, + :iceberg_freshwater_flux, + :specific_humidity, + :sea_level_pressure, + :relative_humidity, + :downwelling_longwave_radiation, + :downwelling_shortwave_radiation, + :temperature, + :eastward_velocity, + :northward_velocity) + +JRA55_short_names = Dict( + :river_freshwater_flux => "friver", # Freshwater fluxes from rivers + :rain_freshwater_flux => "prra", # Freshwater flux from rainfall + :snow_freshwater_flux => "prsn", # Freshwater flux from snowfall + :iceberg_freshwater_flux => "licalvf", # Freshwater flux from calving icebergs + :specific_humidity => "huss", # Surface specific humidity + :sea_level_pressure => "psl", # Sea level pressure + :relative_humidity => "rhuss", # Surface relative humidity + :downwelling_longwave_radiation => "rlds", # Downwelling longwave radiation + :downwelling_shortwave_radiation => "rsds", # Downwelling shortwave radiation + :temperature => "tas", # Near-surface air temperature + :eastward_velocity => "uas", # Eastward near-surface wind + :northward_velocity => "vas", # Northward near-surface wind +) + +JRA55_repeat_year_urls = Dict( + :shortwave_radiation => "https://www.dropbox.com/scl/fi/z6fkvmd9oe3ycmaxta131/" * + "RYF.rsds.1990_1991.nc?rlkey=r7q6zcbj6a4fxsq0f8th7c4tc&dl=0", + + :river_freshwater_flux => "https://www.dropbox.com/scl/fi/21ggl4p74k4zvbf04nb67/" * + "RYF.friver.1990_1991.nc?rlkey=ny2qcjkk1cfijmwyqxsfm68fz&dl=0", + + :rain_freshwater_flux => "https://www.dropbox.com/scl/fi/5icl1gbd7f5hvyn656kjq/" * + "RYF.prra.1990_1991.nc?rlkey=iifyjm4ppwyd8ztcek4dtx0k8&dl=0", + + :snow_freshwater_flux => "https://www.dropbox.com/scl/fi/1r4ajjzb3643z93ads4x4/" * + "RYF.prsn.1990_1991.nc?rlkey=auyqpwn060cvy4w01a2yskfah&dl=0", + + :iceberg_freshwater_flux => "https://www.dropbox.com/scl/fi/44nc5y27ohvif7lkvpyv0/" * + "RYF.licalvf.1990_1991.nc?rlkey=w7rqu48y2baw1efmgrnmym0jk&dl=0", + + :specific_humidity => "https://www.dropbox.com/scl/fi/66z6ymfr4ghkynizydc29/" * + "RYF.huss.1990_1991.nc?rlkey=107yq04aew8lrmfyorj68v4td&dl=0", + + :sea_level_pressure => "https://www.dropbox.com/scl/fi/0fk332027oru1iiseykgp/" * + "RYF.psl.1990_1991.nc?rlkey=4xpr9uah741483aukok6d7ctt&dl=0", + + :relative_humidity => "https://www.dropbox.com/scl/fi/1agwsp0lzvntuyf8bm9la/" * + "RYF.rhuss.1990_1991.nc?rlkey=8cd0vs7iy1rw58b9pc9t68gtz&dl=0", + + :downwelling_longwave_radiation => "https://www.dropbox.com/scl/fi/y6r62szkirrivua5nqq61/" * + "RYF.rlds.1990_1991.nc?rlkey=wt9yq3cyrvs2rbowoirf4nkum&dl=0", + + :downwelling_shortwave_radiation => "https://www.dropbox.com/scl/fi/z6fkvmd9oe3ycmaxta131/" * + "RYF.rsds.1990_1991.nc?rlkey=r7q6zcbj6a4fxsq0f8th7c4tc&dl=0", + + :temperature => "https://www.dropbox.com/scl/fi/fpl0npwi476w635g6lke9/" * + "RYF.tas.1990_1991.nc?rlkey=0skb9pe6lgbfbiaoybe7m945s&dl=0", + + :eastward_velocity => "https://www.dropbox.com/scl/fi/86wetpqla2x97isp8092g/" * + "RYF.uas.1990_1991.nc?rlkey=rcaf18sh1yz0v9g4hjm1249j0&dl=0", + + :northward_velocity => "https://www.dropbox.com/scl/fi/d38sflo9ddljstd5jwgml/" * + "RYF.vas.1990_1991.nc?rlkey=f9y3e57kx8xrb40gbstarf0x6&dl=0", +) + +variable_is_three_dimensional(data::JRA55Metadata) = false + +urls(metadata::Metadata{<:Any, <:JRA55RepeatYear}) = JRA55_repeat_year_urls[metadata.name] +# TODO: +# urls(metadata::Metadata{<:Any, <:JRA55MultipleYears}) = ... + +metadata_url(prefix, m::Metadata{<:Any, <:JRA55RepeatYear}) = prefix # No specific name for this url + +# TODO: This will need to change when we add a method for JRA55MultipleYears +function download_dataset(metadata::JRA55Metadata; url = urls(metadata)) + + @root for metadatum in metadata + + fileurl = metadata_url(url, metadatum) + filepath = metadata_path(metadatum) + + if !isfile(filepath) + Downloads.download(fileurl, filepath; progress=download_progress) + end + end + + return nothing +end \ No newline at end of file diff --git a/src/DataWrangling/JRA55/JRA55_prescribed_atmosphere.jl b/src/DataWrangling/JRA55/JRA55_prescribed_atmosphere.jl new file mode 100644 index 00000000..33d0afe0 --- /dev/null +++ b/src/DataWrangling/JRA55/JRA55_prescribed_atmosphere.jl @@ -0,0 +1,80 @@ +const AA = Oceananigans.Architectures.AbstractArchitecture + +JRA55PrescribedAtmosphere(arch::Distributed; kw...) = + JRA55PrescribedAtmosphere(child_architecture(arch); kw...) + +""" + JRA55PrescribedAtmosphere([architecture = CPU()]; + version = JRA55RepeatYear(), + dates = all_dates(version), + backend = JRA55NetCDFBackend(10), + time_indexing = Cyclical(), + reference_height = 10, # meters + include_rivers_and_icebergs = false, + other_kw...) + +Return a `PrescribedAtmosphere` representing JRA55 reanalysis data. +""" +function JRA55PrescribedAtmosphere(architecture = CPU(), FT = Float32; + version = JRA55RepeatYear(), + dates = all_dates(version), + backend = JRA55NetCDFBackend(10), + time_indexing = Cyclical(), + reference_height = 10, # meters + include_rivers_and_icebergs = false, + other_kw...) + + kw = (; time_indexing, backend, dates, version) + kw = merge(kw, other_kw) + + ua = JRA55FieldTimeSeries(:eastward_velocity, architecture, FT; kw...) + va = JRA55FieldTimeSeries(:northward_velocity, architecture, FT; kw...) + Ta = JRA55FieldTimeSeries(:temperature, architecture, FT; kw...) + qa = JRA55FieldTimeSeries(:specific_humidity, architecture, FT; kw...) + pa = JRA55FieldTimeSeries(:sea_level_pressure, architecture, FT; kw...) + Fra = JRA55FieldTimeSeries(:rain_freshwater_flux, architecture, FT; kw...) + Fsn = JRA55FieldTimeSeries(:snow_freshwater_flux, architecture, FT; kw...) + Ql = JRA55FieldTimeSeries(:downwelling_longwave_radiation, architecture, FT; kw...) + Qs = JRA55FieldTimeSeries(:downwelling_shortwave_radiation, architecture, FT; kw...) + + freshwater_flux = (rain = Fra, + snow = Fsn) + + # Remember that rivers and icebergs are on a different grid and have + # a different frequency than the rest of the JRA55 data. We use `PrescribedAtmospheres` + # "auxiliary_freshwater_flux" feature to represent them. + if include_rivers_and_icebergs + Fri = JRA55FieldTimeSeries(:river_freshwater_flux, architecture; kw...) + Fic = JRA55FieldTimeSeries(:iceberg_freshwater_flux, architecture; kw...) + auxiliary_freshwater_flux = (rivers = Fri, icebergs = Fic) + else + auxiliary_freshwater_flux = nothing + end + + times = ua.times + grid = ua.grid + + velocities = (u = ua, + v = va) + + tracers = (T = Ta, + q = qa) + + pressure = pa + + downwelling_radiation = TwoBandDownwellingRadiation(shortwave=Qs, longwave=Ql) + + FT = eltype(ua) + reference_height = convert(FT, reference_height) + + atmosphere = PrescribedAtmosphere(grid, times; + velocities, + freshwater_flux, + auxiliary_freshwater_flux, + tracers, + downwelling_radiation, + reference_height, + pressure) + + return atmosphere +end diff --git a/src/DataWrangling/metadata.jl b/src/DataWrangling/metadata.jl new file mode 100644 index 00000000..0b011ebf --- /dev/null +++ b/src/DataWrangling/metadata.jl @@ -0,0 +1,113 @@ +using CFTime +using Dates +using Base: @propagate_inbounds + +struct Metadata{D, V} + name :: Symbol + dates :: D + version :: V + dir :: String +end + +""" + Metadata(variable_name; + version = ECCO4Monthly(), + dates = all_dates(name), + dir = default_download_folder(version)) + +Metadata holding a specific dataset information. + +Arguments +========= +- `variable_name`: a symbol representing the name of the variable (for example :temperature, :salinity, :u_velocity, etc...) + +Keyword Arguments +================= +- `dates`: The dates of the dataset, in a `AbstractCFDateTime` format.. Note this can either be a single date, + representing a snapshot, or a range of dates, representing a time-series. +- `version`: The version of the dataset. Supported versions are `ECCO2Monthly()`, `ECCO2Daily()`, `ECCO4Monthly()`, + `JRA55RepeatYear()`, or `JRA55MultipleYears()`. +- `dir`: The directory where the dataset is stored. +""" +function Metadata(variable_name; + version, + dates = all_dates(version), + dir = default_download_folder(version)) + + return Metadata(variable_name, dates, version, dir) +end + +default_download_folder(version) = "./" + +Base.show(io::IO, metadata::Metadata) = + print(io, "ECCOMetadata:", '\n', + "├── name: $(metadata.name)", '\n', + "├── dates: $(metadata.dates)", '\n', + "├── version: $(metadata.version)", '\n', + "└── data directory: $(metadata.dir)") + +# Treat Metadata as an array to allow iteration over the dates. +Base.length(metadata::Metadata) = length(metadata.dates) +Base.eltype(metadata::Metadata) = Base.eltype(metadata.dates) + +# If only one date, it's a single element array +Base.length(metadata::Metadata{<:AbstractCFDateTime}) = 1 + +@propagate_inbounds Base.getindex(m::Metadata, i::Int) = Metadata(m.name, m.dates[i], m.version, m.dir) +@propagate_inbounds Base.first(m::Metadata) = Metadata(m.name, m.dates[1], m.version, m.dir) +@propagate_inbounds Base.last(m::Metadata) = Metadata(m.name, m.dates[end], m.version, m.dir) + +@inline function Base.iterate(m::Metadata, i=1) + if (i % UInt) - 1 < length(m) + return Metadata(m.name, m.dates[i], m.version, m.dir), i + 1 + else + return nothing + end +end + +Base.axes(metadata::Metadata{<:AbstractCFDateTime}) = 1 +Base.first(metadata::Metadata{<:AbstractCFDateTime}) = metadata +Base.last(metadata::Metadata{<:AbstractCFDateTime}) = metadata +Base.iterate(metadata::Metadata{<:AbstractCFDateTime}) = (metadata, nothing) +Base.iterate(::Metadata{<:AbstractCFDateTime}, ::Any) = nothing + +metadata_path(metadata) = joinpath(metadata.dir, metadata_filename(metadata)) + + +""" + native_times(metadata; start_time = first(metadata).dates) + +Extract the time values from the given metadata and calculates the time difference +from the start time. + +Arguments +========= +- `metadata`: The metadata containing the date information. +- `start_time`: The start time for calculating the time difference. Defaults to the first date in the metadata. + +Returns +======= +An array of time differences in seconds. +""" +function native_times(metadata; start_time=first(metadata).dates) + times = zeros(length(metadata)) + for (t, data) in enumerate(metadata) + date = data.dates + time = date - start_time + time = Second(time).value + times[t] = time + end + + return times +end + +""" + all_dates(metadata) + +Extracts all the dates of the given metadata formatted using the `DateTimeProlepticGregorian` type. +Needs to be extended by any new dataset version. +""" +all_dates(metadata) = all_dates(metadata.version) + +# File names of metadata containing multiple dates +metadata_filename(metadata) = [metadata_filename(metadatum) for metadatum in metadata] diff --git a/src/OceanSeaIceModels/PrescribedAtmospheres.jl b/src/OceanSeaIceModels/PrescribedAtmospheres.jl index dc6ba471..1a754ab6 100644 --- a/src/OceanSeaIceModels/PrescribedAtmospheres.jl +++ b/src/OceanSeaIceModels/PrescribedAtmospheres.jl @@ -288,10 +288,9 @@ const PATP = PrescribedAtmosphereThermodynamicsParameters ##### Prescribed atmosphere (as opposed to dynamically evolving / prognostic) ##### -struct PrescribedAtmosphere{FT, M, G, T, U, P, C, F, I, R, TP, TI} +struct PrescribedAtmosphere{FT, G, T, U, P, C, F, I, R, TP, TI} grid :: G clock :: Clock{T} - metadata :: M velocities :: U pressure :: P tracers :: C @@ -369,7 +368,6 @@ end """ PrescribedAtmosphere(grid, times; clock = Clock{Float64}(time = 0), - metadata = nothing, reference_height = 10, # meters boundary_layer_height = 600 # meters, thermodynamics_parameters = PrescribedAtmosphereThermodynamicsParameters(FT), @@ -385,7 +383,6 @@ state with data given at `times`. """ function PrescribedAtmosphere(grid, times; clock = Clock{Float64}(time = 0), - metadata = nothing, reference_height = convert(eltype(grid), 10), boundary_layer_height = convert(eltype(grid), 600), thermodynamics_parameters = nothing, @@ -403,7 +400,6 @@ function PrescribedAtmosphere(grid, times; return PrescribedAtmosphere(grid, clock, - metadata, velocities, pressure, tracers, diff --git a/test/runtests.jl b/test/runtests.jl index aca50ba5..81dcb0c8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,7 +34,7 @@ if test_group == :init || test_group == :all end # Tests JRA55 utilities, plus some DataWrangling utilities -if test_group == :jra55 || test_group == :all +if test_group == :JRA55 || test_group == :all include("test_jra55.jl") end diff --git a/test/runtests_setup.jl b/test/runtests_setup.jl index ecd8c91b..5e6f3ca5 100644 --- a/test/runtests_setup.jl +++ b/test/runtests_setup.jl @@ -6,7 +6,6 @@ using Test using ClimaOcean.DataWrangling using ClimaOcean.ECCO using ClimaOcean.JRA55 -using ClimaOcean.JRA55: JRA55_field_time_series using Oceananigans.Architectures: architecture, on_architecture using Oceananigans.OutputReaders: interpolate! @@ -27,8 +26,8 @@ start_date = DateTimeProlepticGregorian(1993, 1, 1) end_date = DateTimeProlepticGregorian(1993, 4, 1) dates = start_date : Month(1) : end_date -temperature_metadata = ECCOMetadata(:temperature, dates) -salinity_metadata = ECCOMetadata(:salinity, dates) +temperature_metadata = Metadata(:temperature; dates, version=ECCO4Monthly()) +salinity_metadata = Metadata(:salinity; dates, version=ECCO4Monthly()) # Fictitious grid that triggers bathymetry download function download_bathymetry(; dir = download_bathymetry_cache, diff --git a/test/test_diagnostics.jl b/test/test_diagnostics.jl index fc64742d..f61d0e87 100644 --- a/test/test_diagnostics.jl +++ b/test/test_diagnostics.jl @@ -25,8 +25,8 @@ using ClimaOcean.Diagnostics: MixedLayerDepthField, MixedLayerDepthOperand stop = DateTimeProlepticGregorian(1993, 2, 1) dates = range(start; stop, step=Month(1)) - Tmeta = ECCOMetadata(:temperature; dates) - Smeta = ECCOMetadata(:salinity; dates) + Tmeta = Metadata(:temperature; version=ECCO4Monthly(), dates) + Smeta = Metadata(:salinity; version=ECCO4Monthly(), dates) Tt = ECCOFieldTimeSeries(Tmeta, grid; time_indices_in_memory=2) St = ECCOFieldTimeSeries(Smeta, grid; time_indices_in_memory=2) diff --git a/test/test_distributed_utils.jl b/test/test_distributed_utils.jl index 8c790690..cba4ac3f 100644 --- a/test/test_distributed_utils.jl +++ b/test/test_distributed_utils.jl @@ -69,7 +69,7 @@ end @testset "Distributed ECCO download" begin dates = DateTimeProlepticGregorian(1992, 1, 1) : Month(1) : DateTimeProlepticGregorian(1994, 4, 1) - metadata = ECCOMetadata(:u_velocity; dates) + metadata = Metadata(:u_velocity; version=ECCO4Monthly(), dates) download_dataset(metadata) @root for metadatum in metadata diff --git a/test/test_downloading.jl b/test/test_downloading.jl index e399b20e..b4721477 100644 --- a/test/test_downloading.jl +++ b/test/test_downloading.jl @@ -1,18 +1,19 @@ include("runtests_setup.jl") -using ClimaOcean.ECCO: metadata_path +using ClimaOcean.ECCO: metadata_path, ECCO4Monthly +using ClimaOcean.JRA55: JRA55NetCDFBackend @testset "Availability of JRA55 data" begin @info "Testing that we can download all the JRA55 data..." for name in ClimaOcean.DataWrangling.JRA55.JRA55_variable_names - fts = ClimaOcean.JRA55.JRA55_field_time_series(name; time_indices=2:3) + fts = ClimaOcean.JRA55.JRA55FieldTimeSeries(name; backend=JRA55NetCDFBackend(2)) end end @testset "Availability of ECCO data" begin @info "Testing that we can download ECCO data..." for variable in keys(ClimaOcean.ECCO.ECCO4_short_names) - metadata = ECCOMetadata(variable) + metadata = Metadata(variable, dates=DateTimeProlepticGregorian(1993, 1, 1), version=ECCO4Monthly()) filepath = metadata_path(metadata) isfile(filepath) && rm(filepath; force=true) ClimaOcean.ECCO.download_dataset(metadata) diff --git a/test/test_ecco.jl b/test/test_ecco.jl index 04a125d3..090bf60b 100644 --- a/test/test_ecco.jl +++ b/test/test_ecco.jl @@ -5,7 +5,7 @@ using Dates using ClimaOcean using ClimaOcean.ECCO -using ClimaOcean.ECCO: ECCO_field, metadata_path, ECCO_times +using ClimaOcean.ECCO: ECCO_field, metadata_path, native_times using ClimaOcean.DataWrangling: NearestNeighborInpainting using Oceananigans.Grids: topology @@ -27,7 +27,7 @@ inpainting = NearestNeighborInpainting(2) A = typeof(arch) for name in (:temperature, :salinity) @info "Testing ECCO_field on $A..." - metadata = ECCOMetadata(name, dates, ECCO4Monthly()) + metadata = Metadata(name; dates, version=ECCO4Monthly()) restoring = ECCORestoring(metadata; rate=1/1000, inpainting) for datum in metadata @@ -47,8 +47,8 @@ inpainting = NearestNeighborInpainting(2) @test Nz == size(metadata)[3] @test Nt == size(metadata)[4] - @test fts.times[1] == ECCO_times(metadata)[1] - @test fts.times[end] == ECCO_times(metadata)[end] + @test fts.times[1] == native_times(metadata)[1] + @test fts.times[end] == native_times(metadata)[end] datum = first(metadata) ψ = ECCO_field(datum, architecture=arch, inpainting=NearestNeighborInpainting(2)) @@ -60,7 +60,7 @@ end @testset "Inpainting algorithm" begin for arch in test_architectures - T_metadata = ECCOMetadata(:temperature, dates[1], ECCO4Monthly()) + T_metadata = Metadata(:temperature; dates=dates[1], version=ECCO4Monthly()) grid = LatitudeLongitudeGrid(arch, size = (100, 100, 10), @@ -134,8 +134,8 @@ end field = CenterField(grid) @test begin - set!(field, ECCOMetadata(:temperature)) - set!(field, ECCOMetadata(:salinity)) + set!(field, Metadata(:temperature, dates=start_date, version=ECCO4Monthly())) + set!(field, Metadata(:salinity, dates=start_date, version=ECCO4Monthly())) true end end @@ -154,8 +154,8 @@ end field = CenterField(grid) @test begin - set!(field, ECCOMetadata(:temperature)) - set!(field, ECCOMetadata(:salinity)) + set!(field, Metadata(:temperature, dates=start_date, version=ECCO4Monthly())) + set!(field, Metadata(:salinity, dates=start_date, version=ECCO4Monthly())) true end @@ -185,7 +185,8 @@ end ocean = ocean_simulation(grid) date = DateTimeProlepticGregorian(1993, 1, 1) - set!(ocean.model, T=ECCOMetadata(:temperature, date), S=ECCOMetadata(:salinity, date)) + set!(ocean.model, T=Metadata(:temperature; dates=start_date, version=ECCO4Monthly()), + S=Metadata(:salinity; dates=start_date, version=ECCO4Monthly())) end end @@ -207,7 +208,7 @@ end rate = 1 / 1000.0, inpainting) - times = ECCO_times(t_restoring.field_time_series.backend.metadata) + times = native_times(t_restoring.field_time_series.backend.metadata) ocean = ocean_simulation(grid, forcing = (; T = t_restoring)) ocean.model.clock.time = times[3] + 2 * Units.days diff --git a/test/test_jra55.jl b/test/test_jra55.jl index fd938839..50e2718c 100644 --- a/test/test_jra55.jl +++ b/test/test_jra55.jl @@ -1,6 +1,6 @@ include("runtests_setup.jl") -using ClimaOcean.JRA55: download_jra55_cache +using ClimaOcean.JRA55: download_JRA55_cache using ClimaOcean.OceanSeaIceModels: PrescribedAtmosphere @testset "JRA55 and data wrangling utilities" begin @@ -10,67 +10,57 @@ using ClimaOcean.OceanSeaIceModels: PrescribedAtmosphere # This should download files called "RYF.rsds.1990_1991.nc" and "RYF.tas.1990_1991.nc" for test_name in (:downwelling_shortwave_radiation, :temperature) - time_indices = 1:3 - jra55_fts = JRA55_field_time_series(test_name; architecture=arch, time_indices) - test_filename = joinpath(download_jra55_cache, "RYF.rsds.1990_1991.nc") + dates = ClimaOcean.DataWrangling.all_dates(JRA55.JRA55RepeatYear()) + dates = dates[1:3] - @test jra55_fts isa FieldTimeSeries - @test jra55_fts.grid isa LatitudeLongitudeGrid + JRA55_fts = JRA55FieldTimeSeries(test_name, arch; dates) + test_filename = joinpath(download_JRA55_cache, "RYF.rsds.1990_1991.nc") + + @test JRA55_fts isa FieldTimeSeries + @test JRA55_fts.grid isa LatitudeLongitudeGrid - Nx, Ny, Nz, Nt = size(jra55_fts) + Nx, Ny, Nz, Nt = size(JRA55_fts) @test Nx == 640 @test Ny == 320 @test Nz == 1 - @test Nt == length(time_indices) + @test Nt == 3 if test_name == :downwelling_shortwave_radiation CUDA.@allowscalar begin - @test jra55_fts[1, 1, 1, 1] == 430.98105f0 - @test jra55_fts[641, 1, 1, 1] == 430.98105f0 + @test JRA55_fts[1, 1, 1, 1] == 430.98105f0 + @test JRA55_fts[641, 1, 1, 1] == 430.98105f0 end end # Test that halo regions were filled to respect boundary conditions CUDA.@allowscalar begin - @test view(jra55_fts.data, 1, :, 1, :) == view(jra55_fts.data, Nx+1, :, 1, :) + @test view(JRA55_fts.data, 1, :, 1, :) == view(JRA55_fts.data, Nx+1, :, 1, :) end - @info "Testing loading preprocessed JRA55 data on $A..." - in_memory_jra55_fts = JRA55_field_time_series(test_name; - time_indices = 1:4, - architecture = arch, - backend = InMemory(2)) - - @test in_memory_jra55_fts isa FieldTimeSeries - @test interior(in_memory_jra55_fts[1]) == interior(jra55_fts[1]) - - # Clean up - isfile(in_memory_jra55_fts.path) && rm(in_memory_jra55_fts.path, force=true) - @info "Testing Cyclical time_indices for JRA55 data on $A..." Nb = 4 backend = JRA55NetCDFBackend(Nb) - netcdf_jra55_fts = JRA55_field_time_series(test_name; backend, - time_indices = Colon(), - architecture = arch) + netcdf_JRA55_fts = JRA55FieldTimeSeries(test_name, arch; backend) - Nt = length(netcdf_jra55_fts.times) - @test Oceananigans.OutputReaders.time_indices(netcdf_jra55_fts) == (1, 2, 3, 4) - f₁ = view(parent(netcdf_jra55_fts), :, :, 1, 1) + Nt = length(netcdf_JRA55_fts.times) + @test Oceananigans.OutputReaders.time_indices(netcdf_JRA55_fts) == (1, 2, 3, 4) + f₁ = view(parent(netcdf_JRA55_fts), :, :, 1, 1) f₁ = Array(f₁) - netcdf_jra55_fts.backend = JRA55NetCDFBackend(Nt-2, Nb) - @test Oceananigans.OutputReaders.time_indices(netcdf_jra55_fts) == (Nt-2, Nt-1, Nt, 1) - set!(netcdf_jra55_fts) + netcdf_JRA55_fts.backend = JRA55NetCDFBackend(Nt-2, Nb) + @test Oceananigans.OutputReaders.time_indices(netcdf_JRA55_fts) == (Nt-2, Nt-1, Nt, 1) + set!(netcdf_JRA55_fts) - f₁′ = view(parent(netcdf_jra55_fts), :, :, 1, 4) + f₁′ = view(parent(netcdf_JRA55_fts), :, :, 1, 4) f₁′ = Array(f₁′) @test f₁ == f₁′ end @info "Testing interpolate_field_time_series! on $A..." - jra55_fts = JRA55_field_time_series(:downwelling_shortwave_radiation; architecture=arch, time_indices=1:3) + dates = ClimaOcean.DataWrangling.all_dates(JRA55.JRA55RepeatYear()) + dates = dates[1:3] + JRA55_fts = JRA55FieldTimeSeries(:downwelling_shortwave_radiation, arch; dates) # Make target grid and field resolution = 1 # degree, eg 1/4 @@ -89,10 +79,10 @@ using ClimaOcean.OceanSeaIceModels: PrescribedAtmosphere z = (0, 1), topology = (Periodic, Bounded, Bounded)) - times = jra55_fts.times - boundary_conditions = jra55_fts.boundary_conditions + times = JRA55_fts.times + boundary_conditions = JRA55_fts.boundary_conditions target_fts = FieldTimeSeries{Center, Center, Nothing}(target_grid, times; boundary_conditions) - interpolate!(target_fts, jra55_fts) + interpolate!(target_fts, JRA55_fts) # Random regression test CUDA.@allowscalar begin @@ -103,7 +93,7 @@ using ClimaOcean.OceanSeaIceModels: PrescribedAtmosphere @test target_fts[Nx + 1, 1, 1, 1] == 222.243136478611 end - @test target_fts.times == jra55_fts.times + @test target_fts.times == JRA55_fts.times # What else might we test? diff --git a/test/test_surface_fluxes.jl b/test/test_surface_fluxes.jl index 01ee51c5..fec096f8 100644 --- a/test/test_surface_fluxes.jl +++ b/test/test_surface_fluxes.jl @@ -15,6 +15,7 @@ using CUDA using KernelAbstractions: @kernel, @index using Oceananigans.TimeSteppers: update_state! using Oceananigans.Units: hours, days +using ClimaOcean.DataWrangling: all_dates import ClimaOcean.OceanSeaIceModels.InterfaceComputations: saturation_specific_humidity @@ -41,20 +42,27 @@ end closure = nothing, bottom_drag_coefficient = 0.0) - atmosphere = JRA55PrescribedAtmosphere(1:2; grid, architecture=arch, backend=InMemory()) + dates = all_dates(JRA55RepeatYear())[1:2] + atmosphere = JRA55PrescribedAtmosphere(arch, Float64; dates, backend = InMemory()) CUDA.@allowscalar begin h = atmosphere.reference_height pₐ = atmosphere.pressure[1][1, 1, 1] - Tₐ = atmosphere.tracers.T[1][1, 1, 1] - qₐ = atmosphere.tracers.q[1][1, 1, 1] + Tₐ = 15 + celsius_to_kelvin + qₐ = 0.003 uₐ = atmosphere.velocities.u[1][1, 1, 1] vₐ = atmosphere.velocities.v[1][1, 1, 1] ℂₐ = atmosphere.thermodynamics_parameters + fill!(parent(atmosphere.tracers.T), Tₐ) + fill!(parent(atmosphere.tracers.q), qₐ) + fill!(parent(atmosphere.velocities.u), uₐ) + fill!(parent(atmosphere.velocities.v), vₐ) + fill!(parent(atmosphere.pressure), pₐ) + # Force the saturation humidity of the ocean to be # equal to the atmospheric saturation humidity atmosphere_ocean_interface_specific_humidity = FixedSpecificHumidity(qₐ) @@ -83,7 +91,9 @@ end # Note that the Δθ accounts for the "lapse rate" at height h Tₒ = Tₐ - celsius_to_kelvin + h / cp * g - set!(ocean.model, u=uₐ, v=vₐ, T=Tₒ) + fill!(parent(ocean.model.velocities.u), uₐ) + fill!(parent(ocean.model.velocities.v), vₐ) + fill!(parent(ocean.model.tracers.T), Tₒ) # Compute the turbulent fluxes (neglecting radiation) coupled_model = OceanSeaIceModel(ocean; atmosphere, interfaces) @@ -176,7 +186,8 @@ end closure = nothing, bottom_drag_coefficient = 0.0) - atmosphere = JRA55PrescribedAtmosphere(1:2; architecture = arch, backend = InMemory()) + dates = all_dates(JRA55RepeatYear())[1:2] + atmosphere = JRA55PrescribedAtmosphere(arch; dates, backend = InMemory()) fill!(ocean.model.tracers.T, -2.0) @@ -226,12 +237,13 @@ end ocean = ocean_simulation(grid; momentum_advection, tracer_advection, closure, tracers, coriolis) - T_metadata = ECCOMetadata(:temperature) - S_metadata = ECCOMetadata(:salinity) + T_metadata = Metadata(:temperature, dates=DateTimeProlepticGregorian(1993, 1, 1), version=ECCO4Monthly()) + S_metadata = Metadata(:salinity, dates=DateTimeProlepticGregorian(1993, 1, 1), version=ECCO4Monthly()) set!(ocean.model; T=T_metadata, S=S_metadata) - atmosphere = JRA55PrescribedAtmosphere(1:10; grid, architecture = arch, backend = InMemory()) + dates = all_dates(JRA55RepeatYear())[1:10] + atmosphere = JRA55PrescribedAtmosphere(arch; dates, backend = InMemory()) radiation = Radiation(ocean_albedo=0.1, ocean_emissivity=1.0) sea_ice = nothing