Skip to content
23 changes: 13 additions & 10 deletions esmvaltool/cmorizers/data/cmor_config/CDS-SATELLITE-LAI-FAPAR.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,27 @@ attributes:
dataset_id: CDS-SATELLITE-LAI-FAPAR
project_id: OBS
tier: 3
version: 'V1' # Version as listed on source
version: 'V3' # Version as listed on source
modeling_realm: sat
source: 'https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-lai-fapar'
reference: 'cds-satellite-lai-fapar'
comment: |
'Leaf area index and fraction absorbed of photosynthetically active radiation 10-daily gridded data from 1998 to present'

start_year: 2000
end_year: 2001

# Variables to CMORize
variables:
lai:
mip: Lmon
mip: Eday #Lmon
raw: LAI
file: 'c3s_LAI_*_GLOBE_VGT_V1.0.1.nc'
fapar:
mip: Lmon
raw: fAPAR
file: 'c3s_FAPAR_*_GLOBE_VGT_V1.0.1.nc'
file: 'c3s_LAI_*_GLOBE_VGT_V3.0.1.nc'
# fapar:
# mip: Lmon
# raw: fAPAR
# file: 'c3s_FAPAR_*_GLOBE_VGT_V1.0.1.nc'

# Parameters
custom:
regrid_resolution: '0.25x0.25'
Parameters:
custom:
regrid_resolution: 'None'
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
Source
https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-lai-fapar?tab=form
Last access
20190703
20260120

NEEDED TO UPDATE THIS for V3!!!
Download and processing instructions
- Open in a browser the data source as specified above
- Put the right ticks:
Expand All @@ -32,14 +33,17 @@
Modification history
20200512-crezee_bas: adapted to reflect changes in download form by CDS.
20190703-crezee_bas: written.

20260120: updates to support all three days of data per month
"""

import glob
import logging
import os
from copy import deepcopy
from datetime import datetime
from warnings import catch_warnings, filterwarnings
import calendar
import numpy as np
import dask.array as da

import cf_units
import iris
Expand All @@ -50,137 +54,45 @@

logger = logging.getLogger(__name__)

def load_callback(cube, field, filename):
"""
Callback fucntion for iris.load to remove all attributes from cube
so they will concatenate into a single cube
"""
cube.attributes = None

def _attrs_are_the_same(cubelist):
# assume they are the same
attrs_the_same = True
allattrs = cubelist[0].attributes
for key in allattrs:
try:
unique_attr_vals = {cube.attributes[key] for cube in cubelist}
# This exception is needed for valid_range, which is an
# array and therefore not hashable
except TypeError:
unique_attr_vals = {
tuple(cube.attributes[key]) for cube in cubelist
}
if len(unique_attr_vals) > 1:
attrs_the_same = False
print(
f"Different values found for {key}-attribute: "
f"{unique_attr_vals}"
def load_dataset(in_dir, var, cfg, year, month):
"""
Load the files from an individual month
"""
filelist = glob.glob(os.path.join(in_dir, var["file"]))
this_month_year_files = []
for file in filelist:
if f"{year}{month:02d}" in file:
this_month_year_files.append(file)

lai_cube = iris.load(this_month_year_files,
NameConstraint(var_name=var["raw"]),
callback=load_callback,
)
return attrs_the_same

return lai_cube.concatenate_cube()


def _cmorize_dataset(in_file, var, cfg, out_dir):
logger.info(
"CMORizing variable '%s' from input file '%s'",
var["short_name"],
in_file,
)
attributes = deepcopy(cfg["attributes"])
attributes["mip"] = var["mip"]
def _cmorize_dataset(cube, var, cfg):

cmor_table = cfg["cmor_table"]
definition = cmor_table.get_variable(var["mip"], var["short_name"])

cube = iris.load_cube(
str(in_file), constraint=NameConstraint(var_name=var["raw"])
)

# Set correct names

cube.var_name = definition.short_name
if definition.standard_name:
cube.standard_name = definition.standard_name

cube.long_name = definition.long_name

# Convert units if required
# units
cube.convert_units(definition.units)

# Set global attributes
utils.set_global_atts(cube, attributes)

logger.info("Saving CMORized cube for variable %s", cube.var_name)
utils.save_variable(cube, cube.var_name, out_dir, attributes)

return in_file


def _regrid_dataset(in_dir, var, cfg):
"""Regridding of original files.

This function regrids each file and write to disk appending 'regrid'
in front of filename.
"""
filelist = glob.glob(os.path.join(in_dir, var["file"]))
for infile in filelist:
_, infile_tail = os.path.split(infile)
outfile_tail = infile_tail.replace("c3s", "c3s_regridded")
outfile = os.path.join(cfg["work_dir"], outfile_tail)
with catch_warnings():
filterwarnings(
action="ignore",
# Full message:
# UserWarning: Skipping global attribute 'long_name':
# 'long_name' is not a permitted attribute
message="Skipping global attribute 'long_name'",
category=UserWarning,
module="iris",
)
lai_cube = iris.load_cube(
infile, constraint=NameConstraint(var_name=var["raw"])
)
lai_cube = regrid(
lai_cube, cfg["custom"]["regrid_resolution"], "nearest"
)
logger.info("Saving: %s", outfile)

iris.save(lai_cube, outfile)


def _set_time_bnds(in_dir, var):
"""Set time_bnds by using attribute and returns a cubelist."""
# This is a complicated expression, but necessary to keep local
# variables below the limit, otherwise prospector complains.
cubelist = iris.load(
glob.glob(
os.path.join(in_dir, var["file"].replace("c3s", "c3s_regridded"))
)
)

# The purpose of the following loop is to remove any attributes
# that differ between cubes (otherwise concatenation over time fails).
# In addition, care is taken of the time coordinate, by adding the
# time_coverage attributes as time_bnds to the time coordinate.
for n_cube, _ in enumerate(cubelist):
time_coverage_start = cubelist[n_cube].attributes.pop(
"time_coverage_start"
)
time_coverage_end = cubelist[n_cube].attributes.pop(
"time_coverage_end"
)

# Now put time_coverage_start/end as time_bnds
# Convert time_coverage_xxxx to datetime
bnd_a = datetime.strptime(time_coverage_start, "%Y-%m-%dT%H:%M:%SZ")
bnd_b = datetime.strptime(time_coverage_end, "%Y-%m-%dT%H:%M:%SZ")

# Put in shape for time_bnds
time_bnds_datetime = [bnd_a, bnd_b]

# Read dataset time unit and calendar from file
dataset_time_unit = str(cubelist[n_cube].coord("time").units)
dataset_time_calender = cubelist[n_cube].coord("time").units.calendar
# Convert datetime
time_bnds = cf_units.date2num(
time_bnds_datetime, dataset_time_unit, dataset_time_calender
)
# Put them on the file
cubelist[n_cube].coord("time").bounds = time_bnds

return cubelist
return cube


def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
Expand All @@ -194,47 +106,97 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
"Creating working directory for regridding: %s", cfg["work_dir"]
)
os.mkdir(cfg["work_dir"])

for short_name, var in cfg["variables"].items():
var["short_name"] = short_name
logger.info("Processing var %s", short_name)

# Regridding
logger.info(
"Start regridding to: %s", cfg["custom"]["regrid_resolution"]
)
_regrid_dataset(in_dir, var, cfg)
logger.info("Finished regridding")

# File concatenation
logger.info("Start setting time_bnds")
cubelist = _set_time_bnds(cfg["work_dir"], var)

# Loop over two different platform names
for platformname in ["SPOT-4", "SPOT-5"]:
# Now split the cubelist on the different platform
logger.info("Start processing part of dataset: %s", platformname)
cubelist_platform = cubelist.extract(
iris.AttributeConstraint(platform=platformname)
)
for n_cube, _ in enumerate(cubelist_platform):
cubelist_platform[n_cube].attributes.pop("identifier")
if cubelist_platform:
assert _attrs_are_the_same(cubelist_platform)
cube = cubelist_platform.concatenate_cube()
else:
logger.warning(
"No files found for platform %s \
(check input data)",
platformname,
)
continue
savename = os.path.join(
cfg["work_dir"], var["short_name"] + platformname + ".nc"
)
logger.info("Saving as: %s", savename)
iris.save(cube, savename)
logger.info("Finished file concatenation over time")
logger.info("Start CMORization of file %s", savename)
_cmorize_dataset(savename, var, cfg, out_dir)
logger.info("Finished regridding and CMORizing %s", savename)
for year in range(cfg["attributes"]["start_year"],
cfg["attributes"]["end_year"]):


for month in range(1,13):
output = iris.cube.CubeList([])
logger.info(f"Working with year {year}, month {month}")

# Load orginal data in an indendent function
lai_cube = load_dataset(in_dir, var, cfg, year, month)

# Regrdding
# uses nearest neighbour, skips if resolution = None
# This uses a huge amount of resource - be careful
resolution = cfg["Parameters"]["custom"]["regrid_resolution"]
if resolution == "None":
logger.info("No regridding")
else:
logger.info(f"Regridding {cfg["Parameters"]["custom"]["regrid_resolution"]}")
lai_cube = regrid(
lai_cube, cfg["Parameters"]["custom"]["regrid_resolution"], "nearest"
)

# make a daily version with Nan cubes for missing days
# This will work with 10-day CDS data and 5-day CCI data in updates at a later date
days_in_month = calendar.monthrange(year, month)[1]
time_coord = lai_cube.coord('time')
time_values = time_coord.points
dts = time_coord.units.num2date(time_values)
days = [item.day for item in dts]

for day in range(1, days_in_month + 1):
if day in days:
logger.info(f"{day} is in CUBES")
new_cube = iris.util.new_axis(lai_cube[days.index(day)], 'time')
output.append(new_cube)
else:
logger.info(f"{day} NOT in CUBES")
nan_cube = create_dask_cube(lai_cube[0], year, month, day)
new_cube = iris.util.new_axis(nan_cube, 'time')
output.append(new_cube)

output = output.concatenate_cube()

# time bounds
# This sets time bounds without needing extra loops and checks
output.coord('time').guess_bounds()

# cmorize
output = _cmorize_dataset(output, var, cfg)

# save cube
logger.info(f"Saving CMORized cube for variable {output.var_name}")
# these should all be the same
attributes = cfg["attributes"]
attributes["mip"] = var["mip"]
utils.save_variable(output, lai_cube.var_name, out_dir, attributes, zlib=True)


def create_dask_cube(cube, year, month, day):
"""Create a cube of NaNs for missing days.

Args:
cube (int): Cube with target shape and coordinates to copy
year (int): Year for time point
month (int): Month for time point
day (int): Day for time point

Returns:
cube: A 1xlatxlon cube of NaNs
"""
nan_da = da.full(cube.shape, np.nan,
chunks='auto', dtype=np.float32)

new_cube = cube.copy()
new_cube.data = nan_da

dataset_time_unit = str(new_cube.coord("time").units)
dataset_time_calender = new_cube.coord("time").units.calendar

# Convert datetime
newtime = datetime(year=year, month=month, day=day)
newtime = cf_units.date2num(
newtime, dataset_time_unit, dataset_time_calender
)

new_cube.coord("time").points = np.float64(newtime)

return new_cube
2 changes: 1 addition & 1 deletion esmvaltool/recipes/examples/recipe_python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ preprocessors:

annual_mean_amsterdam:
extract_location:
location: Amsterdam
location: London
scheme: linear
annual_statistics:
operator: mean
Expand Down