ESMValGroup · morobking · Nov 3, 2025 · Nov 3, 2025 · Nov 4, 2025 · Nov 4, 2025
@@ -4,24 +4,27 @@ attributes:
   dataset_id: CDS-SATELLITE-LAI-FAPAR
   project_id: OBS
   tier: 3
-  version: 'V1'  # Version as listed on source
+  version: 'V3'  # Version as listed on source
   modeling_realm: sat
   source: 'https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-lai-fapar'
   reference: 'cds-satellite-lai-fapar'
   comment: |
     'Leaf area index and fraction absorbed of photosynthetically active radiation 10-daily gridded data from 1998 to present'
 
+  start_year: 2000
+  end_year: 2001
+
 # Variables to CMORize
 variables:
   lai:
-    mip: Lmon
+    mip: Eday #Lmon
     raw: LAI
-    file: 'c3s_LAI_*_GLOBE_VGT_V1.0.1.nc'
-  fapar:
-    mip: Lmon
-    raw: fAPAR
-    file: 'c3s_FAPAR_*_GLOBE_VGT_V1.0.1.nc'
+    file: 'c3s_LAI_*_GLOBE_VGT_V3.0.1.nc'
+#  fapar:
+#    mip: Lmon
+#    raw: fAPAR
+#    file: 'c3s_FAPAR_*_GLOBE_VGT_V1.0.1.nc'
 
-# Parameters
-custom:
-  regrid_resolution: '0.25x0.25'
+Parameters:
+  custom:
+    regrid_resolution: 'None'
@@ -5,8 +5,9 @@
 Source
    https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-lai-fapar?tab=form
 Last access
-   20190703
+   20260120
 
+NEEDED TO UPDATE THIS for V3!!!
 Download and processing instructions
    - Open in a browser the data source as specified above
    - Put the right ticks:
@@ -32,14 +33,17 @@
 Modification history
    20200512-crezee_bas: adapted to reflect changes in download form by CDS.
    20190703-crezee_bas: written.
+
+   20260120: updates to support all three days of data per month
 """
 
 import glob
 import logging
 import os
-from copy import deepcopy
 from datetime import datetime
-from warnings import catch_warnings, filterwarnings
+import calendar
+import numpy as np
+import dask.array as da
 
 import cf_units
 import iris
@@ -50,137 +54,45 @@
 
 logger = logging.getLogger(__name__)
 
+def load_callback(cube, field, filename):
+    """
+    Callback fucntion for iris.load to remove all attributes from cube
+    so they will concatenate into a single cube
+    """
+    cube.attributes = None
 
-def _attrs_are_the_same(cubelist):
-    # assume they are the same
-    attrs_the_same = True
-    allattrs = cubelist[0].attributes
-    for key in allattrs:
-        try:
-            unique_attr_vals = {cube.attributes[key] for cube in cubelist}
-        # This exception is needed for valid_range, which is an
-        # array and therefore not hashable
-        except TypeError:
-            unique_attr_vals = {
-                tuple(cube.attributes[key]) for cube in cubelist
-            }
-        if len(unique_attr_vals) > 1:
-            attrs_the_same = False
-            print(
-                f"Different values found for {key}-attribute: "
-                f"{unique_attr_vals}"
+def load_dataset(in_dir, var, cfg, year, month):
+    """
+    Load the files from an individual month
+    """
+    filelist = glob.glob(os.path.join(in_dir, var["file"]))
+    this_month_year_files = []
+    for file in filelist:
+       if f"{year}{month:02d}" in file:
+           this_month_year_files.append(file)
+
+    lai_cube = iris.load(this_month_year_files,
+                        NameConstraint(var_name=var["raw"]),
+                        callback=load_callback,
             )
-    return attrs_the_same
+
+    return lai_cube.concatenate_cube()
 
-
-def _cmorize_dataset(in_file, var, cfg, out_dir):
-    logger.info(
-        "CMORizing variable '%s' from input file '%s'",
-        var["short_name"],
-        in_file,
-    )
-    attributes = deepcopy(cfg["attributes"])
-    attributes["mip"] = var["mip"]
+def _cmorize_dataset(cube, var, cfg):
 
     cmor_table = cfg["cmor_table"]
     definition = cmor_table.get_variable(var["mip"], var["short_name"])
-
-    cube = iris.load_cube(
-        str(in_file), constraint=NameConstraint(var_name=var["raw"])
-    )
-
-    # Set correct names
+
     cube.var_name = definition.short_name
     if definition.standard_name:
         cube.standard_name = definition.standard_name
 
     cube.long_name = definition.long_name
 
-    # Convert units if required
+    # units
     cube.convert_units(definition.units)
 
-    # Set global attributes
-    utils.set_global_atts(cube, attributes)
-
-    logger.info("Saving CMORized cube for variable %s", cube.var_name)
-    utils.save_variable(cube, cube.var_name, out_dir, attributes)
-
-    return in_file
-
-
-def _regrid_dataset(in_dir, var, cfg):
-    """Regridding of original files.
-
-    This function regrids each file and write to disk appending 'regrid'
-    in front of filename.
-    """
-    filelist = glob.glob(os.path.join(in_dir, var["file"]))
-    for infile in filelist:
-        _, infile_tail = os.path.split(infile)
-        outfile_tail = infile_tail.replace("c3s", "c3s_regridded")
-        outfile = os.path.join(cfg["work_dir"], outfile_tail)
-        with catch_warnings():
-            filterwarnings(
-                action="ignore",
-                # Full message:
-                # UserWarning: Skipping global attribute 'long_name':
-                #              'long_name' is not a permitted attribute
-                message="Skipping global attribute 'long_name'",
-                category=UserWarning,
-                module="iris",
-            )
-            lai_cube = iris.load_cube(
-                infile, constraint=NameConstraint(var_name=var["raw"])
-            )
-        lai_cube = regrid(
-            lai_cube, cfg["custom"]["regrid_resolution"], "nearest"
-        )
-        logger.info("Saving: %s", outfile)
-
-        iris.save(lai_cube, outfile)
-
-
-def _set_time_bnds(in_dir, var):
-    """Set time_bnds by using attribute and returns a cubelist."""
-    # This is a complicated expression, but necessary to keep local
-    # variables below the limit, otherwise prospector complains.
-    cubelist = iris.load(
-        glob.glob(
-            os.path.join(in_dir, var["file"].replace("c3s", "c3s_regridded"))
-        )
-    )
-
-    # The purpose of the following loop is to remove any attributes
-    # that differ between cubes (otherwise concatenation over time fails).
-    # In addition, care is taken of the time coordinate, by adding the
-    # time_coverage attributes as time_bnds to the time coordinate.
-    for n_cube, _ in enumerate(cubelist):
-        time_coverage_start = cubelist[n_cube].attributes.pop(
-            "time_coverage_start"
-        )
-        time_coverage_end = cubelist[n_cube].attributes.pop(
-            "time_coverage_end"
-        )
-
-        # Now put time_coverage_start/end as time_bnds
-        # Convert time_coverage_xxxx to datetime
-        bnd_a = datetime.strptime(time_coverage_start, "%Y-%m-%dT%H:%M:%SZ")
-        bnd_b = datetime.strptime(time_coverage_end, "%Y-%m-%dT%H:%M:%SZ")
-
-        # Put in shape for time_bnds
-        time_bnds_datetime = [bnd_a, bnd_b]
-
-        # Read dataset time unit and calendar from file
-        dataset_time_unit = str(cubelist[n_cube].coord("time").units)
-        dataset_time_calender = cubelist[n_cube].coord("time").units.calendar
-        # Convert datetime
-        time_bnds = cf_units.date2num(
-            time_bnds_datetime, dataset_time_unit, dataset_time_calender
-        )
-        # Put them on the file
-        cubelist[n_cube].coord("time").bounds = time_bnds
-
-    return cubelist
+    return cube
 
 
 def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
@@ -194,47 +106,97 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
             "Creating working directory for regridding: %s", cfg["work_dir"]
         )
         os.mkdir(cfg["work_dir"])
-
+   
     for short_name, var in cfg["variables"].items():
         var["short_name"] = short_name
         logger.info("Processing var %s", short_name)
 
-        # Regridding
-        logger.info(
-            "Start regridding to: %s", cfg["custom"]["regrid_resolution"]
-        )
-        _regrid_dataset(in_dir, var, cfg)
-        logger.info("Finished regridding")
-
-        # File concatenation
-        logger.info("Start setting time_bnds")
-        cubelist = _set_time_bnds(cfg["work_dir"], var)
-
-        # Loop over two different platform names
-        for platformname in ["SPOT-4", "SPOT-5"]:
-            # Now split the cubelist on the different platform
-            logger.info("Start processing part of dataset: %s", platformname)
-            cubelist_platform = cubelist.extract(
-                iris.AttributeConstraint(platform=platformname)
-            )
-            for n_cube, _ in enumerate(cubelist_platform):
-                cubelist_platform[n_cube].attributes.pop("identifier")
-            if cubelist_platform:
-                assert _attrs_are_the_same(cubelist_platform)
-                cube = cubelist_platform.concatenate_cube()
-            else:
-                logger.warning(
-                    "No files found for platform %s \
-                               (check input data)",
-                    platformname,
-                )
-                continue
-            savename = os.path.join(
-                cfg["work_dir"], var["short_name"] + platformname + ".nc"
-            )
-            logger.info("Saving as: %s", savename)
-            iris.save(cube, savename)
-            logger.info("Finished file concatenation over time")
-            logger.info("Start CMORization of file %s", savename)
-            _cmorize_dataset(savename, var, cfg, out_dir)
-            logger.info("Finished regridding and CMORizing %s", savename)
+        for year in range(cfg["attributes"]["start_year"],
+                         cfg["attributes"]["end_year"]):
+
+
+            for month in range(1,13): 
+                output = iris.cube.CubeList([])
+                logger.info(f"Working with year {year}, month {month}")
+
+                # Load orginal data in an indendent function
+                lai_cube = load_dataset(in_dir, var, cfg, year, month)
+
+                # Regrdding
+                # uses nearest neighbour, skips if resolution = None
+                # This uses a huge amount of resource - be careful
+                resolution = cfg["Parameters"]["custom"]["regrid_resolution"]
+                if resolution == "None":
+                    logger.info("No regridding")
+                else:
+                    logger.info(f"Regridding {cfg["Parameters"]["custom"]["regrid_resolution"]}")
+                    lai_cube = regrid(
+                        lai_cube, cfg["Parameters"]["custom"]["regrid_resolution"], "nearest"
+                        )
+
+                # make a daily version with Nan cubes for missing days
+                # This will work with 10-day CDS data and 5-day CCI data in updates at a later date
+                days_in_month = calendar.monthrange(year, month)[1]
+                time_coord = lai_cube.coord('time')
+                time_values = time_coord.points
+                dts = time_coord.units.num2date(time_values)
+                days = [item.day for item in dts]
+
+                for day in range(1, days_in_month + 1):
+                    if day in days:
+                        logger.info(f"{day} is in CUBES")
+                        new_cube = iris.util.new_axis(lai_cube[days.index(day)], 'time')
+                        output.append(new_cube)
+                    else:
+                        logger.info(f"{day} NOT in CUBES")
+                        nan_cube = create_dask_cube(lai_cube[0], year, month, day)
+                        new_cube = iris.util.new_axis(nan_cube, 'time')
+                        output.append(new_cube)
+
+                output = output.concatenate_cube()
+
+                # time bounds
+                # This sets time bounds without needing extra loops and checks
+                output.coord('time').guess_bounds()
+
+                # cmorize
+                output = _cmorize_dataset(output, var, cfg)
+
+                # save cube
+                logger.info(f"Saving CMORized cube for variable {output.var_name}")
+                # these should all be the same
+                attributes = cfg["attributes"]
+                attributes["mip"] = var["mip"]
+                utils.save_variable(output, lai_cube.var_name, out_dir, attributes, zlib=True)
+
+
+def create_dask_cube(cube, year, month, day):
+    """Create a cube of NaNs for missing days.
+
+    Args:
+        cube (int): Cube with target shape and coordinates to copy
+        year (int): Year for time point
+        month (int): Month for time point
+        day (int): Day for time point
+
+    Returns:
+        cube: A 1xlatxlon cube of NaNs
+    """
+    nan_da = da.full(cube.shape, np.nan,
+                     chunks='auto', dtype=np.float32)
+
+    new_cube = cube.copy()
+    new_cube.data = nan_da
+
+    dataset_time_unit = str(new_cube.coord("time").units)
+    dataset_time_calender = new_cube.coord("time").units.calendar
+
+    # Convert datetime
+    newtime = datetime(year=year, month=month, day=day)
+    newtime = cf_units.date2num(
+        newtime, dataset_time_unit, dataset_time_calender
+    )
+
+    new_cube.coord("time").points = np.float64(newtime)
+
+    return new_cube
diff --git a/esmvaltool/recipes/examples/recipe_python.yml b/esmvaltool/recipes/examples/recipe_python.yml
@@ -41,7 +41,7 @@ preprocessors:
 
   annual_mean_amsterdam:
     extract_location:
-      location: Amsterdam
+      location: London
       scheme: linear
     annual_statistics:
       operator: mean