Source code for smif.metadata.spec

"""Data is typically multi-dimensional. :class:`~smif.metadata.spec.Spec` is used to describe
each dataset which is supplied to - or output from - each :class:`~smif.model.model.Model` in a
:class:`~smif.model.model.CompositeModel`
"""
from collections import defaultdict

from smif.metadata.coordinates import Coordinates


class Spec(object):
    """N-dimensional metadata.

    Spec labels each dimension with coordinates and enforces data type, units and absolute and
    expected ranges.

    The API here is modelled on :class:`xarray.DataArray`: dtype and shape describe a
    :class:`numpy.ndarray`; dims and coords follow the xarray conventions for labelled axes;
    and unit, abs_range and exp_range are introduced as supplementary metadata to help
    validate connections between models.

    Attributes
    ----------
    name : str
        The name of the data that this spec describes
    description : str
        A human-friendly description
    dtype : str
        Data type for data values
    abs_range : tuple
        Absolute range of data values
    exp_range : tuple
        Expected range of data values
    shape : tuple[int]
        Tuple of dimension sizes
    ndim : int
        Number of dimensions
    dims : list[str]
        Dimension names
    coords : list[Coordinates]
        Dimension coordinate labels
    unit : str
        Units of data values

    Parameters
    ----------
    name : str, optional
        Name to identify the variable described (typically an input, output or parameter)
    description : str, optional
        Short description
    dims : list[str], optional
        List of dimension names, must be provided if coords is a dict
    coords : list[Coordinates] or dict[str, list], optional
        A list of :class:`Coordinates` or a dict mapping each dimension name to a list of
        names which label that dimension.
    dtype : str
        String suitable for constructing a simple :class:`numpy.dtype`
    abs_range : tuple, optional
        (min, max) absolute range for numeric values - can be used to raise errors
    exp_range : tuple, optional
        (min, max) expected range for numeric values - can be used to raise warnings
    unit : str, optional
        Unit to be used for data values

    Raises
    ------
    ValueError
        If dtype is missing, if coords/dims are inconsistent, or if a range does not have
        exactly two values in (min, max) order
    TypeError
        If a range is provided but is not a list- or tuple-like sequence
    """

    def __init__(
        self,
        name=None,
        dims=None,
        coords=None,
        dtype=None,
        abs_range=None,
        exp_range=None,
        unit=None,
        description=None,
    ):
        self._name = name
        self._description = description

        # Coords may come as a dict, in which case dims must be provided to define order
        if isinstance(coords, dict):
            try:
                coords, dims = self._coords_from_dict(coords, dims)
            except (ValueError, KeyError) as error:
                msg = "Coordinate metadata incorrectly formatted for variable '{}': {}"
                # Chain explicitly so the underlying cause stays visible in tracebacks
                raise ValueError(msg.format(self.name, error)) from error
        # Or as a list of Coordinates, in which case dims must not be provided
        elif isinstance(coords, list):
            coords, dims = self._coords_from_list(coords, dims)
        # Or if None, this spec describes a zero-dimensional parameter - single value
        else:
            coords, dims = [], []

        self._dims = dims
        self._coords = coords

        if dtype is None:
            raise ValueError("Spec.dtype must be provided, in {}".format(self._name))
        self._dtype = dtype

        if abs_range is not None:
            self._check_range(abs_range)
        self._abs_range = abs_range

        if exp_range is not None:
            self._check_range(exp_range)
        self._exp_range = exp_range

        self._unit = unit

    def _coords_from_list(self, coords, dims):
        """Set up coords and dims from a list of Coordinates, checking for consistency

        Returns the (coords, dims) pair, with dims derived from each coordinate's ``dim``.
        Raises ValueError if any item is not a Coordinates, if dims was also passed, or if
        the derived dims contain duplicates.
        """
        for coord in coords:
            if not isinstance(coord, Coordinates):
                msg = (
                    "Spec.coords may be a dict[str,list] or a list[Coordinates], in {}"
                )
                raise ValueError(msg.format(self._name))

        if dims is not None:
            msg = (
                "Spec.dims are derived from Spec.coords if provided as a list of "
                + "Coordinates, in {}"
            )
            raise ValueError(msg.format(self._name))

        dims = [coord.dim for coord in coords]

        if len(dims) != len(set(dims)):
            msg = "Spec cannot be created with duplicate dims, in {}"
            raise ValueError(msg.format(self._name))

        return coords, dims

    def _coords_from_dict(self, coords, dims):
        """Set up coords and dims from a dict of labels, checking for consistency

        Returns the (coords, dims) pair, with one Coordinates built per dim, in dims order.
        Raises ValueError if dims is missing, contains duplicates, or does not match the
        keys of coords.
        """
        if dims is None:
            msg = "Spec.dims must be specified if coords are provided as a dict, in {}"
            raise ValueError(msg.format(self._name))

        if len(dims) != len(set(dims)):
            msg = "Spec cannot be created with duplicate dims, in {}"
            raise ValueError(msg.format(self._name))

        if sorted(dims) != sorted(coords.keys()):
            msg = "Spec.dims must match the keys in coords, in {}"
            raise ValueError(msg.format(self._name))

        coords = [Coordinates(dim, coords[dim]) for dim in dims]

        return coords, dims

    @classmethod
    def from_dict(cls, data_provided):
        """Create a Spec from a dict representation

        Keys missing from ``data_provided`` default to None; the constructor then decides
        which of them are essential.
        """
        # default anything to None, let constructor handle essential missing values
        data = defaultdict(lambda: None)
        data.update(data_provided)
        # Construct via cls (not Spec) so that subclasses get instances of themselves
        return cls(
            name=data["name"],
            description=data["description"],
            dims=data["dims"],
            coords=data["coords"],
            dtype=data["dtype"],
            abs_range=data["abs_range"],
            exp_range=data["exp_range"],
            unit=data["unit"],
        )

    def as_dict(self):
        """Serialise to dict representation"""
        return {
            "name": self.name,
            "description": self.description,
            # Copy so that callers cannot mutate this spec's dims through the result
            "dims": list(self._dims),
            "coords": {c.name: c.ids for c in self._coords},
            "dtype": self._dtype,
            "abs_range": self._abs_range,
            "exp_range": self._exp_range,
            "unit": self._unit,
        }

    @property
    def name(self):
        """The name of the data that this spec describes."""
        return self._name

    @name.setter
    def name(self, value):
        self._name = value

    @property
    def description(self):
        """A human-friendly description"""
        return self._description

    @property
    def dtype(self):
        """The dtype of the data that this spec describes."""
        return self._dtype

    @property
    def abs_range(self):
        """The absolute range of data values that this spec describes."""
        return self._abs_range

    @property
    def exp_range(self):
        """The expected range of data values that this spec describes."""
        return self._exp_range

    @property
    def shape(self):
        """Tuple of dimension sizes. The shape of the data that this spec describes."""
        return tuple(len(c.ids) for c in self._coords)

    @property
    def ndim(self):
        """The number of dimensions of the data that this spec describes."""
        return len(self._coords)

    @property
    def dims(self):
        """Names for each dimension"""
        return list(self._dims)

    @property
    def coords(self):
        """Coordinate labels for each dimension."""
        return list(self._coords)

    def dim_coords(self, dim: str):
        """Coordinates for a given dimension

        Raises TypeError if dim is not a string and KeyError if dim is not a dimension of
        this spec.
        """
        if not isinstance(dim, str):
            msg = "Expected string as argument, instead received {}"
            raise TypeError(msg.format(type(dim)))

        if dim not in self.dims:
            raise KeyError(
                "Could not find dim '{}' in Spec '{}'".format(dim, self._name)
            )

        for coord in self._coords:
            if coord.dim == dim:
                return coord

        # Only reachable if dims and coords have fallen out of step
        raise KeyError(
            "Coords not found for dim '{}', in Spec '{}'".format(dim, self._name)
        )

    def dim_names(self, dim: str):
        """Names of each coordinate in a given dimension"""
        return self.dim_coords(dim).names

    def dim_elements(self, dim: str):
        """Elements of each coordinate in a given dimension"""
        return self.dim_coords(dim).elements

    @property
    def unit(self):
        """The unit for all data points."""
        return self._unit

    def __eq__(self, other):
        # Defer to the other operand rather than raising AttributeError on foreign types
        if not isinstance(other, Spec):
            return NotImplemented
        # Equality compares dtype, dims, coords and unit only; name, description and
        # ranges are not considered
        return (
            self.dtype == other.dtype
            and self.dims == other.dims
            and self.coords == other.coords
            and self.unit == other.unit
        )

    def __hash__(self):
        # Hash over the same fields as __eq__ so that equal specs hash equally
        return hash((self.dtype, tuple(self.dims), tuple(self.coords), self.unit))

    def __repr__(self):
        return "<Spec name='{}' dims='{}' unit='{}'>".format(
            self.name, self.dims, self.unit
        )

    def _check_range(self, range_):
        """Error if range is not a [min, max] list or tuple"""
        if not _is_sequence(range_):
            msg = "Spec range must be a list or tuple, got {} for {}"
            raise TypeError(msg.format(range_, self._name))

        if len(range_) != 2:
            msg = "Spec range must have min and max values only, got {} for {}"
            raise ValueError(msg.format(range_, self._name))

        min_, max_ = range_
        # Equal min and max are accepted; only a reversed range is rejected
        if max_ < min_:
            msg = "Spec range min value must be smaller than max value, got {} for {}"
            raise ValueError(msg.format(range_, self._name))
def _is_sequence(obj): """Check for iterable object that is not a string ('strip' is a method on str)""" return not hasattr(obj, "strip") and ( hasattr(obj, "__getitem__") or hasattr(obj, "__iter__") )