Python API

`H5Handle(path)`

Source code in imas2xarray/_io.py

def __init__(self, path: Path | str):
    """Create a handle for the data stored at `path`.

    Parameters
    ----------
    path : Path | str
        Location of the data. It is converted to a `Path` and stored as
        `self.path`; `open_ids` looks for `<path>/<ids>.h5` below it.
    """
    self.path = Path(path)

`get_all_variables(*, ids, extra_variables=None, squash=True, **kwargs)`

Get all known variables from selected ids from the dataset.

This function looks up the data location from the imas2xarray.var_lookup table

Parameters:

ids (str) –

The IDS to load from (e.g. 'core_profiles')
extra_variables (Iterable[IDSVariableModel], default: None ) –

Extra variables to load in addition to the ones known through the config
squash (bool, default: True ) –

Squash placeholder variables
**kwargs –

These keyword arguments are passed to H5Handle.get_variables()

Returns:

ds ( xarray ) –

The data in xarray format.

Source code in imas2xarray/_io.py

def get_all_variables(
    self,
    *,
    ids: str,
    extra_variables: None | Iterable[IDSVariableModel] = None,
    squash: bool = True,
    **kwargs,
) -> xr.Dataset:
    """Load every variable known for the selected IDS.

    The variable names registered for `ids` in the
    `imas2xarray.var_lookup` table are combined with `extra_variables`
    (duplicates removed) and loaded through `get_variables` with
    `missing_ok=True`.

    Parameters
    ----------
    ids : str
        The IDS to load from (e.g. 'core_profiles')
    extra_variables : Iterable[Variable], optional
        Extra variables to load in addition to the ones known through the config
    squash : bool
        Squash placeholder variables
    **kwargs
        These keyword arguments are forwarded to `get_variables`

    Returns
    -------
    ds : xr.Dataset
        The data in `xarray` format.
    """
    known_names = var_lookup.filter_ids(ids).keys()

    # Extra variables go in first, then the names from the lookup table;
    # the set drops anything requested twice.
    requested = set(extra_variables or [])
    requested.update(known_names)

    return self.get_variables(
        list(requested),
        ids=ids,
        squash=squash,
        missing_ok=True,
        **kwargs,
    )

`get_variables(variables, *, ids, squash=True, **kwargs)`

Get variables from data set.

This function looks up the data location from the imas2xarray.var_lookup table, and returns an xarray dataset. Variable dimensions are automatically retrieved if available.

Parameters:

variables (Iterable[str | IDSVariableModel]) –

Variable names of the data to load.
ids (str) –

The IDS to load from (e.g. 'core_profiles')
squash (bool, default: True ) –

Squash placeholder variables
**kwargs –

These keyword arguments are passed on to the internal conversion helper (_mapping_to_xarray)

Returns:

ds ( xarray ) –

The data in xarray format.

Raises:

ValueError –

When variables are from different IDS.

Source code in imas2xarray/_io.py

def get_variables(
    self,
    variables: Iterable[str | IDSVariableModel],
    *,
    ids: str,
    squash: bool = True,
    **kwargs,
) -> xr.Dataset:
    """Load the requested variables into an xarray dataset.

    The variables are resolved through the `imas2xarray.var_lookup`
    table. The dimensions named by the resolved variables are looked up
    as well and loaded along with them when the table knows them.

    Parameters
    ----------
    variables : Iterable[str | Variable]
        Variable names of the data to load.
    ids : str
        The IDS to load from (e.g. 'core_profiles')
    squash : bool
        Squash placeholder variables (see `squash_placeholders`)
    **kwargs
        These keyword arguments are passed to `_mapping_to_xarray`

    Returns
    -------
    ds : xr.Dataset
        The data in `xarray` format.

    Raises
    ------
    ValueError
        When a resolved variable does not belong to `ids`.
    """
    selected = var_lookup.lookup(variables)

    # Gather the dimension names of the requested variables and add the
    # ones the lookup table can resolve; unknown dimensions are skipped.
    dim_names = set()
    for model in selected:
        dim_names.update(model.dims)
    selected |= var_lookup.lookup(dim_names, skip_missing=True)

    # Everything must come from the IDS that is about to be opened.
    for model in selected:
        if model.ids == ids:
            continue
        raise ValueError(f'Variable {model} does not belong to {ids}.')

    with self.open_ids(ids, 'r') as group:
        loaded = _mapping_to_xarray(group, variables=selected, **kwargs)

    return squash_placeholders(loaded) if squash else loaded

`open_ids(ids='core_profiles', mode='r')`

Context manager to open the IDS file.

Parameters:

ids (str, default: 'core_profiles' ) –

Name of profiles to open

Returns:

File –

Source code in imas2xarray/_io.py

@contextmanager
def open_ids(self, ids: str = 'core_profiles', mode='r') -> h5py.File:
    """Context manager to open the data file of an IDS.

    The file `<self.path>/<ids>.h5` is opened with `h5py.File` and the
    entry named `ids` inside it is yielded. The file is closed again when
    the context exits.

    Parameters
    ----------
    ids : str, optional
        Name of the IDS to open
    mode : str, optional
        File mode passed on to `h5py.File` (this file uses 'r' and 'r+')

    Yields
    ------
    The `ids` entry of the opened file, i.e. `f[ids]`.

    Raises
    ------
    FileNotFoundError
        If the data file for `ids` does not exist.
    """
    data_file = (self.path / ids).with_suffix('.h5')

    # Raise explicitly instead of using `assert`: assertions are removed when
    # Python runs with `-O`, and the error should name the missing file.
    if not data_file.exists():
        raise FileNotFoundError(f'IDS data file does not exist: {data_file}')

    with h5py.File(data_file, mode) as f:
        yield f[ids]

`set_variables(dataset, *, ids, variables=None)`

Update variables in corresponding ids datafile.

Parameters:

dataset (Dataset) –

Dataset with variables to write. Their dimensions must match those of the target dataset.
ids (str) –

IDS to write to.
variables (Iterable[str | IDSVariableModel], default: None ) –

List of data variables to write.

Source code in imas2xarray/_io.py

def set_variables(
    self,
    dataset: xr.Dataset,
    *,
    ids: str,
    variables: None | Iterable[str | IDSVariableModel] = None,
):
    """Write variables from `dataset` into the data file of an IDS.

    The file is opened in 'r+' mode and existing entries are overwritten
    in place.

    Parameters
    ----------
    dataset : xr.Dataset
        Dataset with variables to write. Their dimensions must match those of the
        target dataset.
    ids : str
        IDS to write to.
    variables : Iterable[str | Variable], optional
        List of data variables to write. When empty or None, every entry
        of `dataset.variables` is used.

    Raises
    ------
    ValueError
        When a resolved variable does not belong to `ids`.
    """
    names = variables if variables else list(dataset.variables)
    # TODO: check variables in var_lookup

    models = var_lookup.lookup(names)

    # Validate everything before the file is opened for writing.
    for model in models:
        if model.ids == ids:
            continue
        raise ValueError(f'Variable {model} does not belong to {ids}.')

    with self.open_ids(ids, 'r+') as group:
        for model in models:
            values = dataset[model.name]
            key, slices = _var_path_to_hdf5_key_and_slices(model.path)
            target = group[key]
            target[slices] = values

`Variable`

Bases: IDSPath

Variable for describing data within an IMAS database.

The variable can be given a name, which will be used in the rest of the config to reference the variable. It will also be used as the column labels or on plots.

The dimensions for each variable must be specified. This ensures that the data will be self-consistent. For example, for 1D data, you can use [x] and for 2D data, [x, y].

The IDS path may contain indices. You can point to a single index, by simply giving the complete path (i.e. profiles_1d/0/t_i_ave for the 0th time slice). To retrieve all time slices, you can use profiles_1d/*/t_i_ave.

`VariableConfigLoader(*, model=VariableConfigModel, var_dir='imas2xarray', var_env='IMAS2XARRAY_VARDEF', module=files('imas2xarray.data'))`

Source code in imas2xarray/_lookup.py

def __init__(
    self,
    *,
    model: type = VariableConfigModel,
    var_dir: str = 'imas2xarray',
    var_env: str = 'IMAS2XARRAY_VARDEF',
    module: Path | Any = files('imas2xarray.data'),
):
    """Store the loader settings and resolve the config file paths.

    Parameters
    ----------
    model : type
        Stored as `self.model`; `load` parses every config file with it.
    var_dir : str
        Stored as `self.var_dir`.
        NOTE(review): presumably the directory name searched below the
        config home -- confirm against the `_get_paths_*` helpers.
    var_env : str
        Stored as `self.var_env`.
        NOTE(review): presumably the name of the environment variable that
        points at the variable definitions -- confirm.
    module : Path | Any
        Stored as `self.module`.
        NOTE(review): presumably the packaged location used as fallback --
        confirm against `_get_paths_fallback`.
    """
    self.model = model
    self.var_dir = var_dir
    self.var_env = var_env
    self.module = module

    # Resolved once at construction; `load` iterates over these paths.
    self.paths = self.get_config_path()

`get_config_path()`

Try to get the config file with variable definitions.

Search order: 1. environment variable; 2. config home (first $XDG_CONFIG_HOME/imas2xarray, then $HOME/.config/imas2xarray); 3. local directory (not sure if this should be implemented); 4. fall back to the variable definitions in the package.

Source code in imas2xarray/_lookup.py

def get_config_path(self) -> tuple[Path, ...]:
    """Try to get the config file(s) with variable definitions.

    All three lookups below are performed; the first non-empty result wins:

    1. environment variable
    2. config home
    3. local directory

    If none of them yields anything, the fallback paths from
    `_get_paths_fallback` are returned.
    """
    from_environment = self._get_paths_from_environment_variable()
    from_config_home = self._get_paths_from_config_home()
    from_local_directory = self._get_paths_local_directory()

    # `or` picks the first truthy candidate; the fallback is only computed
    # when every candidate came back empty.
    return (
        from_environment
        or from_config_home
        or from_local_directory
        or self._get_paths_fallback()
    )

`load(var_lookup=None)`

Load the variables config.

Parameters:

var_lookup (None | VarLookup, default: None ) –

Populate initial variable lookup table with entries from this lookup table. Use this to load variables from different locations.

Returns:

var_lookup ( VarLookup ) –

Variable lookup table

Source code in imas2xarray/_lookup.py

def load(self, var_lookup: None | VarLookup = None) -> VarLookup:
    """Load the variables config.

    Every path in `self.paths` is opened and parsed with `self.model`;
    the resulting variables are merged into the lookup table via
    `update`, so files loaded later take part in the same `update`
    semantics as earlier ones.

    Parameters
    ----------
    var_lookup : None | VarLookup
        Populate initial variable lookup table with entries from this lookup table.
        Use this to load variables from different locations. The table that
        is passed in is updated and returned, even when it is still empty.

    Returns
    -------
    var_lookup : VarLookup
        Variable lookup table
    """
    # Compare against None explicitly: a truthiness test would treat an
    # empty table supplied by the caller as "not given" and replace it.
    if var_lookup is None:
        var_lookup = VarLookup()

    for path in self.paths:
        logger.debug(f'Loading variables from: {path}')
        with open(path) as f:
            var_config = parse_yaml_raw_as(self.model, f)
        var_lookup.update(var_config.to_variable_dict())

    return var_lookup

`VariableConfigModel`

Bases: RootModel

`to_variable_dict()`

Return dict of variables.

Source code in imas2xarray/_models.py

def to_variable_dict(self) -> dict:
    """Return the variables as a dict keyed by their name.

    Iterates over this model and maps each item's `name` attribute to the
    item itself. When two items share a name, the later one is kept.
    """
    return {var.name: var for var in self}

`rebase_all_coords(datasets, reference_dataset)`

Rebase all coords, by applying rebase operations.

Parameters:

datasets (Sequence[Dataset]) –

datasets
reference_dataset (Dataset) –

reference_dataset

Returns:

tuple[Dataset, ...] –

Source code in imas2xarray/_rebase.py

def rebase_all_coords(
    datasets: Sequence[xr.Dataset],
    reference_dataset: xr.Dataset,
) -> tuple[xr.Dataset, ...]:
    """Interpolate all datasets onto the coordinates of a reference dataset.

    Only coordinates of the reference with more than one point are used
    as interpolation targets. Values outside the source range are
    extrapolated.

    Parameters
    ----------
    datasets : Sequence[xr.Dataset]
        Datasets to interpolate
    reference_dataset : xr.Dataset
        Dataset whose coordinates define the target grid

    Returns
    -------
    tuple[xr.Dataset, ...]
        The interpolated datasets, in input order
    """
    target_coords = {}
    for name, coord in reference_dataset.coords.items():
        # Skip single-point coordinates.
        if coord.size > 1:
            target_coords[name] = coord

    rebased = [
        ds.interp(coords=target_coords, kwargs={'fill_value': 'extrapolate'})
        for ds in datasets
    ]
    return tuple(rebased)

`rebase_on_grid(ds, *, coord_dim, new_coords)`

Rebase (interpolate) the coordinate dimension to the new coordinates.

Thin wrapper around xarray.Dataset.interp.

Parameters:

ds (Dataset) –

Source dataset
coord_dim (str) –

Name of the grid dimension (i.e. grid variable).
new_coords (ndarray) –

The coordinates to interpolate to

Returns:

Dataset –

Rebased dataset

Source code in imas2xarray/_rebase.py

def rebase_on_grid(ds: xr.Dataset, *, coord_dim: str, new_coords: np.ndarray) -> xr.Dataset:
    """Interpolate a dataset along one coordinate dimension.

    Thin wrapper around `xarray.Dataset.interp` that extrapolates values
    outside the source range.

    Parameters
    ----------
    ds : xr.Dataset
        Source dataset
    coord_dim : str
        Name of the grid dimension (i.e. grid variable).
    new_coords : np.ndarray
        The coordinates to interpolate to

    Returns
    -------
    xr.Dataset
        Rebased dataset
    """
    target = {coord_dim: new_coords}
    interp_options = {'fill_value': 'extrapolate'}
    return ds.interp(coords=target, kwargs=interp_options)

`rebase_on_time(ds, *, time_dim='time', new_coords)`

Rebase (interpolate) the time dimension to the new coordinates.

Thin wrapper around xarray.Dataset.interp.

Parameters:

ds (Dataset) –

Source dataset
time_dim (str, default: 'time' ) –

Name of the time dimension (i.e. time variable).
new_coords (ndarray) –

The coordinates to interpolate to

Returns:

Dataset –

Rebased dataset

Source code in imas2xarray/_rebase.py

def rebase_on_time(
    ds: xr.Dataset,
    *,
    time_dim='time',
    new_coords: np.ndarray,
) -> xr.Dataset:
    """Interpolate a dataset along its time dimension.

    Delegates to `rebase_on_grid`. A dataset with fewer than two time
    steps is returned unchanged.

    Parameters
    ----------
    ds : xr.Dataset
        Source dataset
    time_dim : str
        Name of the time dimension (i.e. time variable).
    new_coords : np.ndarray
        The coordinates to interpolate to

    Returns
    -------
    xr.Dataset
        Rebased dataset
    """
    has_multiple_steps = len(ds[time_dim]) >= 2
    if has_multiple_steps:
        return rebase_on_grid(ds, coord_dim=time_dim, new_coords=new_coords)

    # nothing to rebase with only 1 timestep
    return ds

`rezero_time(ds, *, start=0, key='time')`

Standardize the time within a dataset by setting the first timestep to 0.

Simply subtracts time[0] from all time entries and adds start. Note: this does not interpolate the times between different datasets.

Parameters:

ds (Dataset) –

Source dataset
key (str, default: 'time' ) –

Name of the time dimension
start (int, default: 0 ) –

Where to start the returned time series

Source code in imas2xarray/_rebase.py

def rezero_time(ds: xr.Dataset, *, start: int = 0, key: str = 'time') -> None:
    """Shift the time axis of a dataset so that it begins at `start`.

    Subtracts the first time entry from all time entries and adds `start`.
    The dataset is modified in place; nothing is returned.
    Note: this does not interpolate the times between different datasets.

    Parameters
    ----------
    ds : xr.Dataset
        Source dataset, updated in place
    key : str
        Name of the time dimension
    start : int, optional
        Where to start the returned time series
    """
    times = ds[key]
    first_time = times[0]
    ds[key] = times - first_time + start

`squash_placeholders(ds)`

Squash placeholder variables. Data are grouped along the first dimension (usually time).

If the data contains dimensions with a $-prefix, these are all interpolated to the first array of that type.

Parameters:

ds (Dataset) –

xarray Dataset

Returns:

ds ( Dataset ) –

xarray Dataset

Source code in imas2xarray/_rebase.py

def squash_placeholders(ds: xr.Dataset) -> xr.Dataset:
    """Squash placeholder variables. Data are grouped along the first dimension
    (usually time).

    If the data contains dimensions with a `$`-prefix,
    these are all interpolated to the first array of that type.
    Each placeholder dimension `$name` is replaced through
    `standardize_grid`, using the variable `name` as the new dimension.

    Parameters
    ----------
    ds : xr.Dataset
        xarray Dataset

    Returns
    -------
    ds : xr.Dataset
        xarray Dataset

    Raises
    ------
    KeyError
        When the variable matching a placeholder is not in the dataset.
    IndexError
        When a placeholder is the first dimension, so that there is no
        leading dimension to group the data by.
    """
    prefix = '$'

    # Dimension order is taken once, from the incoming dataset.
    dimensions = tuple(str(dim) for dim in ds.dims)

    for position, placeholder in enumerate(dimensions):
        if not placeholder.startswith(prefix):
            continue

        new_dim = placeholder.lstrip(prefix)

        if new_dim not in ds:
            raise KeyError(f'Cannot squash placeholder, `{new_dim}` not in dataset.')

        # Grouping needs a dimension that comes before the placeholder.
        # The exception type is unchanged from the bare tuple-index error
        # this used to produce; only the message is new.
        if position == 0:
            raise IndexError(
                f'Cannot squash placeholder `{placeholder}`: '
                'it is the first dimension, so there is no dimension to group by.'
            )

        ds = standardize_grid(ds, new_dim=new_dim, old_dim=placeholder, group=dimensions[0])

    return ds

`standardize_grid(ds, *, new_dim, old_dim, group=None, new_dim_data=0)`

Standardize the grid within a dataset.

Perform split-apply-combine routine on the data. Split by the group, standardize the data in new_dim using new_dim_data (interpolate if necessary), and combine replacing old_dim by new_dim.

Parameters:

ds (Dataset) –

Source dataset
new_dim (str) –

Must be an existing variable with group as a dimension.
old_dim (str) –

Must be an existing dimension without coordinates.
group (str, default: None ) –

Split the data in groups over this dimension.
new_dim_data (Union[ndarray, int], default: 0 ) –

The data to be used for new_dim. If it is an integer, use it as an index to grab the data from new_dim.

Returns:

Dataset –

New dataset with new_dim as a coordinate dimension.

Source code in imas2xarray/_rebase.py

def standardize_grid(
    ds: xr.Dataset,
    *,
    new_dim: str,
    old_dim: str,
    group: Optional[str] = None,
    new_dim_data: Union[np.ndarray, int] = 0,
) -> xr.Dataset:
    """Standardize the grid within a dataset.

    Perform `split-apply-combine` routine on the data. Split
    by the `group`, standardize the data in `new_dim` using
    `new_dim_data` (interpolate if necessary),
    and combine replacing `old_dim` by `new_dim`.

    Parameters
    ----------
    ds : xr.Dataset
        Source dataset
    new_dim : str
        Must be an existing variable with `group` as a dimension.
    old_dim : str
        Must be an existing dimension without coordinates.
    group : str
        Split the data in groups over this dimension. Although the
        signature defaults to None, a dimension name is required.
    new_dim_data : Union[np.ndarray, int], optional
        The data to be used for `new_dim`. If it is an integer
        (Python or NumPy), use it as an index along `group` to grab
        the data from `new_dim`.

    Returns
    -------
    xr.Dataset
        New dataset with `new_dim` as a coordinate dimension.

    Raises
    ------
    ValueError
        When `group` is None.
    """
    # Both the index lookup and the groupby below need a dimension name;
    # fail here with a clear message instead of deep inside xarray.
    if group is None:
        raise ValueError('`group` must be the name of the dimension to split the data over.')

    # NumPy integers are not `int` instances, but are indices all the same.
    if isinstance(new_dim_data, (int, np.integer)):
        reference_slice = ds.isel(**{group: new_dim_data})
        new_dim_data = reference_slice[new_dim].data  # type: ignore

    interp_kwargs = {new_dim: new_dim_data}

    def standardize(subset):
        # Promote `new_dim` to the dimension coordinate, then bring the
        # subset onto the common grid.
        subset = subset.swap_dims({old_dim: new_dim})
        return subset.interp(**interp_kwargs)

    return ds.groupby(group).map(standardize)

`standardize_grid_and_time(datasets, *, grid_var='rho_tor_norm', time_var='time', reference_dataset=0)`

Standardize list of datasets by applying standard rebase operations.

Applies, in sequence: 1. rebase_on_grid 2. rebase_on_time

Parameters:

datasets (Sequence[Dataset]) –

List of source datasets
grid_var (str, default: 'rho_tor_norm' ) –

Name of the grid dimension (i.e. grid variable)
time_var (str, default: 'time' ) –

Name of the time dimension (i.e. time variable)
reference_dataset (int, default: 0 ) –

The dataset with this index will be used as the reference for rebasing. The grid and time coordinates of the other datasets will be rebased to the reference.

Returns:

tuple[Dataset] –

Tuple of output datasets

Source code in imas2xarray/_rebase.py

def standardize_grid_and_time(
    datasets: Sequence[xr.Dataset],
    *,
    grid_var: str = 'rho_tor_norm',
    time_var: str = 'time',
    reference_dataset: int = 0,
) -> tuple[xr.Dataset, ...]:
    """Bring a list of datasets onto a common grid and time base.

    Applies, in sequence:
    1. `rebase_on_grid`, using the grid of the reference dataset
    2. `rebase_on_time`, using the time of the (regridded) reference dataset

    Parameters
    ----------
    datasets : Sequence[xr.Dataset]
        List of source datasets
    grid_var : str, optional
        Name of the grid dimension (i.e. grid variable)
    time_var : str, optional
        Name of the time dimension (i.e. time variable)
    reference_dataset : int, optional
        The dataset with this index will be used as the reference for rebasing.
        The grid and time coordinates of the other datasets will be rebased
        to the reference.

    Returns
    -------
    tuple[xr.Dataset, ...]
        Tuple of output datasets
    """
    reference_grid = datasets[reference_dataset][grid_var].data
    regridded = tuple(
        rebase_on_grid(ds, coord_dim=grid_var, new_coords=reference_grid) for ds in datasets
    )

    # The reference time is read after the grid step, from the regridded data.
    reference_time = regridded[reference_dataset][time_var].data
    return tuple(
        rebase_on_time(ds, time_dim=time_var, new_coords=reference_time) for ds in regridded
    )

`to_imas(path, dataset, *, ids, variables=None)`

Write variables in xarray dataset back to IMAS data at given path.

Update only, IMAS data must be in HDF5 format.

Parameters:

path (str | Path) –

Path to the data
dataset (Dataset) –

Input dataset
ids (str) –

The IDS to write to (i.e. 'core_profiles')
variables (Iterable[str | IDSVariableModel], default: None ) –

List of variables to write back. If None, attempt to write back all variables known to imas2xarray

Source code in imas2xarray/_io.py

def to_imas(
    path: str | Path,
    dataset: xr.Dataset,
    *,
    ids: str,
    variables: None | Iterable[str | IDSVariableModel] = None,
):
    """Write variables from an xarray dataset back to IMAS data at `path`.

    Convenience wrapper that creates an `H5Handle` for `path` and calls
    its `set_variables`. Update only, IMAS data must be in HDF5 format.

    Parameters
    ----------
    path : str | Path
        Path to the data
    dataset : xr.Dataset
        Input dataset
    ids : str
        The IDS to write to (e.g. 'core_profiles')
    variables : Iterable[str | Variable], optional
        List of variables to write back. If None, `set_variables` falls
        back to all variables in `dataset`.
    """
    H5Handle(path).set_variables(dataset, ids=ids, variables=variables)

`to_xarray(path, *, ids, variables=None)`

Load IDS from given path to IMAS data into an xarray dataset.

IMAS data must be in HDF5 format.

Parameters:

path (str | Path) –

Path to the data
ids (str) –

The IDS to load (i.e. 'core_profiles')
variables (None | Iterable[str | IDSVariableModel], default: None ) –

List of variables to load. If None, attempt to load all variables known to imas2xarray

Returns:

dataset ( Dataset ) –

Xarray dataset with all specified variables

Source code in imas2xarray/_io.py

def to_xarray(
    path: str | Path, *, ids: str, variables: None | Iterable[str | IDSVariableModel] = None
) -> xr.Dataset:
    """Load an IDS from IMAS data at `path` into an xarray dataset.

    Convenience wrapper around `H5Handle`. IMAS data must be in HDF5 format.

    Parameters
    ----------
    path : str | Path
        Path to the data
    ids : str
        The IDS to load (e.g. 'core_profiles')
    variables : None | Iterable[str | Variable], optional
        List of variables to load. If None or empty, attempt to load
        all variables known to `imas2xarray`

    Returns
    -------
    dataset : xr.Dataset
        Xarray dataset with all specified variables
    """
    handle = H5Handle(path)

    # No explicit selection: load everything the lookup table knows.
    if not variables:
        return handle.get_all_variables(ids=ids)

    return handle.get_variables(variables=variables, ids=ids)