Skip to content

Python API

H5Handle(path)

Source code in imas2xarray/_io.py
177
178
def __init__(self, path: Path | str):
    """Remember where the IMAS data lives.

    Parameters
    ----------
    path : Path | str
        Location of the data. It is converted to a `pathlib.Path` and
        stored as `self.path`.
    """
    self.path = Path(path)

get_all_variables(*, ids, extra_variables=None, squash=True, **kwargs)

Get all known variables from selected ids from the dataset.

This function looks up the data location from the imas2xarray.var_lookup table

Parameters:

  • ids (str) –

    The IDS to read from (e.g. 'core_profiles')

  • extra_variables (Iterable[IDSVariableModel], default: None ) –

    Extra variables to load in addition to the ones known through the config

  • squash (bool, default: True ) –

    Squash placeholder variables

  • **kwargs –

    These keyword arguments are passed to H5Handle.get_variables()

Returns:

  • ds ( xarray ) –

    The data in xarray format.

Source code in imas2xarray/_io.py
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
def get_all_variables(
    self,
    *,
    ids: str,
    extra_variables: None | Iterable[IDSVariableModel] = None,
    squash: bool = True,
    **kwargs,
) -> xr.Dataset:
    """Load every variable that is known for the selected IDS.

    The variable names are taken from the `imas2xarray.var_lookup` table,
    filtered on `ids`, and combined with `extra_variables`. The actual
    loading is delegated to `get_variables`, called with `missing_ok=True`.

    Parameters
    ----------
    ids : str
        The IDS to read from (e.g. 'core_profiles')
    extra_variables : Iterable[IDSVariableModel], optional
        Extra variables to load in addition to the ones known through the config
    squash : bool
        Squash placeholder variables
    **kwargs
        These keyword arguments are forwarded to `get_variables`

    Returns
    -------
    ds : xr.Dataset
        The data in `xarray` format.
    """
    known_names = var_lookup.filter_ids(ids).keys()
    requested = [*(extra_variables or []), *known_names]

    # Going through a set removes duplicates; the resulting order is arbitrary.
    unique = list(set(requested))

    return self.get_variables(unique, ids=ids, squash=squash, missing_ok=True, **kwargs)

get_variables(variables, *, ids, squash=True, **kwargs)

Get variables from data set.

This function looks up the data location from the imas2xarray.var_lookup table, and returns an xarray dataset. Variable dimensions are automatically retrieved if available.

Parameters:

  • variables (Iterable[str | IDSVariableModel]) –

    Variable names of the data to load.

  • ids (str) –

    The IDS to read from (e.g. 'core_profiles')

  • squash (bool, default: True ) –

    Squash placeholder variables

  • **kwargs –

    These keyword arguments are passed on to the internal mapping-to-xarray conversion (_mapping_to_xarray)

Returns:

  • ds ( xarray ) –

    The data in xarray format.

Raises:

  • ValueError –

    When variables are from different IDS.

Source code in imas2xarray/_io.py
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
def get_variables(
    self,
    variables: Iterable[str | IDSVariableModel],
    *,
    ids: str,
    squash: bool = True,
    **kwargs,
) -> xr.Dataset:
    """Read the requested variables from one IDS into an xarray dataset.

    The variables are resolved through the `imas2xarray.var_lookup` table.
    The dimensions named by the resolved variables are looked up as well
    (those that are unknown are skipped), so that they are loaded alongside
    the data.

    Parameters
    ----------
    variables : Iterable[str | IDSVariableModel]
        Variable names of the data to load.
    ids : str
        The IDS to read from (e.g. 'core_profiles')
    squash : bool
        Squash placeholder variables
    **kwargs
        These keyword arguments are passed to `_mapping_to_xarray`

    Returns
    -------
    ds : xr.Dataset
        The data in `xarray` format.

    Raises
    ------
    ValueError
        When a variable (or one of the added dimension variables) belongs
        to a different IDS than `ids`.
    """
    models = var_lookup.lookup(variables)

    # Collect the dimension names of all requested variables and add the
    # ones the lookup table knows about.
    dim_names = set()
    for model in models:
        dim_names.update(model.dims)
    models |= var_lookup.lookup(dim_names, skip_missing=True)

    offenders = [model for model in models if model.ids != ids]
    if offenders:
        var = offenders[0]
        raise ValueError(f'Variable {var} does not belong to {ids}.')

    with self.open_ids(ids, 'r') as group:
        ds = _mapping_to_xarray(group, variables=models, **kwargs)

    return squash_placeholders(ds) if squash else ds

open_ids(ids='core_profiles', mode='r')

Context manager to open the IDS file.

Parameters:

  • ids (str, default: 'core_profiles' ) –

    Name of the IDS to open

Returns:

  • File –
Source code in imas2xarray/_io.py
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
@contextmanager
def open_ids(self, ids: str = 'core_profiles', mode='r') -> h5py.File:
    """Context manager to open the HDF5 file of an IDS.

    The file is looked up as `<self.path>/<ids>.h5`. It is opened with
    `h5py.File` and closed again when the context exits.

    Parameters
    ----------
    ids : str, optional
        Name of the IDS to open. It is used both as the file stem and as
        the key that is read from the opened file.
    mode : str, optional
        File mode passed to `h5py.File` (this module uses 'r' and 'r+').

    Yields
    ------
    The object stored under `ids` in the opened file.

    Raises
    ------
    FileNotFoundError
        When the data file does not exist.
    """
    data_file = (self.path / ids).with_suffix('.h5')

    # Explicit check instead of `assert`: assertions are stripped when Python
    # runs with -O, which would silently skip this validation.
    if not data_file.exists():
        raise FileNotFoundError(f'IDS data file does not exist: {data_file}')

    with h5py.File(data_file, mode) as f:
        yield f[ids]

set_variables(dataset, *, ids, variables=None)

Update variables in corresponding ids datafile.

Parameters:

  • dataset (Dataset) –

    Dataset with variables to write. Their dimensions must match those of the target dataset.

  • ids (str) –

    IDS to write to.

  • variables (Iterable[str | IDSVariableModel], default: None ) –

    List of data variables to write.

Source code in imas2xarray/_io.py
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
def set_variables(
    self,
    dataset: xr.Dataset,
    *,
    ids: str,
    variables: None | Iterable[str | IDSVariableModel] = None,
):
    """Write variables from a dataset into the data file of an IDS.

    The file is opened in 'r+' mode and the data of each variable is
    assigned into the location that its path points to.

    Parameters
    ----------
    dataset : xr.Dataset
        Dataset with variables to write. Their dimensions must match those of the
        target dataset.
    ids : str
        IDS to write to.
    variables : Iterable[str | IDSVariableModel], optional
        List of data variables to write. When empty or not given, all
        variables of `dataset` are used.

    Raises
    ------
    ValueError
        When a variable belongs to a different IDS than `ids`.
    """
    # TODO: check variables in var_lookup
    names = variables if variables else list(dataset.variables)

    models = var_lookup.lookup(names)

    # Validate everything before the file is opened for writing.
    offenders = [model for model in models if model.ids != ids]
    if offenders:
        var = offenders[0]
        raise ValueError(f'Variable {var} does not belong to {ids}.')

    with self.open_ids(ids, 'r+') as group:
        for model in models:
            values = dataset[model.name]
            key, slices = _var_path_to_hdf5_key_and_slices(model.path)
            target = group[key]
            target[slices] = values

Variable

Bases: IDSPath

Variable for describing data within an IMAS database.

The variable can be given a name, which will be used in the rest of the config to reference the variable. It will also be used as the column labels or on plots.

The dimensions for each variable must be specified. This ensures that the data will be self-consistent. For example, for 1D data you can use [x], and for 2D data, [x, y].

The IDS path may contain indices. You can point to a single index, by simply giving the complete path (i.e. profiles_1d/0/t_i_ave for the 0th time slice). To retrieve all time slices, you can use profiles_1d/*/t_i_ave.

VariableConfigLoader(*, model=VariableConfigModel, var_dir='imas2xarray', var_env='IMAS2XARRAY_VARDEF', module=files('imas2xarray.data'))

Source code in imas2xarray/_lookup.py
122
123
124
125
126
127
128
129
130
131
132
133
134
135
def __init__(
    self,
    *,
    model: type = VariableConfigModel,
    var_dir: str = 'imas2xarray',
    var_env: str = 'IMAS2XARRAY_VARDEF',
    module: Path | Any = files('imas2xarray.data'),
):
    """Store the loader settings and resolve the config file paths.

    Parameters
    ----------
    model : type
        Model class that `load` passes to the YAML parser for each config file.
    var_dir : str
        Stored as `self.var_dir`. Presumably the directory name used when
        searching for config files -- the helpers that read it are not
        shown here, confirm against them.
    var_env : str
        Stored as `self.var_env`. Presumably the name of the environment
        variable that can point to the variable definitions -- confirm.
    module : Path | Any
        Stored as `self.module`. The default is evaluated once, when the
        function is defined. Presumably the location of the packaged
        fallback definitions -- confirm.
    """
    self.model = model
    self.var_dir = var_dir
    self.var_env = var_env
    self.module = module

    # Resolved last, after all settings above have been stored on the instance.
    self.paths = self.get_config_path()

get_config_path()

Try to get the config file with variable definitions.

Search order (the first location that yields any paths is used): 1. environment variable 2. config home (first $XDG_CONFIG_HOME/imas2xarray then $HOME/.config/imas2xarray) 3. local directory (not sure if this should be implemented) 4. fall back to variable definitions in package

Source code in imas2xarray/_lookup.py
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
def get_config_path(self) -> tuple[Path, ...]:
    """Return the paths of the config files with variable definitions.

    All candidate locations are queried up front; the first one that gives
    a non-empty result is returned. The candidates are, in order:
    1. environment variable
    2. config home (first $XDG_CONFIG_HOME/imas2xarray then `$HOME/.config/imas2xarray`)
    3. local directory
    4. fall back to variable definitions in package
    """
    candidates = (
        self._get_paths_from_environment_variable(),
        self._get_paths_from_config_home(),
        self._get_paths_local_directory(),
    )

    found = next((paths for paths in candidates if paths), None)
    if found is None:
        return self._get_paths_fallback()

    return found

load(var_lookup=None)

Load the variables config.

Parameters:

  • var_lookup (None | VarLookup, default: None ) –

    Populate initial variable lookup table with entries from this lookup table. Use this to load variables from different locations.

Returns:

  • var_lookup ( VarLookup ) –

    Variable lookup table

Source code in imas2xarray/_lookup.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
def load(self, var_lookup: None | VarLookup = None) -> VarLookup:
    """Load the variables config.

    Every file in `self.paths` is parsed into `self.model`, and the
    resulting variables are merged into the lookup table. Files that come
    later override entries with the same name.

    Parameters
    ----------
    var_lookup : None | VarLookup
        Populate initial variable lookup table with entries from this lookup table.
        Use this to load variables from different locations. The given
        table is updated in place and returned; a new table is only created
        when this is None.

    Returns
    -------
    var_lookup : VarLookup
        Variable lookup table
    """
    # Test for None rather than truthiness: a lookup table that evaluates as
    # falsy (e.g. an empty one) must still be the object that gets populated.
    if var_lookup is None:
        var_lookup = VarLookup()

    for path in self.paths:
        logger.debug('Loading variables from: %s', path)
        with open(path) as f:
            var_config = parse_yaml_raw_as(self.model, f)
        var_lookup.update(var_config.to_variable_dict())

    return var_lookup

VariableConfigModel

Bases: RootModel

to_variable_dict()

Return dict of variables.

Source code in imas2xarray/_models.py
 99
100
101
def to_variable_dict(self) -> dict:
    """Return the variables of this model as a dict keyed by variable name.

    When several variables share a name, the last one wins.
    """
    by_name = {}
    for item in self:
        by_name[item.name] = item
    return by_name

rebase_all_coords(datasets, reference_dataset)

Rebase all coords, by applying rebase operations.

Parameters:

  • datasets (Sequence[Dataset]) –

    datasets

  • reference_dataset (Dataset) –

    reference_dataset

Returns:

  • tuple[Dataset, ...] –
Source code in imas2xarray/_rebase.py
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
def rebase_all_coords(
    datasets: Sequence[xr.Dataset],
    reference_dataset: xr.Dataset,
) -> tuple[xr.Dataset, ...]:
    """Interpolate all datasets onto the coordinates of a reference dataset.

    Only the coordinates of the reference with more than one element are
    used as interpolation targets. Values outside the source range are
    extrapolated.

    Parameters
    ----------
    datasets : Sequence[xr.Dataset]
        Datasets to interpolate
    reference_dataset : xr.Dataset
        Dataset that provides the target coordinates

    Returns
    -------
    tuple[xr.Dataset, ...]
        The interpolated datasets, in the order they were given
    """
    targets = {}
    for name, coord in reference_dataset.coords.items():
        if coord.size > 1:
            targets[name] = coord

    def interpolate(ds):
        return ds.interp(coords=targets, kwargs={'fill_value': 'extrapolate'})

    return tuple(map(interpolate, datasets))

rebase_on_grid(ds, *, coord_dim, new_coords)

Rebase (interpolate) the coordinate dimension to the new coordinates.

Thin wrapper around xarray.Dataset.interp.

Parameters:

  • ds (Dataset) –

    Source dataset

  • coord_dim (str) –

    Name of the grid dimension (i.e. grid variable).

  • new_coords (ndarray) –

    The coordinates to interpolate to

Returns:

  • Dataset –

    Rebased dataset

Source code in imas2xarray/_rebase.py
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def rebase_on_grid(ds: xr.Dataset, *, coord_dim: str, new_coords: np.ndarray) -> xr.Dataset:
    """Interpolate a dataset along one coordinate onto new coordinate values.

    Thin wrapper around `xarray.Dataset.interp` that extrapolates outside
    the source range.

    Parameters
    ----------
    ds : xr.Dataset
        Source dataset
    coord_dim : str
        Name of the grid dimension (i.e. grid variable).
    new_coords : np.ndarray
        The coordinates to interpolate to

    Returns
    -------
    xr.Dataset
        Rebased dataset
    """
    target = {coord_dim: new_coords}
    interp_options = {'fill_value': 'extrapolate'}
    return ds.interp(coords=target, kwargs=interp_options)

rebase_on_time(ds, *, time_dim='time', new_coords)

Rebase (interpolate) the time dimension to the new coordinates.

Thin wrapper around xarray.Dataset.interp.

Parameters:

  • ds (Dataset) –

    Source dataset

  • time_dim (str, default: 'time' ) –

    Name of the time dimension (i.e. time variable).

  • new_coords (ndarray) –

    The coordinates to interpolate to

Returns:

  • Dataset –

    Rebased dataset

Source code in imas2xarray/_rebase.py
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
def rebase_on_time(
    ds: xr.Dataset,
    *,
    time_dim='time',
    new_coords: np.ndarray,
) -> xr.Dataset:
    """Interpolate the time dimension of a dataset onto new time values.

    Delegates to `rebase_on_grid`. A dataset with fewer than two time steps
    is returned unchanged.

    Parameters
    ----------
    ds : xr.Dataset
        Source dataset
    time_dim : str
        Name of the time dimension (i.e. time variable).
    new_coords : np.ndarray
        The coordinates to interpolate to

    Returns
    -------
    xr.Dataset
        Rebased dataset
    """
    if len(ds[time_dim]) >= 2:
        return rebase_on_grid(ds, coord_dim=time_dim, new_coords=new_coords)

    # With fewer than two time steps there is nothing to rebase.
    return ds

rezero_time(ds, *, start=0, key='time')

Standardize the time within a dataset by setting the first timestep to 0.

Simply subtracts time[0] from all time entries and adds start. Note: this does not interpolate the times between different datasets.

Parameters:

  • ds (Dataset) –

    Source dataset

  • key (str, default: 'time' ) –

    Name of the time dimension

  • start (int, default: 0 ) –

    Where to start the returned time series

Source code in imas2xarray/_rebase.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
def rezero_time(ds: xr.Dataset, *, start: int = 0, key: str = 'time') -> None:
    """Shift the time axis of a dataset so that it begins at `start`.

    The first time value is subtracted from every time entry and `start`
    is added. The dataset is modified in place; nothing is returned.
    Note: this does not interpolate the times between different datasets

    Parameters
    ----------
    ds : xr.Dataset
        Source dataset, updated in place
    key : str
        Name of the time dimension
    start : int, optional
        Value of the first time step after shifting
    """
    times = ds[key]
    first = times[0]
    ds[key] = times - first + start

squash_placeholders(ds)

Squash placeholder variables. Data are grouped along the first dimension (usually time).

If the data contains dimensions with a $-prefix, these are all interpolated to the first array of that type.

Parameters:

  • ds (Dataset) –

    xarray Dataset

Returns:

  • ds ( Dataset ) –

    xarray Dataset

Source code in imas2xarray/_rebase.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def squash_placeholders(ds: xr.Dataset) -> xr.Dataset:
    """Replace placeholder dimensions by their real counterpart.

    A placeholder is a dimension whose name starts with `$`. Each one is
    passed to `standardize_grid`, together with the variable of the same
    name without the prefix, grouped along the first dimension of the
    dataset (usually time).

    Parameters
    ----------
    ds : xr.Dataset
        xarray Dataset

    Returns
    -------
    ds : xr.Dataset
        xarray Dataset

    Raises
    ------
    KeyError
        When the variable belonging to a placeholder is not in the dataset.
    """
    prefix = '$'

    # Snapshot of the dimension names; it is not refreshed while squashing.
    dim_names = tuple(str(dim) for dim in ds.dims)

    for placeholder in dim_names:
        if not placeholder.startswith(prefix):
            continue

        target = placeholder.lstrip(prefix)
        if target not in ds:
            raise KeyError(f'Cannot squash placeholder, `{target}` not in dataset.')

        # Dimensions in front of the placeholder; indexing the slice raises
        # IndexError when the placeholder is the very first dimension.
        leading = dim_names[: dim_names.index(placeholder)]
        first_dim = leading[0]

        ds = standardize_grid(ds, new_dim=target, old_dim=placeholder, group=first_dim)

    return ds

standardize_grid(ds, *, new_dim, old_dim, group=None, new_dim_data=0)

Standardize the grid within a dataset.

Perform split-apply-combine routine on the data. Split by the group, standardize the data in new_dim using new_dim_data (interpolate if necessary), and combine replacing old_dim by new_dim.

Parameters:

  • ds (Dataset) –

    Source dataset

  • new_dim (str) –

    Must be an existing variable with group as a dimension.

  • old_dim (str) –

    Must be an existing dimension without coordinates.

  • group (str, default: None ) –

    Split the data in groups over this dimension.

  • new_dim_data (Union[ndarray, int], default: 0 ) –

    The data to be used for new_dim. If it is an integer, use it as an index to grab the data from new_dim.

Returns:

  • Dataset –

    New dataset with new_dim as a coordinate dimension.

Source code in imas2xarray/_rebase.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def standardize_grid(
    ds: xr.Dataset,
    *,
    new_dim: str,
    old_dim: str,
    group: Optional[str] = None,
    new_dim_data: Union[np.ndarray, int] = 0,
) -> xr.Dataset:
    """Put the data of every group on one common grid.

    This is a `split-apply-combine` routine: the dataset is split by
    `group`; in each part `old_dim` is swapped for `new_dim` and the part
    is interpolated to `new_dim_data`; the parts are then combined again.

    Parameters
    ----------
    ds : xr.Dataset
        Source dataset
    new_dim : str
        Must be an existing variable with `group` as a dimension.
    old_dim : str
        Must be an existing dimension without coordinates.
    group : str, optional
        Split the data in groups over this dimension.
    new_dim_data : Union[np.ndarray, int], optional
        The data to be used for `new_dim`. If it is an integer,
        use it as an index along `group` to grab the data from `new_dim`.

    Returns
    -------
    xr.Dataset
        New dataset with `new_dim` as a coordinate dimension.
    """
    if isinstance(new_dim_data, int):
        # An integer selects one entry along `group`; the values of `new_dim`
        # at that entry become the common grid.
        reference = ds.isel(**{group: new_dim_data})  # type: ignore
        new_dim_data = reference[new_dim].data  # type: ignore

    grouped = ds.groupby(group)

    def to_common_grid(part):
        swapped = part.swap_dims({old_dim: new_dim})
        return swapped.interp(**{new_dim: new_dim_data})

    return grouped.map(to_common_grid)

standardize_grid_and_time(datasets, *, grid_var='rho_tor_norm', time_var='time', reference_dataset=0)

Standardize list of datasets by applying standard rebase operations.

Applies, in sequence: 1. rebase_on_grid 2. rebase_on_time. Note that rezero_time and standardize_grid are not applied by this function; apply them to the datasets beforehand if needed.

Parameters:

  • datasets (Sequence[Dataset]) –

    List of source datasets

  • grid_var (str, default: 'rho_tor_norm' ) –

    Name of the grid dimension (i.e. grid variable)

  • time_var (str, default: 'time' ) –

    Name of the time dimension (i.e. time variable)

  • reference_dataset (int, default: 0 ) –

    The dataset with this index will be used as the reference for rebasing. The grid and time coordinates of the other datasets will be rebased to the reference.

Returns:

  • tuple[Dataset] –

    Tuple of output datasets

Source code in imas2xarray/_rebase.py
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
def standardize_grid_and_time(
    datasets: Sequence[xr.Dataset],
    *,
    grid_var: str = 'rho_tor_norm',
    time_var: str = 'time',
    reference_dataset: int = 0,
) -> tuple[xr.Dataset, ...]:
    """Rebase a list of datasets onto the grid and time of one of them.

    Applies, in sequence:
    1. `rebase_on_grid`, using the grid of the reference dataset
    2. `rebase_on_time`, using the time of the reference dataset as it is
       after the grid rebase

    Parameters
    ----------
    datasets : Sequence[xr.Dataset]
        List of source datasets
    grid_var : str, optional
        Name of the grid dimension (i.e. grid variable)
    time_var : str, optional
        Name of the time dimension (i.e. time variable)
    reference_dataset : int, optional
        The dataset with this index will be used as the reference for rebasing.
        The grid and time coordinates of the other datasets will be rebased
        to the reference.

    Returns
    -------
    tuple[xr.Dataset, ...]
        Tuple of output datasets
    """
    grid = datasets[reference_dataset][grid_var].data
    on_grid = tuple(
        rebase_on_grid(ds, coord_dim=grid_var, new_coords=grid) for ds in datasets
    )

    # The reference time is read from the already grid-rebased reference.
    times = on_grid[reference_dataset][time_var].data
    return tuple(
        rebase_on_time(ds, time_dim=time_var, new_coords=times) for ds in on_grid
    )

to_imas(path, dataset, *, ids, variables=None)

Write variables in xarray dataset back to IMAS data at given path.

Update only, IMAS data must be in HDF5 format.

Parameters:

  • path (str | Path) –

    Path to the data

  • dataset (Dataset) –

    Input dataset

  • ids (str) –

    The IDS to write to (i.e. 'core_profiles')

  • variables (Iterable[str | IDSVariableModel], default: None ) –

    List of variables to write back. If None, attempt to write back all variables known to imas2xarray

Source code in imas2xarray/_io.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
def to_imas(
    path: str | Path,
    dataset: xr.Dataset,
    *,
    ids: str,
    variables: None | Iterable[str | IDSVariableModel] = None,
):
    """Write variables from an xarray dataset back to the IMAS data at `path`.

    This only updates existing data; the IMAS data must be in HDF5 format.
    The work is delegated to `H5Handle.set_variables`.

    Parameters
    ----------
    path : str | Path
        Path to the data
    dataset : xr.Dataset
        Input dataset
    ids : str
        The IDS to write to (e.g. 'core_profiles')
    variables : Iterable[str | IDSVariableModel], optional
        List of variables to write back. If None, attempt to write back
        all variables in `dataset`
    """
    H5Handle(path).set_variables(dataset, ids=ids, variables=variables)

to_xarray(path, *, ids, variables=None)

Load IDS from given path to IMAS data into an xarray dataset.

IMAS data must be in HDF5 format.

Parameters:

  • path (str | Path) –

    Path to the data

  • ids (str) –

    The IDS to load (i.e. 'core_profiles')

  • variables (None | Iterable[str | IDSVariableModel], default: None ) –

    List of variables to load. If None, attempt to load all variables known to imas2xarray

Returns:

  • dataset ( Dataset ) –

    Xarray dataset with all specified variables

Source code in imas2xarray/_io.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def to_xarray(
    path: str | Path, *, ids: str, variables: None | Iterable[str | IDSVariableModel] = None
) -> xr.Dataset:
    """Load an IDS from the IMAS data at `path` into an xarray dataset.

    IMAS data must be in HDF5 format.

    Parameters
    ----------
    path : str | Path
        Path to the data
    ids : str
        The IDS to load (e.g. 'core_profiles')
    variables : None | Iterable[str | IDSVariableModel], optional
        List of variables to load. If None (or empty), attempt to load
        all variables known to `imas2xarray`

    Returns
    -------
    dataset : xr.Dataset
        Xarray dataset with all specified variables
    """
    handle = H5Handle(path)

    if not variables:
        return handle.get_all_variables(ids=ids)

    return handle.get_variables(variables=variables, ids=ids)