import os
import logging
import h5py
import xarray as xr
from ..util.exceptions import ToolBoxFileError
# Public names exported by a star-import of this module.
# NOTE(review): 'get_data_formatted' is listed here but its definition is not
# part of the visible source -- confirm it is defined elsewhere in this file.
__all__ = [
'get_data_formatted',
'load_xarray',
'save_attributes_h5',
'save_xarray',
]
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
def _to_netcdf(fname, data, group, mode):
    """
    Write ``data`` into group ``group`` of file ``fname`` via the h5netcdf
    engine.

    An existing file opened with ``mode == 'a'`` gets the group added to it.
    In every other case (file missing, ``mode == 'w'`` or any other mode) the
    file is written from scratch with mode ``'w'``.

    Raises
    ------
    ToolBoxFileError
        appending to an existing file raised ValueError or TypeError.
    """
    file_present = os.path.isfile(fname)

    if file_present and mode == 'a':
        # Append path: add the group to the file that is already there.
        try:
            data.to_netcdf(fname, group=group, mode='a', engine='h5netcdf')
            log.info(f"Created group {group} in file {fname}")
        except (ValueError, TypeError):
            msg = f"Group {group} exists and has incompatible dimensions."
            log.warning(f"Could not store data: {msg}")
            raise ToolBoxFileError(msg, fname)
        return

    # Write path: create the file, or replace the one that exists.
    data.to_netcdf(fname, group=group, mode='w', engine='h5netcdf')
    if file_present and mode == 'w':
        log.warning(f"File {fname} existed: overwritten")
    log.info(f"Stored data in file {fname}")
def save_xarray(fname, data, group='data', mode='a'):
    """
    Store xarray Dataset in the specified location.

    Thin public wrapper that forwards all arguments to ``_to_netcdf``.

    Parameters
    ----------
    fname: str
        filename
    data: xarray.DataSet
        The data to be stored
    group: str
        name of the group inside the file that receives the data.
    mode: str
        'a' adds the group to an existing file, 'w' overwrites an existing
        file. A file that does not exist yet is created in both cases.

    Raises
    ------
    ToolBoxFileError: Exception
        Appending to an existing file failed, e.g. because the group exists
        and has incompatible dimensions.
    """
    # No try/except here: a ToolBoxFileError from the helper propagates to the
    # caller unchanged.
    _to_netcdf(fname, data, group, mode)
def save_attributes_h5(fname, data=None):
    """
    Adding attributes to a hdf5 file. This function is intended to be used to
    attach metadata to a processed run.

    Parameters
    ----------
    fname: str
        filename as string
    data: dictionary, optional
        the data that should be added to the file in form of a dictionary.
        Each key/value pair becomes one attribute on the root of the file.
        ``None`` (the default) adds no attributes.
    """
    # A None default avoids sharing one mutable dict between calls.
    attributes = {} if data is None else data
    # The context manager closes the file even if writing an attribute fails.
    with h5py.File(fname, mode='a') as h5_file:
        for key, value in attributes.items():
            h5_file.attrs[key] = value
    log.info(f"added attributes to file {fname}")
def load_xarray(fname, group='data', form='dataset'):
    """
    Load stored xarray Dataset.

    Comment: This function exists because of a problem with the standard
    netcdf engine that is malfunctioning due to related software installed
    in the exfel-python environment. May be dropped at some point.

    Parameters
    ----------
    fname: str
        filename as string
    group: str
        the name of the xarray dataset (group in h5 file).
    form: str
        specify whether the data to be loaded is a 'dataset' or a 'array'.

    Returns
    -------
    the result of ``xr.load_dataset`` for form 'dataset', or of
    ``xr.load_dataarray`` for form 'array'.

    Raises
    ------
    ToolBoxFileError
        the file does not exist.
    ValueError
        ``form`` is neither 'dataset' nor 'array'.
    """
    # The existence check comes first so a missing file is reported the same
    # way regardless of the requested form.
    if not os.path.isfile(fname):
        msg = "File does not exists."
        raise ToolBoxFileError(msg, fname)

    if form == 'dataset':
        log.debug(f'open xarray dataset {fname}')
        return xr.load_dataset(fname, group=group, engine='h5netcdf')
    if form == 'array':
        log.debug(f'open xarray dataarray {fname}')
        return xr.load_dataarray(fname, group=group, engine='h5netcdf')

    # An unknown form used to fall through and return None silently.
    raise ValueError(
        f"Unknown form {form!r}: expected 'dataset' or 'array'."
    )
def _data_from_list(filenames):
    """
    Helper for the data formatting routines: load every file named in
    ``filenames`` with ``load_xarray``, always reading the group 'data'.

    Parameters
    ----------
    filenames: list
        list of valid xarray filenames

    Returns
    -------
    data: list
        the loaded objects, in the order of ``filenames``

    Raises
    ------
    ToolBoxFileError
        raised for the first name that is not an existing file.
    """
    loaded = []
    for path in filenames:
        # Stop at the first missing file; nothing loaded so far is returned.
        if not os.path.isfile(path):
            msg = "File does not exists."
            raise ToolBoxFileError(msg, path)
        loaded.append(load_xarray(path, group='data'))
    return loaded
def search_files(run_folder):
    """
    Search folder for h5 files.

    Parameters
    ----------
    run_folder: str
        the path to a folder containing h5 files. A trailing path separator
        is optional.

    Returns
    -------
    a list with the paths (``run_folder`` joined with the file name) of all
    entries in the given folder whose name ends with ``.h5``, in the order
    reported by ``os.listdir``. The list is empty if there is no such entry.

    Raises
    ------
    ToolBoxFileError: Exception
        raises ToolBoxFileError in case the folder cannot be listed, e.g.
        because it does not exist.
    """
    # Only the directory listing is guarded, so unrelated errors (and
    # KeyboardInterrupt) are no longer swallowed by a bare except.
    try:
        filenames = os.listdir(run_folder)
    except (OSError, TypeError, ValueError) as err:
        msg = "No files in folder"
        raise ToolBoxFileError(msg, run_folder) from err

    # os.path.join gives valid paths with or without a trailing separator;
    # endswith keeps out names that merely contain ".h5" (e.g. "x.h5.bak").
    return [
        os.path.join(run_folder, name)
        for name in filenames
        if name.endswith(".h5")
    ]