# fileio_backends.py
#
# This file is part of scqubits: a Python package for superconducting qubits,
# Quantum 5, 583 (2021). https://quantum-journal.org/papers/q-2021-11-17-583/
#
#    Copyright (c) 2019 and later, Jens Koch and Peter Groszkowski
#    All rights reserved.
#
#    This source code is licensed under the BSD-style license found in the
#    LICENSE file in the root directory of this source tree.
############################################################################
"""
Helper routines for writing data to h5 files.
"""

import ast
import csv
import os
import re

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union

import numpy as np
from numpy import ndarray

import scqubits.io_utils.fileio as io
import scqubits.utils.misc as utils

try:
    import h5py
    from h5py import AttributeManager, File, Group
except ImportError:
    _HAS_H5PY = False
else:
    _HAS_H5PY = True
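
# Note: the `_HAS_H5PY` flag is consumed by the `@utils.Required` decorator on
# the h5 backends below, which is expected to raise an informative error at
# call time if h5py is not installed.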

if TYPE_CHECKING:
    from h5py import File, Group


class IOWriter(ABC):
    """
    ABC for writing class instance data to file.

    Parameters
    ----------
    filename: str
    file_handle: h5.Group, optional
    """

    def __init__(self, filename: str, file_handle: "Group" = None) -> None:
        self.filename = filename
        self.io_data: io.IOData
        self.file_handle = file_handle

    @abstractmethod
    def to_file(self, io_data: io.IOData, **kwargs):
        pass

    @abstractmethod
    def write_attributes(self, *args, **kwargs):
        pass

    @abstractmethod
    def write_ndarrays(self, *args, **kwargs):
        pass

    @abstractmethod
    def write_objects(self, *args, **kwargs):
        pass
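
# Illustrative sketch (hypothetical, not part of scqubits): a new file-format
# backend would subclass IOWriter and supply the four abstract methods, e.g.
#
#     class JSONWriter(IOWriter):
#         def to_file(self, io_data: io.IOData, **kwargs): ...
#         def write_attributes(self, *args, **kwargs): ...
#         def write_ndarrays(self, *args, **kwargs): ...
#         def write_objects(self, *args, **kwargs): ...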

@utils.Required(h5py=_HAS_H5PY)
class H5Writer(IOWriter):
    """Writes IOData to a custom-format h5 file"""

    def write_attributes(self, h5file_group: Union["Group", "File"]) -> None:  # type: ignore
        """
        Attribute data consists of

        1. `__init__` parameters that are of type str or numerical;
           these are directly written into `h5py.Group.attrs`
        2. lists, which are stored under `<h5py.Group>/__lists`
        3. dicts, which are stored under `<h5py.Group>/__dicts`
        """
        # Record the type of the current class instance
        h5file_group.attrs.create("__type", self.io_data.typename)

        attributes = self.io_data.attributes
        for attr_name, attr_value in attributes.items():
            if isinstance(attr_value, dict):
                # h5py does not serialize dicts automatically, so do it manually
                group_name = f"__dicts/{attr_name}"
                h5file_group.create_group(group_name)
                io.write(
                    attr_value, self.filename, file_handle=h5file_group[group_name]
                )
            elif isinstance(attr_value, (list, tuple)):
                group_name = f"__lists/{attr_name}"
                h5file_group.create_group(group_name)
                io.write(
                    attr_value, self.filename, file_handle=h5file_group[group_name]
                )
            else:
                h5file_group.attrs[attr_name] = attr_value

    def write_ndarrays(self, h5file_group: Union["Group", "File"]) -> None:  # type: ignore
        """
        Writes ndarray (float or complex) data contained in `self.io_data` to the
        provided `h5py.Group` as a `h5py.Dataset`, using gzip compression.
        """
        data_group = h5file_group.file.require_group("__data")
        for name, array in self.io_data.ndarrays.items():
            # Each array is stored once under a content-derived id; the named
            # dataset only holds a reference to that id.
            array_id = hash(array.tobytes())
            h5file_group.create_dataset(name, data=[array_id], dtype="int64")
            if str(array_id) not in data_group:
                data_group.create_dataset(
                    str(array_id), data=array, dtype=array.dtype, compression="gzip"
                )

    def write_objects(self, h5file_group: Union["Group", "File"]) -> None:  # type: ignore
        """
        Writes data representing a Python object other than ndarray, list and dict,
        contained in `self.io_data`, to the provided `h5py.Group` under
        `<h5py.Group>/__objects`.
        """
        h5file_group = h5file_group.create_group("__objects")
        for obj_name in self.io_data.objects.keys():
            new_h5group = h5file_group.create_group(obj_name)
            io.write(
                self.io_data.objects[obj_name], self.filename, file_handle=new_h5group
            )

    def to_file(self, io_data: io.IOData, file_handle: "Group" = None) -> None:
        """
        Takes the serialized IOData and writes it either to a new h5 file with file
        name given by `self.filename`, or to the given h5py.Group of an open h5 file.
        """
        self.io_data = io_data
        if file_handle is None:
            h5file_group = h5py.File(self.filename, "w", rdcc_nbytes=1024**2 * 200)
            _ = h5file_group.create_group("__data")
            close_when_done = True
        else:
            h5file_group = file_handle
            close_when_done = False

        self.write_attributes(h5file_group)
        self.write_ndarrays(h5file_group)
        self.write_objects(h5file_group)
        if close_when_done:
            h5file_group.close()


@utils.Required(h5py=_HAS_H5PY)
class H5Reader:
    """
    Enables reading h5 files generated with scqubits.
    """

    def __init__(self, filename: str, file_handle: "Group" = None) -> None:
        self.filename = filename
        self.io_data = None
        self.file_handle = file_handle

    @staticmethod
    def h5_attrs_to_dict(
        h5_attrs: "AttributeManager",
    ) -> Dict[str, Union[float, str, int]]:
        """
        Converts h5 attribute data to a Python dictionary.

        Parameters
        ----------
        h5_attrs:
            h5py.AttributeManager, as obtained by accessing `<h5py.Group>.attrs`
        """
        return {attr_name: attr_value for attr_name, attr_value in h5_attrs.items()}

    def read_attributes(self, h5file_group: Union["Group", "File"]) -> Dict[str, Any]:
        """
        Read data from the h5 file group that is stored directly as
        `<h5py.Group>.attrs`, or saved in subgroups titled `<h5py.Group>/__lists`
        and `<h5py.Group>/__dicts`.
        """
        attributes = self.h5_attrs_to_dict(h5file_group.attrs)

        if "__dicts" in h5file_group:
            for dict_name in h5file_group["__dicts"]:
                attributes[dict_name] = io.read(
                    self.filename, h5file_group[f"__dicts/{dict_name}"]
                )

        if "__lists" in h5file_group:
            for list_name in h5file_group["__lists"]:
                attributes[list_name] = io.read(
                    self.filename, h5file_group[f"__lists/{list_name}"]
                )

        return attributes

    def read_ndarrays(
        self, h5file_group: Union["Group", "File"]
    ) -> Dict[str, ndarray]:
        """
        Read numpy array data from the h5 file group.
        """
        if "__data" in h5file_group.file:
            # current format: named datasets hold ids pointing into /__data
            datagroup = h5file_group.file.require_group("__data")
            ndarrays = {}
            for name, id_dataset in h5file_group.items():
                if isinstance(id_dataset, h5py.Dataset):
                    id_int = id_dataset[:][0]
                    ndarrays[name] = datagroup[str(id_int)][:]
            return ndarrays

        # legacy format: arrays stored directly as named datasets
        ndarrays = {
            name: array[:]
            for name, array in h5file_group.items()
            if isinstance(array, h5py.Dataset)
        }
        return ndarrays

    def read_objects(
        self, h5file_group: Union["Group", "File"]
    ) -> Dict[str, io.IOData]:
        """
        Read data from the given h5 file group that represents a Python object
        other than an ndarray, list, or dict.
        """
        inner_objects = {}
        h5file_group = h5file_group["__objects"]
        for obj_name in h5file_group:
            inner_objects[obj_name] = io.read(self.filename, h5file_group[obj_name])
        return inner_objects

    def from_file(self, filename: str, file_handle: "Group" = None) -> io.IOData:
        """
        Either opens a new h5 file for reading or accesses an already opened file
        via the given h5.Group handle. Reads all data from the three categories of
        attributes (incl. lists and dicts), ndarrays, and objects.
        """
        if file_handle is None:
            h5file_group = h5py.File(filename, "r", rdcc_nbytes=1024**2 * 200)
        else:
            h5file_group = file_handle

        attributes = self.read_attributes(h5file_group)
        typename = attributes["__type"]
        assert isinstance(typename, str)
        del attributes["__type"]
        ndarrays = self.read_ndarrays(h5file_group)
        inner_objects = self.read_objects(h5file_group)
        return io.IOData(typename, attributes, ndarrays, inner_objects)
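
# Usage sketch: these classes are normally driven by scqubits' top-level
# serialization layer rather than instantiated directly. A minimal h5 round
# trip (assuming a Serializable scqubits object) might look like:
#
#     import scqubits as scq
#     qubit = scq.Transmon(EJ=30.0, EC=1.2, ng=0.3, ncut=31)
#     qubit.filewrite("transmon.h5")        # dispatches to H5Writer.to_file
#     qubit_copy = scq.read("transmon.h5")  # dispatches to H5Reader.from_file
#
# Resulting file layout (schematic):
#     <root>.attrs["__type"], plain str/numeric attrs
#     /__lists/<name>, /__dicts/<name>     (serialized lists and dicts)
#     /__objects/<name>                    (nested Serializable objects)
#     /__data/<array-hash>                 (gzip-compressed ndarray payloads)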

class CSVWriter(IOWriter):
    """
    Given `filename="somename.csv"`, write the attribute metadata into
    `somename.csv`. Then, an additional csv file is written for each dataset,
    with filenames `"somename_" + dataname0 + ".csv"` etc.
    """

    def append_ndarray_info(self, attributes):
        """Add dataset information to `attributes`, so that dataset names and
        dimensions are available in the attributes CSV file."""
        for index, dataname in enumerate(self.io_data.ndarrays.keys()):
            data = self.io_data.ndarrays[dataname]
            attributes[f"dataset{index}"] = dataname

            if data.ndim == 3:
                slice_count = len(data)
            else:
                slice_count = 1
            attributes[f"dataset{index}.slices"] = slice_count
        return attributes

    def write_attributes(self, filename: str):  # type: ignore
        attributes = self.io_data.attributes
        attributes["__type"] = self.io_data.typename
        attributes = self.append_ndarray_info(attributes)
        with open(filename, mode="w", newline="") as meta_file:
            file_writer = csv.writer(meta_file, delimiter=",")
            file_writer.writerow(attributes.keys())
            file_writer.writerow(attributes.values())

    def write_ndarrays(self, filename: str):  # type: ignore
        filename_stub, _ = os.path.splitext(filename)
        for dataname, dataset in self.io_data.ndarrays.items():
            filename = f"{filename_stub}_{dataname}.csv"
            self.write_data(filename, dataset)

    def write_data(self, filename: str, dataset: ndarray):  # type: ignore
        if dataset.ndim <= 2:
            np.savetxt(filename, dataset)
        elif dataset.ndim == 3:
            np_savetxt_3d(dataset, filename)
        else:
            raise Exception("Dataset has dimensions > 3. Cannot write to CSV file.")

    def write_objects(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def to_file(self, io_data: io.IOData, **kwargs):
        self.io_data = io_data
        self.write_attributes(self.filename)
        self.write_ndarrays(self.filename)
        # no support for write_objects in CSV format
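
# Usage sketch (hypothetical filenames): writing IOData that carries a single
# 2d ndarray named "evals" produces two files,
#     spec.csv        -- two metadata rows (keys, values), incl. dataset0=evals
#     spec_evals.csv  -- the array itself, written via np.savetxt
#
#     CSVWriter("spec.csv").to_file(io_data)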

class CSVReader:
    @staticmethod
    def read_attributes(filename):
        with open(filename, mode="r") as meta_file:
            file_reader = csv.reader(meta_file, delimiter=",")
            meta_keys = next(file_reader)
            meta_values = next(file_reader)
        return dict(zip(meta_keys, meta_values))

    def process_metadict(self, meta_dict: Dict) -> Tuple[Dict, List[str], ndarray]:
        attributes = {
            attr_name: utils.to_expression_or_string(attr_value)
            for attr_name, attr_value in meta_dict.items()
            if not re.match(r"dataset\d+", attr_name)
        }
        data_names = [
            dataname
            for datalabel, dataname in meta_dict.items()
            if re.match(r"dataset\d+$", datalabel)
        ]
        data_slices = np.asarray(
            [
                ast.literal_eval(value)
                for key, value in meta_dict.items()
                if re.match(r"dataset\d+\.slices", key)
            ]
        )
        return attributes, data_names, data_slices

    @staticmethod
    def read_data(filename, slices):
        try:
            data_array = np.loadtxt(filename)
        except ValueError:
            # fall back to complex-valued data; builtin `complex` avoids the
            # NumPy 2.0 removal of `np.complex_`
            data_array = np.loadtxt(filename, dtype=complex)
        if slices > 1:
            nrows, ncols = data_array.shape
            return data_array.reshape((slices, nrows // slices, ncols))
        return data_array

    def from_file(self, filename: str, **kwargs) -> io.IOData:
        """
        Returns
        -------
        IOData
            class instance data generated from the file contents
        """
        ext_attributes = self.read_attributes(filename)
        typename = ext_attributes["__type"]
        del ext_attributes["__type"]

        attributes, data_names, data_slices = self.process_metadict(ext_attributes)

        filename_stub, _ = os.path.splitext(filename)
        ndarrays = {}
        for index, dataname in enumerate(data_names):
            data_filename = f"{filename_stub}_{dataname}.csv"
            slices = data_slices[index]
            ndarrays[dataname] = self.read_data(data_filename, slices)

        return io.IOData(typename, attributes, ndarrays, objects=None)
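
# Usage sketch: the matching read step reassembles metadata and datasets,
# restoring any 3d arrays from their stacked 2d slices via the
# "datasetN.slices" entries:
#
#     io_data = CSVReader().from_file("spec.csv")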

def np_savetxt_3d(array3d: ndarray, filename: str):
    """
    Helper function that splits a 3d numpy array into 2d slices for writing as
    csv data to a new file. Slices are separated by a comment row `# New slice`.

    Parameters
    ----------
    array3d:
        ndarray with ndim = 3
    filename:
        name of output file
    """
    with open(filename, mode="w", newline="") as datafile:
        datafile.write(f"# Array shape: {array3d.shape}\n")
        for data_slice in array3d:
            np.savetxt(datafile, data_slice)
            datafile.write("# New slice\n")
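
# Round-trip sketch for np_savetxt_3d (self-contained, illustrative):
#
#     import numpy as np
#     arr = np.arange(24.0).reshape(2, 3, 4)
#     np_savetxt_3d(arr, "demo.csv")
#     flat = np.loadtxt("demo.csv")     # comment rows are skipped -> shape (6, 4)
#     restored = flat.reshape(2, 3, 4)
#     assert np.allclose(arr, restored)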