diff --git a/doc/design/model.notation b/doc/design/model.notation index 6b46722fef..cb353433d4 100644 --- a/doc/design/model.notation +++ b/doc/design/model.notation @@ -2395,9 +2395,87 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + @@ -3009,36 +3087,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -3935,45 +3983,12 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -4158,11 +4173,11 @@ - + - + @@ -4193,7 +4208,7 @@ - + @@ -4201,6 +4216,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -6379,4 +6425,265 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/design/model.uml b/doc/design/model.uml index 24e87b765d..fc98943734 100644 --- a/doc/design/model.uml +++ b/doc/design/model.uml @@ -109,7 +109,9 @@ spezialization of this type. - + + + @@ -155,6 +157,37 @@ spezialization of this type. + + + + + + + + + +
+ + + + + + + + + + +
+ + + + + + + + + + @@ -273,12 +306,12 @@ this for the container adapter. - + - + @@ -359,7 +392,7 @@ this for the container adapter. - + diff --git a/doc/source/images/CMakeLists.txt b/doc/source/images/CMakeLists.txt index 3ef26a4926..0ada162ca6 100644 --- a/doc/source/images/CMakeLists.txt +++ b/doc/source/images/CMakeLists.txt @@ -22,6 +22,9 @@ set(IMAGES attribute_manager_uml.png hyperslab_1.svg hyperslab_2.svg hyperslab_3.svg + node_types.svg + node.svg + h5cpp_link.svg ) add_sphinx_source(${IMAGES}) diff --git a/doc/source/images/h5cpp_link.svg b/doc/source/images/h5cpp_link.svg new file mode 100644 index 0000000000..e2147bd83b --- /dev/null +++ b/doc/source/images/h5cpp_link.svg @@ -0,0 +1,123 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Link + + + + + +Path + + + +File + + + + + 1 + + 1 + + 1 + + + 1 + + + 1 + + 1 + + + + + 1 + + + + + 1 + + + + + diff --git a/doc/source/images/node.svg b/doc/source/images/node.svg new file mode 100644 index 0000000000..0f04f65578 --- /dev/null +++ b/doc/source/images/node.svg @@ -0,0 +1,132 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Node + + + + +(attribute) + +Manager + + + +Link + + + + + 1 + + 1 + + 1 + + + 1 + + + 1 + + 1 + + + + + 1 + + + + + 1 + + + + + + + diff --git a/doc/source/images/node_types.svg b/doc/source/images/node_types.svg new file mode 100644 index 0000000000..8c3ef6e196 --- /dev/null +++ b/doc/source/images/node_types.svg @@ -0,0 +1,115 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Node + + + + + +Dataset + + + +Datatype + + + +Group + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/users_guide/dataspace_custom_types.rst 
b/doc/source/users_guide/dataspace_custom_types.rst index 065748285b..4d62947820 100644 --- a/doc/source/users_guide/dataspace_custom_types.rst +++ b/doc/source/users_guide/dataspace_custom_types.rst @@ -2,9 +2,15 @@ Using dataspaces with custom types ================================== -When working with user defined types a new type trait to create a dataspace -must be provided if something else than a scalar dataspace should be -returned for this type. +*h5cpp* makes some default assumptions about which dataspace to use for a +particular type + +* for the container templates :cpp:class:`std::vector` and + :cpp:class:`std::array` a :cpp:class:`hdf5::dataspace::Simple` dataspace of rank 1 is used +* for everything else :cpp:class:`hdf5::dataspace::Scalar` is used. + +However, in the case of user defined data types the user has to provide +a type trait to tell *h5cpp* what dataspace to use for the type. As an example we consider here a trait for a 3x3 matrix type. The C++ class template for such a class could look like this @@ -21,9 +27,9 @@ template for such a class could look like this const T *data() const; }; -Now as a dataspace for such a type we would like to have a simple dataspace -of shape 3x3 and fixed size. The type trait which must be provided could -look like this +The most natural choice for a dataspace for such a type would be a simple +dataspace of shape 3x3 with fixed dimensions. To achieve this we have to provide +a type trait as follows .. code-block:: cpp diff --git a/doc/source/users_guide/files.rst b/doc/source/users_guide/files.rst index 2ea1056bd9..5615560685 100644 --- a/doc/source/users_guide/files.rst +++ b/doc/source/users_guide/files.rst @@ -7,7 +7,8 @@ Working with files .. todo:: - How do we deal with the meta-data cache? + For the time being, *h5cpp* provides no interface to the metadata + cache. This feature might be added in the future. The first things you have to do when using HDF5 is to create a new file or open an already existing one. 
This chapter deals with this very basic topic. @@ -20,7 +21,7 @@ reflecetd by a simple string. Identifying an HDF5 file ======================== -Before opening an HDF5 file we should check whether or not the object +Before opening an HDF5 file we should check whether or not the filesystem object referenced by :cpp:class:`boost::filesystem::path` is indeed an HDF5 file. The simpliest way to achieve this goal is to use the :cpp:func:`hdf5::file::is_hdf5_file` utiltiy function function. @@ -98,7 +99,6 @@ you have to provide a custom file access property list file::AccessFlags::TRUNCATE, fcpl,fapl); - Opening an existing file ======================== diff --git a/doc/source/users_guide/groups.rst b/doc/source/users_guide/groups.rst index e23cdcf187..29603e832f 100644 --- a/doc/source/users_guide/groups.rst +++ b/doc/source/users_guide/groups.rst @@ -18,19 +18,157 @@ can iterate either over the links directly attached to it or over the objects referenced by those links. You can access links via the publik :cpp:member:`hdf5::node::Group::links` member or the objects via the :cpp:member:`hdf5::node::Group::nodes` member. They are instances of -:cpp:class:`hdf5::node::LinkView` and :cpp:class:`hdf5::node::LinkView` +:cpp:class:`hdf5::node::LinkView` and :cpp:class:`hdf5::node::NodeView` respectively. Iterating over nodes ==================== +There are basically two ways to iterate over the children of a group + +* simple iteration over the *immediate* children of a group +* or recursive iteration + +The former one works as expected of an STL container either with the +for-each construction + +.. code-block:: cpp + + hdf5::node::Group group = ...; + + for(auto node: group.nodes) + { + ... + } + +or with the standard iterator interface using :cpp:func:`begin` and :cpp:func:`end` +member functions to obtain the iterators. In the following example we +collect all the datasets immediately attached to a particular group + +.. 
code-block:: cpp + + hdf5::node::Group group = ...; + std::vector<hdf5::node::Dataset> datasets; + + std::copy_if(group.nodes.begin(),group.nodes.end(), + std::back_inserter(datasets), + [](const hdf5::node::Node &node) + { + return node.type()==hdf5::node::Type::DATASET; + }); + +For recursive iteration we currently cannot use the for-each construction +but we can definitely use the iterator interface. To extend the above +example we look for all datasets below a particular group and its subgroups + +.. code-block:: cpp + + hdf5::node::Group group = ...; + std::vector<hdf5::node::Dataset> datasets; + + std::copy_if(hdf5::node::RecursiveNodeIterator::begin(group), + hdf5::node::RecursiveNodeIterator::end(group), + std::back_inserter(datasets), + [](const hdf5::node::Node &node) + { + return node.type()==hdf5::node::Type::DATASET; + }); + +The major difference here is to use :cpp:class:`RecursiveNodeIterator` and +its static factory functions :cpp:func:`begin` and :cpp:func:`end`. + Iterating over links ==================== -Accessing a groups children -=========================== +Iteration over links works pretty much the same as iteration over nodes. +In order to iterate over the links *immediately* attached to a group we could +use the following code snippet + +.. code-block:: cpp + + hdf5::node::Group group = ...; + + std::for_each(group.links.begin(),group.links.end(), + [](const hdf5::node::Link &link) + { + std::cout< -This chapter provides a brief overview over the design of *h5cpp* and the -underlying assumptins and concepts. It is not a full design documentation -but will introduce all terms required to read the users guide as well as -to read the API documentation. +This chapter will provide you with a short introduction into HDF5 and its +underlying concepts as well as *h5cpp*'s approach to mapping these concepts +onto C++ classes. + +*h5cpp* namespaces +================== + +In order to use *h5cpp* you need to include the :file:`hdf5.hpp` header file +like this + +.. 
code-block:: cpp + + #include <h5cpp/hdf5.hpp> + +This will pull in everything you need. The entire library is organized in +several namespaces with a top level namespace :cpp:any:`hdf5` + +.. figure:: ../images/hdf5_package_overview.svg + :align: center + :width: 85% + +Every namespace contains classes associated with a particular aspect of the +library and HDF5. + ++----------------------------+------------------------------------------------+ +| namespace | description | ++============================+================================================+ +| :cpp:any:`hdf5::property` | namespace with property list implementations. | ++----------------------------+------------------------------------------------+ +| :cpp:any:`hdf5::attribute` | contains all classes related to attributes and | +| | attribute management. | ++----------------------------+------------------------------------------------+ +| :cpp:any:`hdf5::datatype` | datatypes and related utility functions. The | +| | classes in this namespace should not be | +| | confused with *committed datatypes* which are | +| | indeed nodes. | ++----------------------------+------------------------------------------------+ +| :cpp:any:`hdf5::node` | the most important namespace providing all | +| | functionality to deal with nodes. | ++----------------------------+------------------------------------------------+ +| :cpp:any:`hdf5::dataspace` | dataspaces and related utilities | ++----------------------------+------------------------------------------------+ +| :cpp:any:`hdf5::error` | error management and exceptions | ++----------------------------+------------------------------------------------+ +| :cpp:any:`hdf5::file` | everything related to files | ++----------------------------+------------------------------------------------+ + +The top level namespace also contains some more esoteric classes like +:cpp:class:`hdf5::ObjectId` or :cpp:class:`hdf5::ObjectHandle` which we can safely +ignore for now. 
+ +The most important class in the top-level namespace might be +:cpp:class:`hdf5::Dimensions` which is a type alias of the form + +.. code-block:: cpp + + using Dimensions = std::vector<hsize_t>; + +in order to get rid of the rather nasty + +.. code-block:: cpp + + hsize_t *dims; +which is heavily used throughout the C-API and thus a potential source +of memory leaks. Using :cpp:class:`std::vector` serves the same purpose but +is far easier to use and avoids problems with memory leaks. + A high level view on HDF5 ========================= -Nodes and Links ---------------- +Nodes, links, and paths +----------------------- An HDF5 tree can be considered a tree of objects connected by links. @@ -27,23 +90,59 @@ An HDF5 tree can be considered a tree of objects connected by links. From a very high level point of view we can assume that there are two kind of objects -1. container objects which can store links to other object +1. container objects (*Group*) which can store links to other objects 2. leafe like objects which cannot hold links to other objects + (*Datasets* and *committed Datatypes*). + +Technically, we can refer to all of these objects as *nodes* and thus consider +an HDF5 tree as a collection of *nodes* connected by *links*. *h5cpp* maps +this hierarchy of node types rather straightforwardly onto C++ classes like this + +.. figure:: ../images/node_types.svg + :align: center + :width: 60% + + Hierarchy of node types in *h5cpp*. + +with :cpp:class:`hdf5::node::Node` being the top level class. 
The other classes +represent the following HDF5 objects: + ++-----------------------------------+----------------------------+ +| *h5cpp* class | HDF5 type | ++===================================+============================+ +| :cpp:class:`hdf5::node::Dataset` | an HDF5 dataset | ++-----------------------------------+----------------------------+ +| :cpp:class:`hdf5::node::Group` | an HDF5 group | ++-----------------------------------+----------------------------+ +| :cpp:class:`hdf5::node::Datatype` | an HDF5 committed datatype | ++-----------------------------------+----------------------------+ + +.. important:: -There is only one container type, a *Group*. For the leaf type of objects there -are only two: *Datasets* and *commited Datatypes*. We can collect all theses -objects under one master term: a *node*. From that point of view an HDF5 file -is a tree of nodes connected by links. + Do not confuse :cpp:class:`hdf5::node::Datatype` with + :cpp:class:`hdf5::datatype::Datatype`. Though the former one is constructed + from the latter one, the latter one cannot be accessed via a path. + A committed datatype is a means to store a complex datatype within a file. -In addition each node can be augumented with attributes which can store -additional metadata about an objects +Each node can be augmented with attributes which can store meta-data related +to a node .. figure:: ../images/hdf5_attributes.svg :align: center :width: 50% -Attributes can be accessed via their name. Lets have a closer look on the -links. +As attributes are common to all node types the :cpp:class:`Manager` interface +providing access to them is already exposed on the :cpp:class:`Node` class. + +.. figure:: ../images/node.svg + :align: center + :width: 60% + + Attributes are accessed via an attribute manager associated with each + node instance. + +To understand why every instance of :cpp:class:`Node` also stores an instance +of :cpp:class:`hdf5::node::Link` we need to have a closer look at links .. 
figure:: ../images/hdf5_links.svg :align: center @@ -64,23 +163,19 @@ has taken a rather pragmatic approach how to solve it as will be shown later. Furthermore it is important to note that *Nodes* in an HDF5 file do not have names. This is an unfortunate widespread misconception about HDF5. *Nodes* -can be accessed via a list of links which have names but the *Nodes* -themeselfes have no idea about a name. Which would not even make sense if -we take the ambiguity shown above into account. Which of the three link chains -leading to the *Dataset* instance would be the correct name of the *Dataset*? - - - - -Paths ------ - -An important concept throughout *h5cpp* is a *Path*. A *Path* is used to -reference a particular *Node* within an HDF5 file. Essentially it is the -list of *Link* names used to access an object. -As we have already seen the path to a *Node* is by no means unique. It is -possible to access the same *Node* via different paths. - +can be accessed via a list of named links starting at the root node (root group) +of a file but the *Nodes* themselves have no idea about a name. +A name would not even make sense if we take the ambiguity shown above into +account. Which of the three link chains leading to the *Dataset* instance +would be the correct name of the *Dataset*? + +We have chosen a rather pragmatic approach to solve this naming problem +in *h5cpp*. However, before we can discuss this we have to take a little +detour and introduce some more classes. + +The list of link names used to access a particular object is in general +referred to as the path to an object. *h5cpp* thus has a class +:cpp:class:`hdf5::Path` which represents such a list of link names. The string representation of a path looks quite like a Unix filesystem path. It is the list of names separated by `/`. 
In the above example two possible paths to the *Dataset* would be @@ -90,9 +185,31 @@ paths to the *Dataset* would be /sensors/temp /plot/y +Like a Unix filesystem path, an HDF5 path can be either absolute (starting +with a ``/`` and thus at the root node of a given file) or relative to a +particular node (no ``/`` at the beginning). + +To represent links *h5cpp* provides the :cpp:class:`hdf5::node::Link` class + +.. figure:: ../images/h5cpp_link.svg + :align: center + :width: 50% + +As shown in this UML diagram :cpp:class:`hdf5::node::Link` stores a reference +to the file in which the link resides and the path. In other words, the +:cpp:class:`Link` class is a complete roadmap to a particular node. + +Now, as shown previously, every :cpp:class:`Node` also stores a reference +to an instance of :cpp:class:`Link`. This is the link (in particular the path) +used to access the node. We thus solved the name ambiguity of an object by +defining its name as the link (and thus the path) used to access the object. +With this we are now able to ask a node for its name (path) and expect +the path used to access the node. + Node IDs --------- +~~~~~~~~ +After having solved the naming problem there is still one issue left to address. In the figure above we have seen that there are many paths that could lead to the same object. Now, if we do a recursive traversal over all nodes in a file we would face the problem that we get a copy of the same node several times. @@ -100,6 +217,9 @@ One for each path which leads to this object. We thus introduced the concept of a unique ID which is associated with every node. This ID identifies an object uniquely even over file boundaries and remains constant once a node has been created within a particular file. +This ID is represented by the :cpp:class:`hdf5::ObjectId` class. +However, it is for internal use only and you usually do not have to care +about it. .. 
attention:: @@ -276,77 +396,10 @@ we could map them on points (0),(5) and (11) in a 1D array in memory. * *Selections* make it possible to read only a particular part of a *Dataset* -*h5cpp* a C++ wrapper for *HDF5* -================================ - -Including a single header file is enough to use *h5cpp* in your code - -.. code-block:: cpp - - #include - -This will pull in everything you need. The entire library is organized in -several namespaces with a top level namespace :cpp:any:`hdf5` -.. figure:: ../images/hdf5_package_overview.svg - :align: center - :width: 85% -The most important classes in the top-level namspace might be -:cpp:class:`hdf5::Dimensions` and :cpp:class:`hdf5::Path`. -The former one is merely a type alias - -.. code-block:: cpp - - using Dimensions = std::vector; - -in order to get rid of the rather nasty - -.. code-block:: cpp - - hsize_t *dims; - -which is heavily used throughout the C-API and thus a rather potential source -for memory leaks. Using :cpp:class:`std::vector` for this thus solves a lot -of problems and makes life just easy. - -:cpp:class:`Path` as the name already suggests represents an HDF5 path to -reference a node within a file. We will discuss this class in more detail later. - -+----------------------------+------------------------------------------------+ -| namespace | description | -+============================+================================================+ -| :cpp:any:`hdf5::property` | namespace with property list implementations. | -+----------------------------+------------------------------------------------+ -| :cpp:any:`hdf5::attribute` | contains all classes related to attributes and | -| | attribute management. | -+----------------------------+------------------------------------------------+ -| :cpp:any:`hdf5::datatype` | datatypes and related utility functions. The | -| | classes in this namespace should not be | -| | confused with *commited datatypes* which are | -| | indeed nodes. 
| -+----------------------------+------------------------------------------------+ -| :cpp:any:`hdf5::node` | the most imporant namespace providing all | -| | functionality to deal with nodes. | -+----------------------------+------------------------------------------------+ -| :cpp:any:`hdf5::dataspace` | dataspaces and related utilities | -+----------------------------+------------------------------------------------+ -| :cpp:any:`hdf5::error` | error management and exceptions | -+----------------------------+------------------------------------------------+ -| :cpp:any:`hdf5::file` | everything releated to files | -+----------------------------+------------------------------------------------+ -Nodes ------ -All classes representing nodes are located in the :cpp:any:`hdf5::node` -namespace. The most prominent ones are -.. figure:: ../images/hdf5_node_types.svg - :align: center - :width: 75% - -As you can see all node classes have a common parent class -:cpp:class:`hdf5::node::Node`.