iodump.h 8.03 KB
Newer Older
1
#pragma once
Stefan Weßel's avatar
ic  
Stefan Weßel committed
2

3
#include <cassert>
#include <cstdint>
#include <exception>
#include <fmt/format.h>
#include <hdf5.h>
#include <stdexcept>
#include <string>
#include <sys/stat.h>
#include <type_traits>
#include <typeinfo>
#include <vector>
Stefan Weßel's avatar
ic  
Stefan Weßel committed
8

9 10
namespace loadl {

11 12 13
// Exception type thrown by the iodump templates in this header when an HDF5 call
// reports a failure. The string passed at the throw sites is the name of the HDF5
// function that failed (e.g. "H5Dwrite").
class iodump_exception : public std::exception {
private:
	// Text handed out by what(); presumably derived from the constructor argument
	// (the constructor is defined outside this header -- TODO confirm).
	std::string message_;

public:
	// msg: description of what went wrong.
	iodump_exception(const std::string &msg);

	// Returns the error description as a C string.
	const char *what() const noexcept override;
};

21 22 23 24
// This thing is a wrapper around an HDF5 file which can do both reading and
// writing depending on how you open it. If you write to a read only file,
// there will be an error probably.
class iodump {
25 26 27 28 29 30
private:
	// helper class to make sure those pesky HDF5 hid_t handles are always closed
	class h5_handle {
	public:
		h5_handle(hid_t handle, herr_t (*closer)(hid_t));
		~h5_handle();
31 32
		h5_handle(h5_handle &) = delete;
		h5_handle(h5_handle &&) noexcept;
33
		hid_t operator*();
34

35 36 37 38
	private:
		herr_t (*closer_)(hid_t);
		hid_t handle_;
	};
39

40 41 42 43 44 45 46 47
	iodump(std::string filename, hid_t h5_file);

	const std::string filename_;
	const hid_t h5_file_;

	// TODO: make these variable if necessary
	static const H5Z_filter_t compression_filter_ = H5Z_FILTER_DEFLATE;
	static const size_t chunk_size_ = 1000;
48

49 50 51
	template<typename T>
	constexpr static hid_t h5_datatype();

52
public:
53 54 55
	// Wrapper around the concept of a HDF5 group.
	// You can list over the group elements by using the iterators.
	class group {
56 57 58 59 60
	public:
		struct iterator {
			iterator(hid_t group, uint64_t idx);
			std::string operator*();
			iterator operator++();
61
			bool operator!=(const iterator &b);
62 63 64 65

			const hid_t group_;
			uint64_t idx_;
		};
66 67 68 69

		group(hid_t parent, const std::string &path);
		group(const group &) =
		    delete; // did you know this is a thing? Very handy in preventing errors.
70 71 72
		~group();
		iterator begin() const;
		iterator end() const;
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90

		template<class T>
		void write(const std::string &name, const std::vector<T> &data) const;
		template<class T>
		void write(const std::string &name,
		           const T &value) const; // this is meant for atomic datatypes like int and double

		// insert_back inserts data at the end of the dataset given by name, extending it if
		// necessary. This only works in read/write mode.
		template<class T>
		void insert_back(const std::string &name, const std::vector<T> &data) const;

		template<class T>
		void read(const std::string &name, std::vector<T> &data) const;
		template<class T>
		void read(const std::string &name, T &value) const;

		size_t get_extent(const std::string &name) const;
91 92 93

		group open_group(const std::string &path) const; // this works like the cd command

94 95
		bool exists(
		    const std::string &path) const; // checks whether an object in the dump file exists
96
	private:
97 98 99
		hid_t group_;

		// chunk_size == 0 means contiguous storage
100 101 102 103 104
		// if the dataset already exists, we try to overwrite it. However it must have the same
		// extent for that to work.
		iodump::h5_handle create_dataset(const std::string &name, hid_t datatype, hsize_t size,
		                                 hsize_t chunk_size, H5Z_filter_t compression_filter,
		                                 bool unlimited) const;
105
	};
106

107
	// delete what was there and create a new file for writing
108
	static iodump create(const std::string &filename);
109

110 111
	static iodump open_readonly(const std::string &filename);
	static iodump open_readwrite(const std::string &filename);
112

113
	group get_root();
114

115 116
	// TODO: once the intel compiler can do guaranteed copy elision,
	// please uncomment this line! and be careful about bugs!
117
	// iodump(iodump &) = delete;
118 119
	~iodump();

120
	friend class group;
Stefan Weßel's avatar
ic  
Stefan Weßel committed
121
};
122 123

template<typename T>
124
constexpr hid_t iodump::h5_datatype() {
Lukas Weber's avatar
Lukas Weber committed
125 126
	if(typeid(T) == typeid(char))
		return H5T_NATIVE_CHAR;
Lukas Weber's avatar
Lukas Weber committed
127 128
	if(typeid(T) == typeid(signed char))
		return H5T_NATIVE_SCHAR;
129 130 131 132 133 134 135 136
	if(typeid(T) == typeid(int))
		return H5T_NATIVE_INT;
	if(typeid(T) == typeid(short))
		return H5T_NATIVE_SHORT;
	if(typeid(T) == typeid(long))
		return H5T_NATIVE_LONG;
	if(typeid(T) == typeid(long long))
		return H5T_NATIVE_LLONG;
Lukas Weber's avatar
Lukas Weber committed
137 138
	if(typeid(T) == typeid(unsigned char))
		return H5T_NATIVE_UCHAR;
139 140 141 142 143 144 145 146 147 148 149 150 151
	if(typeid(T) == typeid(unsigned int))
		return H5T_NATIVE_UINT;
	if(typeid(T) == typeid(unsigned short))
		return H5T_NATIVE_USHORT;
	if(typeid(T) == typeid(unsigned long))
		return H5T_NATIVE_ULONG;
	if(typeid(T) == typeid(unsigned long long))
		return H5T_NATIVE_ULLONG;
	if(typeid(T) == typeid(float))
		return H5T_NATIVE_FLOAT;
	if(typeid(T) == typeid(double))
		return H5T_NATIVE_DOUBLE;

152
	throw std::runtime_error{fmt::format("unsupported datatype: {}", typeid(T).name())};
153
	// If you run into this error, you probably tried to write a non-primitive datatype
Lukas Weber's avatar
Lukas Weber committed
154 155
	// to a dump file. See the other classes’s checkpointing functions for an example of
	// what to do.
156
	// ... or it is a native datatype I forgot to add. Then add it.
157 158 159
}

template<class T>
160
void iodump::group::write(const std::string &name, const std::vector<T> &data) const {
161
	int chunk_size = 0;
162
	H5Z_filter_t compression_filter = 0;
163

164
	// no compression and chunking unless dataset is big enough
165
	if(data.size() >= chunk_size_) {
166
		chunk_size = iodump::chunk_size_;
167
		compression_filter = iodump::compression_filter_;
168 169
	}

170 171 172 173
	h5_handle dataset{
	    create_dataset(name, h5_datatype<T>(), data.size(), chunk_size, compression_filter, false)};
	herr_t status =
	    H5Dwrite(*dataset, h5_datatype<T>(), H5S_ALL, H5S_ALL, H5P_DEFAULT, data.data());
174 175 176 177 178
	if(status < 0)
		throw iodump_exception{"H5Dwrite"};
}

template<class T>
179
void iodump::group::write(const std::string &name, const T &value) const {
180 181 182 183
	// I hope nobody copies a lot of small values...
	write(name, std::vector<T>{value});
}

184
template<>
185
inline void iodump::group::write(const std::string &name, const std::string &value) const {
186 187 188
	write(name, std::vector<char>{value.begin(), value.end()});
}

189 190
template<class T>
void iodump::group::insert_back(const std::string &name, const std::vector<T> &data) const {
191
	// If the dataset does not exist, we create a new unlimited one with 0 extent.
192 193
	if(!exists(name)) {
		create_dataset(name, h5_datatype<T>(), 0, chunk_size_, compression_filter_, true);
194 195
	}

196
	h5_handle dataset{H5Dopen2(group_, name.c_str(), H5P_DEFAULT), H5Dclose};
197

198
	hsize_t mem_size = data.size();
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
	h5_handle memspace{H5Screate_simple(1, &mem_size, nullptr), H5Sclose};
	int size;
	herr_t status;

	{ // limit the scope of the dataspace handle
		h5_handle dataspace{H5Dget_space(*dataset), H5Sclose};

		size = H5Sget_simple_extent_npoints(*dataspace);
		if(size < 0) {
			throw iodump_exception{"H5Sget_simple_extent_npoints"};
		}

		if(data.size() > 0) {
			hsize_t new_size = size + data.size();
			status = H5Dset_extent(*dataset, &new_size);
			if(status < 0) {
				throw iodump_exception{"H5Pset_extent"};
			}
		}
	} // because it will be reopened after this
219

220
	h5_handle dataspace{H5Dget_space(*dataset), H5Sclose};
221 222 223 224

	// select the hyperslab of the extended area
	hsize_t pos = size;
	hsize_t extent = data.size();
225
	status = H5Sselect_hyperslab(*dataspace, H5S_SELECT_SET, &pos, nullptr, &extent, nullptr);
226 227 228
	if(status < 0)
		throw iodump_exception{"H5Sselect_hyperslap"};

229
	status = H5Dwrite(*dataset, h5_datatype<T>(), *memspace, *dataspace, H5P_DEFAULT, data.data());
230 231 232 233 234
	if(status < 0)
		throw iodump_exception{"H5Dwrite"};
}

template<class T>
235
void iodump::group::read(const std::string &name, std::vector<T> &data) const {
236 237
	h5_handle dataset{H5Dopen2(group_, name.c_str(), H5P_DEFAULT), H5Dclose};
	h5_handle dataspace{H5Dget_space(*dataset), H5Sclose};
238

239
	int size = H5Sget_simple_extent_npoints(*dataspace); // rank > 1 will get flattened when loaded.
240 241 242
	if(size < 0)
		throw iodump_exception{"H5Sget_simple_extent_npoints"};
	data.resize(size);
243

244 245 246 247
	if(size == 0) { // handle empty dataset correctly
		return;
	}

248 249
	herr_t status =
	    H5Dread(*dataset, h5_datatype<T>(), H5S_ALL, H5P_DEFAULT, H5P_DEFAULT, data.data());
250 251 252 253
	if(status < 0)
		throw iodump_exception{"H5Dread"};
}

254
template<>
255
inline void iodump::group::read(const std::string &name, std::string &value) const {
256 257 258 259 260
	std::vector<char> buf;
	read(name, buf);
	value = std::string{buf.begin(), buf.end()};
}

261
template<class T>
262
void iodump::group::read(const std::string &name, T &value) const {
263 264
	std::vector<T> buf;
	read(name, buf);
Lukas Weber's avatar
Lukas Weber committed
265
	assert(buf.size() == 1);
266 267
	value = buf.at(0);
}
268 269 270

// utility
// Declaration only; the definition is not part of this header.
// NOTE(review): judging by the name, this reports whether `path` exists on the
// file system -- confirm against the implementation.
bool file_exists(const std::string &path);
271
}