Skip to content

Commit b43e93b

Browse files
committed
rebase & clean up HDF5DataLayer Prefetch
Adapt HDF5DataLayer Prefetch to BVLC#2836
1 parent 87b27d1 commit b43e93b

File tree

5 files changed

+122
-154
lines changed

5 files changed

+122
-154
lines changed

include/caffe/util/hdf5.hpp

+25
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,31 @@
1010

1111
namespace caffe {
1212

13+
/**
14+
* @brief Shapes a Blob to read "num" rows of HDF5 data. If num == -1, take
15+
* the num of the HDF5 dataset.
16+
*
17+
* @param file_id the HDF5 file handle
18+
* @param dataset_name the name of the HDF5 dataset to read
19+
* @param num the number of rows to read: either num >= 0,
20+
* or num == -1 for the number of rows in the HDF5 dataset
21+
* @param blob the Blob to shape
22+
*
23+
* The HDF5 dataset could be N(>=1) dimensions as long as N doesn't exceed
24+
* Blob's maximum dimension.
25+
*/
26+
template <typename Dtype>
27+
void HDF5PrepareBlob(hid_t file_id, const char* dataset_name, int num,
28+
Blob<Dtype>* blob);
29+
30+
/**
31+
* @brief Reads rows of the HDF5 dataset, starting at row h5_offset, into blob
32+
* starting at row blob_offset; blob must have been pre-shaped using HDF5PrepareBlob (or otherwise). Returns the number of rows read.
33+
*/
34+
template <typename Dtype>
35+
int HDF5ReadRowsToBlob(hid_t file_id, const char* dataset_name,
36+
int h5_offset, int blob_offset, Blob<Dtype>* blob);
37+
1338
template <typename Dtype>
1439
void hdf5_load_nd_dataset_helper(
1540
hid_t file_id, const char* dataset_name_, int min_dim, int max_dim,

include/caffe/util/io.hpp

-28
Original file line numberDiff line numberDiff line change
@@ -136,34 +136,6 @@ cv::Mat DecodeDatumToCVMat(const Datum& datum, bool is_color);
136136

137137
void CVMatToDatum(const cv::Mat& cv_img, Datum* datum);
138138

139-
/**
140-
* @brief Shapes a Blob to read "num" rows of HDF5 data. If num == -1, take
141-
* the num of the HDF5 dataset.
142-
*
143-
* @param file_id the HDF5 file handle
144-
* @param dataset_name the name of the HDF5 dataset to read
145-
* @param num the number of rows to read: either num >= 0,
146-
* or num == -1 for the number of rows in the HDF5 dataset
147-
* @param blob the Blob to shape
148-
*
149-
* The HDF5 dataset could be N(>=1) dimensions as long as N doesn't exceed Blob's maximum dimension.
150-
*/
151-
template <typename Dtype>
152-
void HDF5PrepareBlob(hid_t file_id, const char* dataset_name, int num,
153-
Blob<Dtype>* blob);
154-
155-
/**
156-
* @brief Reads rows [offset, offset + data->num() - 1] into Blob* data, which
157-
* must have been pre-shaped using HDF5PrepareBlob (or otherwise).
158-
*/
159-
template <typename Dtype>
160-
int HDF5ReadRowsToBlob(hid_t file_id, const char* dataset_name,
161-
int h5_offset, int blob_offset, Blob<Dtype>* blob);
162-
163-
template <typename Dtype>
164-
void hdf5_save_nd_dataset(
165-
const hid_t file_id, const string& dataset_name, const Blob<Dtype>& blob);
166-
167139
} // namespace caffe
168140

169141
#endif // CAFFE_UTIL_IO_H_

src/caffe/layers/hdf5_data_layer.cpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,12 @@
88
#include <string>
99
#include <vector>
1010

11-
#include "hdf5.h"
12-
#include "hdf5_hl.h"
1311
#include "stdint.h"
1412

1513
#include "caffe/data_layers.hpp"
1614
#include "caffe/layer.hpp"
1715
#include "caffe/util/hdf5.hpp"
16+
#include "caffe/util/io.hpp"
1817

1918
namespace caffe {
2019

src/caffe/util/hdf5.cpp

+96-6
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,98 @@
55

66
namespace caffe {
77

8+
// Verifies format of data stored in HDF5 file and reshapes blob accordingly.
9+
template <typename Dtype>
10+
void HDF5PrepareBlob(hid_t file_id, const char* dataset_name, int num,
11+
Blob<Dtype>* blob) {
12+
// Verify that the dataset exists.
13+
CHECK(H5LTfind_dataset(file_id, dataset_name))
14+
<< "Failed to find HDF5 dataset " << dataset_name;
15+
herr_t status;
16+
int ndims;
17+
CHECK_LE(0, H5LTget_dataset_ndims(file_id, dataset_name, &ndims))
18+
<< "Failed to get dataset ndims for " << dataset_name;
19+
CHECK_GE(ndims, 1) << "HDF5 dataset must have at least 1 dimension.";
20+
CHECK_LE(ndims, kMaxBlobAxes)
21+
<< "HDF5 dataset must have at most "
22+
<< kMaxBlobAxes << " dimensions, to fit in a Blob.";
23+
24+
// Verify that the data format is what we expect: float or double.
25+
std::vector<hsize_t> dims(ndims);
26+
H5T_class_t h5_class;
27+
status = H5LTget_dataset_info(
28+
file_id, dataset_name, dims.data(), &h5_class, NULL);
29+
CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name;
30+
CHECK_EQ(h5_class, H5T_FLOAT) << "Expected float or double data";
31+
CHECK_GE(num, -1) << "num must be -1 (to indicate the number of rows"
32+
"in the dataset) or non-negative.";
33+
34+
vector<int> blob_dims(dims.size());
35+
blob_dims[0] = (num == -1) ? dims[0] : num;
36+
for (int i = 1; i < dims.size(); ++i) {
37+
blob_dims[i] = dims[i];
38+
}
39+
blob->Reshape(blob_dims);
40+
}
41+
42+
template
43+
void HDF5PrepareBlob<float>(hid_t file_id, const char* dataset_name, int num,
44+
Blob<float>* blob);
45+
46+
template
47+
void HDF5PrepareBlob<double>(hid_t file_id, const char* dataset_name, int num,
48+
Blob<double>* blob);
49+
50+
template <typename Dtype>
51+
int HDF5ReadRowsToBlob(hid_t file_id, const char* dataset_name,
52+
int h5_offset, int blob_offset, Blob<Dtype>* blob) {
53+
int ndims;
54+
CHECK_LE(0, H5LTget_dataset_ndims(file_id, dataset_name, &ndims))
55+
<< "Failed to get dataset ndims for " << dataset_name;
56+
std::vector<hsize_t> dims(ndims);
57+
H5T_class_t h5_class;
58+
herr_t status = H5LTget_dataset_info(
59+
file_id, dataset_name, dims.data(), &h5_class, NULL);
60+
CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name;
61+
CHECK_EQ(h5_class, H5T_FLOAT) << "Expected float or double data";
62+
hid_t dataset = H5Dopen2(file_id, dataset_name, H5P_DEFAULT);
63+
hid_t dataspace = H5Dget_space(dataset);
64+
vector<hsize_t> slab_start(ndims, 0);
65+
slab_start[0] = h5_offset;
66+
const int num_rows_available = dims[0] - h5_offset;
67+
const int num_rows = std::min(blob->num() - blob_offset, num_rows_available);
68+
if (num_rows <= 0) {
69+
return 0;
70+
}
71+
vector<hsize_t> slab_count(ndims, num_rows);
72+
for (int i = 1; i < ndims; ++i) {
73+
slab_count[i] = dims[i];
74+
}
75+
status = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET,
76+
slab_start.data(), NULL, slab_count.data(), NULL);
77+
CHECK_GE(status, 0) << "Failed to select slab.";
78+
hid_t memspace = H5Screate_simple(ndims, slab_count.data(), NULL);
79+
const int data_size = blob->count() / blob->num();
80+
// separate multiplication to avoid a possible overflow
81+
const int blob_offset_size = blob_offset * data_size;
82+
hid_t type = (sizeof(Dtype) == 4) ? H5T_NATIVE_FLOAT : H5T_NATIVE_DOUBLE;
83+
status = H5Dread(dataset, type, memspace, dataspace, H5P_DEFAULT,
84+
blob->mutable_cpu_data() + blob_offset_size);
85+
CHECK_GE(status, 0) << "Failed to read dataset " << dataset_name;
86+
H5Dclose(dataset);
87+
H5Sclose(dataspace);
88+
H5Sclose(memspace);
89+
return num_rows;
90+
}
91+
92+
template
93+
int HDF5ReadRowsToBlob<float>(hid_t file_id, const char* dataset_name,
94+
int h5_offset, int blob_offset, Blob<float>* data);
95+
96+
template
97+
int HDF5ReadRowsToBlob<double>(hid_t file_id, const char* dataset_name,
98+
int h5_offset, int blob_offset, Blob<double>* data);
99+
8100
// Verifies format of data stored in HDF5 file and reshapes blob accordingly.
9101
template <typename Dtype>
10102
void hdf5_load_nd_dataset_helper(
@@ -59,7 +151,7 @@ void hdf5_save_nd_dataset<float>(
59151
const hid_t file_id, const string& dataset_name, const Blob<float>& blob,
60152
bool write_diff) {
61153
int num_axes = blob.num_axes();
62-
hsize_t *dims = new hsize_t[num_axes];
154+
std::vector<hsize_t> dims(num_axes);
63155
for (int i = 0; i < num_axes; ++i) {
64156
dims[i] = blob.shape(i);
65157
}
@@ -70,17 +162,16 @@ void hdf5_save_nd_dataset<float>(
70162
data = blob.cpu_data();
71163
}
72164
herr_t status = H5LTmake_dataset_float(
73-
file_id, dataset_name.c_str(), num_axes, dims, data);
165+
file_id, dataset_name.c_str(), num_axes, dims.data(), data);
74166
CHECK_GE(status, 0) << "Failed to make float dataset " << dataset_name;
75-
delete[] dims;
76167
}
77168

78169
template <>
79170
void hdf5_save_nd_dataset<double>(
80171
hid_t file_id, const string& dataset_name, const Blob<double>& blob,
81172
bool write_diff) {
82173
int num_axes = blob.num_axes();
83-
hsize_t *dims = new hsize_t[num_axes];
174+
std::vector<hsize_t> dims(num_axes);
84175
for (int i = 0; i < num_axes; ++i) {
85176
dims[i] = blob.shape(i);
86177
}
@@ -91,9 +182,8 @@ void hdf5_save_nd_dataset<double>(
91182
data = blob.cpu_data();
92183
}
93184
herr_t status = H5LTmake_dataset_double(
94-
file_id, dataset_name.c_str(), num_axes, dims, data);
185+
file_id, dataset_name.c_str(), num_axes, dims.data(), data);
95186
CHECK_GE(status, 0) << "Failed to make double dataset " << dataset_name;
96-
delete[] dims;
97187
}
98188

99189
string hdf5_load_string(hid_t loc_id, const string& dataset_name) {

src/caffe/util/io.cpp

-118
Original file line numberDiff line numberDiff line change
@@ -228,122 +228,4 @@ void CVMatToDatum(const cv::Mat& cv_img, Datum* datum) {
228228
datum->set_data(buffer);
229229
}
230230

231-
// Verifies format of data stored in HDF5 file and reshapes blob accordingly.
232-
template <typename Dtype>
233-
void HDF5PrepareBlob(hid_t file_id, const char* dataset_name, int num,
234-
Blob<Dtype>* blob) {
235-
// Verify that the dataset exists.
236-
CHECK(H5LTfind_dataset(file_id, dataset_name))
237-
<< "Failed to find HDF5 dataset " << dataset_name;
238-
herr_t status;
239-
int ndims;
240-
CHECK_LE(0, H5LTget_dataset_ndims(file_id, dataset_name, &ndims))
241-
<< "Failed to get dataset ndims for " << dataset_name;
242-
CHECK_GE(ndims, 1) << "HDF5 dataset must have at least 1 dimension.";
243-
CHECK_LE(ndims, kMaxBlobAxes)
244-
<< "HDF5 dataset must have at most "
245-
<< kMaxBlobAxes << " dimensions, to fit in a Blob.";
246-
247-
// Verify that the data format is what we expect: float or double.
248-
std::vector<hsize_t> dims(ndims);
249-
H5T_class_t h5_class;
250-
status = H5LTget_dataset_info(
251-
file_id, dataset_name, dims.data(), &h5_class, NULL);
252-
CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name;
253-
CHECK_EQ(h5_class, H5T_FLOAT) << "Expected float or double data";
254-
CHECK_GE(num, -1) << "num must be -1 (to indicate the number of rows"
255-
"in the dataset) or non-negative.";
256-
257-
vector<int> blob_dims(dims.size());
258-
blob_dims[0] = (num == -1) ? dims[0] : num;
259-
for (int i = 1; i < dims.size(); ++i) {
260-
blob_dims[i] = dims[i];
261-
}
262-
blob->Reshape(blob_dims);
263-
}
264-
265-
template
266-
void HDF5PrepareBlob<float>(hid_t file_id, const char* dataset_name, int num,
267-
Blob<float>* blob);
268-
269-
template
270-
void HDF5PrepareBlob<double>(hid_t file_id, const char* dataset_name, int num,
271-
Blob<double>* blob);
272-
273-
template <typename Dtype>
274-
int HDF5ReadRowsToBlob(hid_t file_id, const char* dataset_name,
275-
int h5_offset, int blob_offset, Blob<Dtype>* blob) {
276-
int ndims;
277-
CHECK_LE(0, H5LTget_dataset_ndims(file_id, dataset_name, &ndims))
278-
<< "Failed to get dataset ndims for " << dataset_name;
279-
std::vector<hsize_t> dims(ndims);
280-
H5T_class_t h5_class;
281-
herr_t status = H5LTget_dataset_info(
282-
file_id, dataset_name, dims.data(), &h5_class, NULL);
283-
CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name;
284-
CHECK_EQ(h5_class, H5T_FLOAT) << "Expected float or double data";
285-
hid_t dataset = H5Dopen2(file_id, dataset_name, H5P_DEFAULT);
286-
hid_t dataspace = H5Dget_space(dataset);
287-
vector<hsize_t> slab_start(ndims, 0);
288-
slab_start[0] = h5_offset;
289-
const int num_rows_available = dims[0] - h5_offset;
290-
const int num_rows = std::min(blob->num() - blob_offset, num_rows_available);
291-
if (num_rows <= 0) {
292-
return 0;
293-
}
294-
vector<hsize_t> slab_count(ndims, num_rows);
295-
for (int i = 1; i < ndims; ++i) {
296-
slab_count[i] = dims[i];
297-
}
298-
status = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET,
299-
slab_start.data(), NULL, slab_count.data(), NULL);
300-
CHECK_GE(status, 0) << "Failed to select slab.";
301-
hid_t memspace = H5Screate_simple(ndims, slab_count.data(), NULL);
302-
const int data_size = blob->count() / blob->num();
303-
// separate multiplication to avoid a possible overflow
304-
const int blob_offset_size = blob_offset * data_size;
305-
hid_t type = (sizeof(Dtype) == 4) ? H5T_NATIVE_FLOAT : H5T_NATIVE_DOUBLE;
306-
status = H5Dread(dataset, type, memspace, dataspace, H5P_DEFAULT,
307-
blob->mutable_cpu_data() + blob_offset_size);
308-
CHECK_GE(status, 0) << "Failed to read dataset " << dataset_name;
309-
H5Dclose(dataset);
310-
H5Sclose(dataspace);
311-
H5Sclose(memspace);
312-
return num_rows;
313-
}
314-
315-
template
316-
int HDF5ReadRowsToBlob<float>(hid_t file_id, const char* dataset_name,
317-
int h5_offset, int blob_offset, Blob<float>* data);
318-
319-
template
320-
int HDF5ReadRowsToBlob<double>(hid_t file_id, const char* dataset_name,
321-
int h5_offset, int blob_offset, Blob<double>* data);
322-
323-
template <>
324-
void hdf5_save_nd_dataset<float>(
325-
const hid_t file_id, const string& dataset_name, const Blob<float>& blob) {
326-
hsize_t dims[HDF5_NUM_DIMS];
327-
dims[0] = blob.num();
328-
dims[1] = blob.channels();
329-
dims[2] = blob.height();
330-
dims[3] = blob.width();
331-
herr_t status = H5LTmake_dataset_float(
332-
file_id, dataset_name.c_str(), HDF5_NUM_DIMS, dims, blob.cpu_data());
333-
CHECK_GE(status, 0) << "Failed to make float dataset " << dataset_name;
334-
}
335-
336-
template <>
337-
void hdf5_save_nd_dataset<double>(
338-
const hid_t file_id, const string& dataset_name, const Blob<double>& blob) {
339-
hsize_t dims[HDF5_NUM_DIMS];
340-
dims[0] = blob.num();
341-
dims[1] = blob.channels();
342-
dims[2] = blob.height();
343-
dims[3] = blob.width();
344-
herr_t status = H5LTmake_dataset_double(
345-
file_id, dataset_name.c_str(), HDF5_NUM_DIMS, dims, blob.cpu_data());
346-
CHECK_GE(status, 0) << "Failed to make double dataset " << dataset_name;
347-
}
348-
349231
} // namespace caffe

0 commit comments

Comments
 (0)