Skip to content

Commit 325f5a0

Browse files
authored
* Pushing for build tests * Contrib files * Removing deprecated checks
1 parent 1bf0d8d commit 325f5a0

17 files changed

+281
-266
lines changed

apex/contrib/csrc/groupbn/batch_norm.cu

Lines changed: 24 additions & 22 deletions
Original file line number · Diff line number · Diff line change
@@ -8,6 +8,8 @@
88

99
#include <cuda.h>
1010

11+
#include "compat.h"
12+
1113
#define cudaCheckErrors(msg) \
1214
do { \
1315
cudaError_t __err = cudaGetLastError(); \
@@ -72,7 +74,7 @@ at::Tensor nhwc_bn_fwd_train(
7274
const int C = x.size(3);
7375

7476
// generating new magic number and use that for sync
75-
int* magic = magic_tensor.data<int>();
77+
int* magic = magic_tensor.DATA_PTR<int>();
7678
*magic = (*magic + 1) & 0xff;
7779

7880
// Allocate output tensor
@@ -87,13 +89,13 @@ at::Tensor nhwc_bn_fwd_train(
8789
bn->setConstants(momentum, epsilon);
8890

8991
// set pointers within the wrapper
90-
bn->setInputOutputPointers(x.data<at::Half>(),
92+
bn->setInputOutputPointers(x.DATA_PTR<at::Half>(),
9193
nullptr,
92-
y.data<at::Half>(),
94+
y.DATA_PTR<at::Half>(),
9395
nullptr);
9496

95-
bn->setWeightPointers({scale.data<float>(), bias.data<float>()}, {nullptr, nullptr});
96-
bn->setParameterPointers({running_mean.data<float>(), running_inv_var.data<float>()});
97+
bn->setWeightPointers({scale.DATA_PTR<float>(), bias.DATA_PTR<float>()}, {nullptr, nullptr});
98+
bn->setParameterPointers({running_mean.DATA_PTR<float>(), running_inv_var.DATA_PTR<float>()});
9799

98100
// deal with workspace(s)
99101
auto workspace_bytes = bn->numWorkspaceBytes();
@@ -114,12 +116,12 @@ at::Tensor nhwc_bn_fwd_train(
114116
Workspace ws(total_workspace_bytes);
115117

116118
std::vector<void *> workspace;
117-
workspace.push_back(minibatch_mean.data<float>());
118-
workspace.push_back(minibatch_inv_var.data<float>());
119+
workspace.push_back(minibatch_mean.DATA_PTR<float>());
120+
workspace.push_back(minibatch_inv_var.DATA_PTR<float>());
119121

120122
auto stream = at::cuda::getCurrentCUDAStream().stream();
121123
const int retired_cta_bytes = workspace_bytes[2];
122-
void* retired_ctas = ret_cta.data<uint8_t>();
124+
void* retired_ctas = ret_cta.DATA_PTR<uint8_t>();
123125
assert(ret_cta.size(0)>=retired_cta_bytes);
124126
workspace.push_back(retired_ctas);
125127

@@ -165,13 +167,13 @@ at::Tensor nhwc_bn_fwd_eval(
165167
bn->setConstants(momentum, epsilon);
166168

167169
// set pointers within the wrapper
168-
bn->setInputOutputPointers(x.data<at::Half>(),
170+
bn->setInputOutputPointers(x.DATA_PTR<at::Half>(),
169171
nullptr,
170-
y.data<at::Half>(),
172+
y.DATA_PTR<at::Half>(),
171173
nullptr);
172174

173-
bn->setWeightPointers({scale.data<float>(), bias.data<float>()}, {nullptr, nullptr});
174-
bn->setParameterPointers({running_mean.data<float>(), running_inv_var.data<float>()});
175+
bn->setWeightPointers({scale.DATA_PTR<float>(), bias.DATA_PTR<float>()}, {nullptr, nullptr});
176+
bn->setParameterPointers({running_mean.DATA_PTR<float>(), running_inv_var.DATA_PTR<float>()});
175177

176178
// deal with workspace(s)
177179
auto workspace_bytes = bn->numWorkspaceBytes();
@@ -197,7 +199,7 @@ at::Tensor nhwc_bn_fwd_eval(
197199

198200
auto stream = at::cuda::getCurrentCUDAStream().stream();
199201
const int retired_cta_bytes = workspace_bytes[2];
200-
void* retired_ctas = ret_cta.data<uint8_t>();
202+
void* retired_ctas = ret_cta.DATA_PTR<uint8_t>();
201203
assert(ret_cta.size(0)>=retired_cta_bytes);
202204
workspace.push_back(retired_ctas);
203205

@@ -244,7 +246,7 @@ std::vector<at::Tensor> nhwc_bn_bwd(
244246
const int C = x.size(3);
245247

246248
// generating new magic number and use that for sync
247-
int* magic = magic_tensor.data<int>();
249+
int* magic = magic_tensor.DATA_PTR<int>();
248250
*magic = (*magic + 1) & 0xff;
249251

250252
// outputs
@@ -264,13 +266,13 @@ std::vector<at::Tensor> nhwc_bn_bwd(
264266
bn->setConstants(momentum, epsilon);
265267

266268
// set pointers within the wrapper
267-
bn->setInputOutputPointers(x.data<at::Half>(),
268-
x_grad.data<at::Half>(),
269+
bn->setInputOutputPointers(x.DATA_PTR<at::Half>(),
270+
x_grad.DATA_PTR<at::Half>(),
269271
nullptr,
270-
dy.data<at::Half>());
272+
dy.DATA_PTR<at::Half>());
271273

272-
bn->setWeightPointers({scale.data<float>(), bias.data<float>()}, {scale_grad.data<float>(), bias_grad.data<float>()});
273-
bn->setParameterPointers({running_mean.data<float>(), running_inv_var.data<float>()});
274+
bn->setWeightPointers({scale.DATA_PTR<float>(), bias.DATA_PTR<float>()}, {scale_grad.DATA_PTR<float>(), bias_grad.DATA_PTR<float>()});
275+
bn->setParameterPointers({running_mean.DATA_PTR<float>(), running_inv_var.DATA_PTR<float>()});
274276

275277
// deal with workspace(s)
276278
auto workspace_bytes = bn->numWorkspaceBytes();
@@ -291,12 +293,12 @@ std::vector<at::Tensor> nhwc_bn_bwd(
291293
Workspace ws(total_workspace_bytes);
292294

293295
std::vector<void *> workspace;
294-
workspace.push_back(minibatch_mean.data<float>());
295-
workspace.push_back(minibatch_inv_var.data<float>());
296+
workspace.push_back(minibatch_mean.DATA_PTR<float>());
297+
workspace.push_back(minibatch_inv_var.DATA_PTR<float>());
296298

297299
auto stream = at::cuda::getCurrentCUDAStream().stream();
298300
const int retired_cta_bytes = workspace_bytes[2];
299-
void* retired_ctas = ret_cta.data<uint8_t>();
301+
void* retired_ctas = ret_cta.DATA_PTR<uint8_t>();
300302
assert(ret_cta.size(0)>=retired_cta_bytes);
301303
workspace.push_back(retired_ctas);
302304

apex/contrib/csrc/groupbn/batch_norm_add_relu.cu

Lines changed: 29 additions & 27 deletions
Original file line number · Diff line number · Diff line change
@@ -8,6 +8,8 @@
88

99
#include <cuda.h>
1010

11+
#include "compat.h"
12+
1113
//FIXME move the common stuff to common h file
1214
#define cudaCheckErrors(msg) \
1315
do { \
@@ -74,7 +76,7 @@ at::Tensor nhwc_bn_addrelu_fwd_train(
7476
const int C = x.size(3);
7577

7678
// generating new magic number and use that for sync
77-
int* magic = magic_tensor.data<int>();
79+
int* magic = magic_tensor.DATA_PTR<int>();
7880
*magic = (*magic + 1) & 0xff;
7981

8082
// Allocate output tensor
@@ -89,15 +91,15 @@ at::Tensor nhwc_bn_addrelu_fwd_train(
8991
bn->setConstants(momentum, epsilon);
9092

9193
// set pointers within the wrapper
92-
bn->setInputOutputPointers(x.data<at::Half>(),
94+
bn->setInputOutputPointers(x.DATA_PTR<at::Half>(),
9395
nullptr,
94-
y.data<at::Half>(),
96+
y.DATA_PTR<at::Half>(),
9597
nullptr,
96-
z.data<at::Half>(),
98+
z.DATA_PTR<at::Half>(),
9799
nullptr);
98100

99-
bn->setWeightPointers({scale.data<float>(), bias.data<float>()}, {nullptr, nullptr});
100-
bn->setParameterPointers({running_mean.data<float>(), running_inv_var.data<float>()});
101+
bn->setWeightPointers({scale.DATA_PTR<float>(), bias.DATA_PTR<float>()}, {nullptr, nullptr});
102+
bn->setParameterPointers({running_mean.DATA_PTR<float>(), running_inv_var.DATA_PTR<float>()});
101103

102104
// deal with workspace(s)
103105
auto workspace_bytes = bn->numWorkspaceBytes();
@@ -118,13 +120,13 @@ at::Tensor nhwc_bn_addrelu_fwd_train(
118120
Workspace ws(total_workspace_bytes);
119121

120122
std::vector<void *> workspace;
121-
workspace.push_back(minibatch_mean.data<float>());
122-
workspace.push_back(minibatch_inv_var.data<float>());
123-
workspace.push_back(bitmask.data<int32_t>());
123+
workspace.push_back(minibatch_mean.DATA_PTR<float>());
124+
workspace.push_back(minibatch_inv_var.DATA_PTR<float>());
125+
workspace.push_back(bitmask.DATA_PTR<int32_t>());
124126

125127
auto stream = at::cuda::getCurrentCUDAStream().stream();
126128
const int retired_cta_bytes = workspace_bytes[3];
127-
void* retired_ctas = ret_cta.data<uint8_t>();
129+
void* retired_ctas = ret_cta.DATA_PTR<uint8_t>();
128130
assert(ret_cta.size(0)>=retired_cta_bytes);
129131

130132
workspace.push_back(retired_ctas);
@@ -171,15 +173,15 @@ at::Tensor nhwc_bn_addrelu_fwd_eval(
171173
bn->setConstants(momentum, epsilon);
172174

173175
// set pointers within the wrapper
174-
bn->setInputOutputPointers(x.data<at::Half>(),
176+
bn->setInputOutputPointers(x.DATA_PTR<at::Half>(),
175177
nullptr,
176-
y.data<at::Half>(),
178+
y.DATA_PTR<at::Half>(),
177179
nullptr,
178-
z.data<at::Half>(),
180+
z.DATA_PTR<at::Half>(),
179181
nullptr);
180182

181-
bn->setWeightPointers({scale.data<float>(), bias.data<float>()}, {nullptr, nullptr});
182-
bn->setParameterPointers({running_mean.data<float>(), running_inv_var.data<float>()});
183+
bn->setWeightPointers({scale.DATA_PTR<float>(), bias.DATA_PTR<float>()}, {nullptr, nullptr});
184+
bn->setParameterPointers({running_mean.DATA_PTR<float>(), running_inv_var.DATA_PTR<float>()});
183185

184186
// deal with workspace(s)
185187
auto workspace_bytes = bn->numWorkspaceBytes();
@@ -206,7 +208,7 @@ at::Tensor nhwc_bn_addrelu_fwd_eval(
206208

207209
auto stream = at::cuda::getCurrentCUDAStream().stream();
208210
const int retired_cta_bytes = workspace_bytes[3];
209-
void* retired_ctas = ret_cta.data<uint8_t>();
211+
void* retired_ctas = ret_cta.DATA_PTR<uint8_t>();
210212
assert(ret_cta.size(0)>=retired_cta_bytes);
211213
workspace.push_back(retired_ctas);
212214

@@ -253,7 +255,7 @@ std::vector<at::Tensor> nhwc_bn_addrelu_bwd(
253255
const int C = x.size(3);
254256

255257
// generating new magic number and use that for sync
256-
int* magic = magic_tensor.data<int>();
258+
int* magic = magic_tensor.DATA_PTR<int>();
257259
*magic = (*magic + 1) & 0xff;
258260

259261
// outputs
@@ -274,15 +276,15 @@ std::vector<at::Tensor> nhwc_bn_addrelu_bwd(
274276
bn->setConstants(momentum, epsilon);
275277

276278
// set pointers within the wrapper
277-
bn->setInputOutputPointers(x.data<at::Half>(),
278-
x_grad.data<at::Half>(),
279+
bn->setInputOutputPointers(x.DATA_PTR<at::Half>(),
280+
x_grad.DATA_PTR<at::Half>(),
279281
nullptr,
280-
dy.data<at::Half>(),
282+
dy.DATA_PTR<at::Half>(),
281283
nullptr,
282-
z_grad.data<at::Half>());
284+
z_grad.DATA_PTR<at::Half>());
283285

284-
bn->setWeightPointers({scale.data<float>(), bias.data<float>()}, {scale_grad.data<float>(), bias_grad.data<float>()});
285-
bn->setParameterPointers({running_mean.data<float>(), running_inv_var.data<float>()});
286+
bn->setWeightPointers({scale.DATA_PTR<float>(), bias.DATA_PTR<float>()}, {scale_grad.DATA_PTR<float>(), bias_grad.DATA_PTR<float>()});
287+
bn->setParameterPointers({running_mean.DATA_PTR<float>(), running_inv_var.DATA_PTR<float>()});
286288

287289
// deal with workspace(s)
288290
auto workspace_bytes = bn->numWorkspaceBytes();
@@ -303,13 +305,13 @@ std::vector<at::Tensor> nhwc_bn_addrelu_bwd(
303305
Workspace ws(total_workspace_bytes);
304306

305307
std::vector<void *> workspace;
306-
workspace.push_back(minibatch_mean.data<float>());
307-
workspace.push_back(minibatch_inv_var.data<float>());
308-
workspace.push_back(bitmask.data<int32_t>());
308+
workspace.push_back(minibatch_mean.DATA_PTR<float>());
309+
workspace.push_back(minibatch_inv_var.DATA_PTR<float>());
310+
workspace.push_back(bitmask.DATA_PTR<int32_t>());
309311

310312
auto stream = at::cuda::getCurrentCUDAStream().stream();
311313
const int retired_cta_bytes = workspace_bytes[3];
312-
void* retired_ctas = ret_cta.data<uint8_t>();
314+
void* retired_ctas = ret_cta.DATA_PTR<uint8_t>();
313315
assert(ret_cta.size(0)>=retired_cta_bytes);
314316
workspace.push_back(retired_ctas);
315317

apex/contrib/csrc/groupbn/ipc.cu

Lines changed: 5 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -6,6 +6,8 @@
66

77
#include <cuda.h>
88

9+
#include "compat.h"
10+
911

1012
#define cudaCheckErrors(msg) \
1113
do { \
@@ -114,17 +116,17 @@ int64_t get_buffer_size(const int bn_sync_steps) {
114116

115117
void* get_remote_data_ptr(const at::Tensor& handle, const int64_t offset) {
116118
cudaIpcMemHandle_t my_handle;
117-
memcpy((unsigned char *)(&my_handle), handle.data<uint8_t>(), sizeof(my_handle));
119+
memcpy((unsigned char *)(&my_handle), handle.DATA_PTR<uint8_t>(), sizeof(my_handle));
118120
return ipc_mem_registry.getPtr(my_handle, offset);
119121
}
120122

121123
void close_remote_data(const at::Tensor& handle) {
122124
cudaIpcMemHandle_t my_handle;
123-
memcpy((unsigned char *)(&my_handle), handle.data<uint8_t>(), sizeof(my_handle));
125+
memcpy((unsigned char *)(&my_handle), handle.DATA_PTR<uint8_t>(), sizeof(my_handle));
124126
ipc_mem_registry.releasePtr(my_handle);
125127
}
126128

127129
void* get_data_ptr(
128130
const at::Tensor& data) {
129-
return data.data<uint8_t>();
131+
return data.DATA_PTR<uint8_t>();
130132
}

apex/contrib/csrc/xentropy/xentropy_kernel.cu

Lines changed: 13 additions & 12 deletions
Original file line number · Diff line number · Diff line change
@@ -82,6 +82,7 @@
8282
#include <THC/THCThrustAllocator.cuh>
8383

8484
#include "type_shim.h"
85+
#include "compat.h"
8586

8687
using Tensor = at::Tensor;
8788
using TensorList = at::TensorList;
@@ -492,7 +493,7 @@ std::vector<Tensor> host_softmax_xentropy(
492493
inner_size *= input.size(i);
493494
// This kernel spawns a block per each element in the batch.
494495
// XXX: it assumes that inner_size == 1
495-
AT_CHECK(inner_size == 1, "Currently only inner size 1 supported");
496+
TORCH_CHECK(inner_size == 1, "Currently only inner size 1 supported");
496497

497498
const int ILP = 2;
498499
dim3 grid(outer_size);
@@ -504,15 +505,15 @@ std::vector<Tensor> host_softmax_xentropy(
504505
if (!half_to_float) {
505506
cunn_SoftMaxXEntropyForward<ILP, scalar_t_0, accscalar_t, scalar_t_0, Epilogue>
506507
<<<grid, block, 2 * block.x * sizeof(accscalar_t), stream>>>(
507-
losses.data<accscalar_t>(), max_log_sum_exp.data<scalar_t_0>(),
508-
input.data<scalar_t_0>(), labels_.data<int64_t>(),
508+
losses.DATA_PTR<accscalar_t>(), max_log_sum_exp.DATA_PTR<scalar_t_0>(),
509+
input.DATA_PTR<scalar_t_0>(), labels_.DATA_PTR<int64_t>(),
509510
dim_size, smoothing
510511
);
511512
} else {
512513
cunn_SoftMaxXEntropyForward<ILP, scalar_t_0, accscalar_t, accscalar_t, Epilogue>
513514
<<<grid, block, 2 * block.x * sizeof(accscalar_t), stream>>>(
514-
losses.data<accscalar_t>(), max_log_sum_exp.data<accscalar_t>(),
515-
input.data<scalar_t_0>(), labels_.data<int64_t>(),
515+
losses.DATA_PTR<accscalar_t>(), max_log_sum_exp.DATA_PTR<accscalar_t>(),
516+
input.DATA_PTR<scalar_t_0>(), labels_.DATA_PTR<int64_t>(),
516517
dim_size, smoothing
517518
);
518519
}
@@ -561,7 +562,7 @@ Tensor host_softmax_xentropy_backward(
561562
inner_size *= logits.size(i);
562563
// See descriptions of kernels above.
563564
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
564-
AT_CHECK(inner_size == 1, "Currently only inner size 1 supported");
565+
TORCH_CHECK(inner_size == 1, "Currently only inner size 1 supported");
565566

566567
const int ILP = 2;
567568
dim3 grid(outer_size);
@@ -572,17 +573,17 @@ Tensor host_softmax_xentropy_backward(
572573
if (!half_to_float) {
573574
cunn_SoftMaxXEntropyBackward<ILP, scalar_t_0, accscalar_t, scalar_t_0, Epilogue>
574575
<<<grid, block, block.x * sizeof(accscalar_t), stream>>>(
575-
gI.data<scalar_t_0>(), logits.data<scalar_t_0>(),
576-
max_log_sum_exp.data<scalar_t_0>(),
577-
grad.data<scalar_t_0>(), labels.data<int64_t>(),
576+
gI.DATA_PTR<scalar_t_0>(), logits.DATA_PTR<scalar_t_0>(),
577+
max_log_sum_exp.DATA_PTR<scalar_t_0>(),
578+
grad.DATA_PTR<scalar_t_0>(), labels.DATA_PTR<int64_t>(),
578579
smoothing, dim_size
579580
);
580581
} else {
581582
cunn_SoftMaxXEntropyBackward<ILP, scalar_t_0, accscalar_t, accscalar_t, Epilogue>
582583
<<<grid, block, block.x * sizeof(accscalar_t), stream>>>(
583-
gI.data<scalar_t_0>(), logits.data<scalar_t_0>(),
584-
max_log_sum_exp.data<accscalar_t>(),
585-
grad.data<accscalar_t>(), labels.data<int64_t>(),
584+
gI.DATA_PTR<scalar_t_0>(), logits.DATA_PTR<scalar_t_0>(),
585+
max_log_sum_exp.DATA_PTR<accscalar_t>(),
586+
grad.DATA_PTR<accscalar_t>(), labels.DATA_PTR<int64_t>(),
586587
smoothing, dim_size
587588
);
588589
}

csrc/compat.h

Lines changed: 6 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -1,3 +1,9 @@
11
#ifndef TORCH_CHECK
22
#define TORCH_CHECK AT_CHECK
33
#endif
4+
5+
#ifdef VERSION_GE_1_3
6+
#define DATA_PTR data_ptr
7+
#else
8+
#define DATA_PTR data
9+
#endif

0 commit comments

Comments (0)