
Commit e255a4e

zasdfgbnm authored and facebook-github-bot committed on Sep 18, 2020
Enable bfloat16 random kernels on Windows (pytorch#44918)
Summary: Fixes pytorch#33793

Pull Request resolved: pytorch#44918
Reviewed By: pbelevich
Differential Revision: D23777548
Pulled By: ngimel
fbshipit-source-id: 9cf13166d7deba17bc72e402b82ed0afe347cb9b
1 parent 0638940 commit e255a4e
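
For context, the user-visible effect of this change is that in-place random fills on bfloat16 CUDA tensors behave the same on Windows as on other platforms. A minimal sketch (assumes a CUDA-capable Windows build that includes this commit; shapes and bounds are illustrative):

```python
import torch

# Before this commit, each of these either raised ("random_() is not
# supported for bfloat16 CUDA tensors on Windows...") or crashed with an
# unspecified CUDA launch failure on Windows.
t = torch.empty(8, dtype=torch.bfloat16, device='cuda')
t.random_(0, 6)       # bounded integer fill
t.uniform_(0.0, 1.0)  # continuous uniform fill
print(torch.randint(6, (2, 3), dtype=torch.bfloat16, device='cuda'))
```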

File tree

3 files changed: +3 -57 lines changed

aten/src/ATen/native/cuda/DistributionTemplates.h
test/test_tensor_creation_ops.py
test/test_torch.py


aten/src/ATen/native/cuda/DistributionTemplates.h

-24 lines

```diff
@@ -273,12 +273,6 @@ namespace cuda {
 
 template<typename RNG>
 void random_from_to_kernel(TensorIterator& iter, uint64_t range, int64_t base, RNG gen) {
-#ifdef _WIN32
-  // TODO: https://github.com/pytorch/pytorch/issues/33793
-  if (iter.dtype() == ScalarType::BFloat16) {
-    TORCH_CHECK(false, "random_() is not supported for bfloat16 CUDA tensors on Windows. Please see https://github.com/pytorch/pytorch/issues/33793");
-  }
-#endif
   AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_from_to_kernel_cuda", [&] {
     if ((
       std::is_same<scalar_t, int64_t>::value ||
@@ -319,12 +313,6 @@ void random_from_to_kernel(TensorIterator& iter, uint64_t range, int64_t base, R
 // to(exclusive) = None (= std::numeric_limits<int64_t>::max() + 1)
 template<typename RNG>
 void random_full_64_bits_range_kernel(TensorIterator& iter, RNG gen) {
-#ifdef _WIN32
-  // TODO: https://github.com/pytorch/pytorch/issues/33793
-  if (iter.dtype() == ScalarType::BFloat16) {
-    TORCH_CHECK(false, "random_() is not supported for bfloat16 CUDA tensors on Windows. Please see https://github.com/pytorch/pytorch/issues/33793");
-  }
-#endif
   AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::BFloat16, iter.dtype(), "random_full_64_bits_range_kernel_cuda", [&] {
     if (std::is_same<scalar_t, int64_t>::value ||
         std::is_same<scalar_t, double>::value ||
@@ -361,12 +349,6 @@ struct RandomFromToKernel {
 
 template<typename RNG>
 void random_kernel(TensorIterator& iter, RNG gen) {
-#ifdef _WIN32
-  // TODO: https://github.com/pytorch/pytorch/issues/33793
-  if (iter.dtype() == ScalarType::BFloat16) {
-    TORCH_CHECK(false, "random_() is not supported for bfloat16 CUDA tensors on Windows. Please see https://github.com/pytorch/pytorch/issues/33793");
-  }
-#endif
   AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Half, at::ScalarType::BFloat16, at::ScalarType::Bool, iter.dtype(), "random_kernel_cuda", [&] {
     if (std::is_same<scalar_t, double>::value || std::is_same<scalar_t, int64_t>::value) {
       auto random_func = [] __device__ (uint64_t rand) {
@@ -462,12 +444,6 @@ struct NormalKernel {
 
 template<typename RNG>
 void uniform_kernel(TensorIterator& iter, double from_, double to_, RNG gen) {
-#ifdef _WIN32
-  // TODO: https://github.com/pytorch/pytorch/issues/33793
-  if (iter.dtype() == ScalarType::BFloat16) {
-    TORCH_CHECK(false, "uniform_() is not supported for bfloat16 CUDA tensors on Windows. Please see https://github.com/pytorch/pytorch/issues/33793");
-  }
-#endif
   AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "uniform_kernel_cuda", [&] {
     auto from = static_cast<scalar_t>(from_);
     auto to = static_cast<scalar_t>(to_);
```
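
The four deleted blocks were identical early-exit guards at the top of each kernel template; with them gone, the `AT_DISPATCH_*` macro in each function instantiates the BFloat16 branch on Windows just as it already did on other platforms. A hedged sketch of the user-facing calls that reach each kernel (the mapping is inferred from the kernel names, not stated in the diff):

```python
import torch

t = torch.empty(16, dtype=torch.bfloat16, device='cuda')
t.random_(0, 100)   # random_from_to_kernel: uniform integers in [0, 100)
t.random_()         # random_kernel: integers over the dtype's range
t.uniform_(-1, 1)   # uniform_kernel: continuous uniform in [-1, 1)
# random_full_64_bits_range_kernel handles random_() for dtypes that can
# hold the full 64-bit range, e.g. int64; shown here for completeness.
torch.empty(16, dtype=torch.int64, device='cuda').random_()
```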

test/test_tensor_creation_ops.py

+2 -5 lines

```diff
@@ -7,7 +7,7 @@
 
 from torch.testing._internal.common_utils import \
     (TestCase, run_tests, do_test_empty_full, TEST_NUMPY, suppress_warnings,
-     IS_WINDOWS, torch_to_numpy_dtype_dict, slowTest)
+     torch_to_numpy_dtype_dict, slowTest)
 from torch.testing._internal.common_device_type import \
     (instantiate_device_type_tests, deviceCountAtLeast, onlyOnCPUAndCUDA,
     onlyCPU, skipCUDAIfNotRocm, largeCUDATensorTest, precisionOverride, dtypes,
@@ -822,10 +822,7 @@ def test_tensor_factories_empty(self, device):
         self.assertEqual(shape, torch.empty_like(torch.zeros(shape, device=device, dtype=dt)).shape)
         self.assertEqual(shape, torch.empty_strided(shape, (0,) * len(shape), device=device, dtype=dt).shape)
 
-        if dt == torch.bfloat16 and device.startswith('cuda') and IS_WINDOWS:
-            # TODO: https://github.com/pytorch/pytorch/issues/33793
-            self.assertRaises(RuntimeError, lambda: torch.randint(6, shape, device=device, dtype=dt).shape)
-        elif dt == torch.bool:
+        if dt == torch.bool:
             self.assertEqual(shape, torch.randint(2, shape, device=device, dtype=dt).shape)
             self.assertEqual(shape, torch.randint_like(torch.zeros(shape, device=device, dtype=dt), 2).shape)
         elif dt.is_complex:
```
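
The deleted branch asserted that `torch.randint` raises `RuntimeError` for bfloat16 CUDA tensors on Windows; the updated test expects the factory to succeed everywhere. A standalone check in the same spirit (the shape is illustrative, not taken from the test):

```python
import torch

shape = (2, 3)
# Previously expected to raise RuntimeError on Windows + CUDA + bfloat16;
# now expected to return a tensor of the requested shape on all platforms.
out = torch.randint(6, shape, dtype=torch.bfloat16, device='cuda')
assert out.shape == shape
```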

test/test_torch.py

+1 -28 lines

```diff
@@ -10897,10 +10897,6 @@ def helper(self, device, dtype, ptype, t_transform, std_transform):
     @dtypes(torch.float, torch.double, torch.half)
     @dtypesIfCUDA(torch.float, torch.double, torch.half, torch.bfloat16)
     def test_uniform_from_to(self, device, dtype):
-        # TODO: https://github.com/pytorch/pytorch/issues/33793
-        if IS_WINDOWS and device.startswith('cuda') and dtype == torch.bfloat16:
-            raise unittest.SkipTest("Crashes with CUDA error: unspecified launch failure")
-
         size = 2000
         alpha = 0.1
 
@@ -11119,10 +11115,6 @@ def test_exp(self, device, dtype):
     @skipIfNoSciPy
     @dtypes(*torch.testing.get_all_fp_dtypes())
     def test_uniform_kstest(self, device, dtype):
-        # TODO: https://github.com/pytorch/pytorch/issues/33793
-        if IS_WINDOWS and device.startswith('cuda') and dtype == torch.bfloat16:
-            raise unittest.SkipTest("Crashes with CUDA error: unspecified launch failure")
-
         from scipy import stats
         size = 1000
         for from_ in [-42, 0, 4.2]:
@@ -12244,10 +12236,7 @@ def test_bool_tensor_value_change(self, device):
     def test_unfold_all_devices_and_dtypes(self, device):
         for dt in torch.testing.get_all_dtypes():
 
-            if dt == torch.bfloat16 and device.startswith('cuda') and IS_WINDOWS:
-                # TODO: https://github.com/pytorch/pytorch/issues/33793
-                self.assertRaises(RuntimeError, lambda: torch.randint(5, (0, 1, 3, 0), dtype=dt, device=device))
-            elif dt == torch.bool:
+            if dt == torch.bool:
                 x = torch.empty((0, 1, 3, 0), dtype=dt, device=device)
                 self.assertEqual((0, 1, 1, 0, 3), x.unfold(2, 3, 2).shape)
             else:
@@ -17629,10 +17618,6 @@ def test_random_from_to_bool(self, device):
 
     @dtypes(*(torch.testing.get_all_int_dtypes() + torch.testing.get_all_fp_dtypes()))
     def test_random_full_range(self, device, dtype):
-        # TODO: https://github.com/pytorch/pytorch/issues/33793
-        if IS_WINDOWS and device.startswith('cuda') and dtype == torch.bfloat16:
-            raise unittest.SkipTest("Crashes with CUDA error: unspecified launch failure")
-
         size = 2000
         alpha = 0.1
 
@@ -17667,10 +17652,6 @@ def test_random_full_range(self, device, dtype):
 
     @dtypes(*(torch.testing.get_all_int_dtypes() + torch.testing.get_all_fp_dtypes()))
     def test_random_from_to(self, device, dtype):
-        # TODO: https://github.com/pytorch/pytorch/issues/33793
-        if IS_WINDOWS and device.startswith('cuda') and dtype == torch.bfloat16:
-            raise unittest.SkipTest("Crashes with CUDA error: unspecified launch failure")
-
         size = 2000
         alpha = 0.1
 
@@ -17760,10 +17741,6 @@ def test_random_from_to(self, device, dtype):
 
     @dtypes(*(torch.testing.get_all_int_dtypes() + torch.testing.get_all_fp_dtypes()))
     def test_random_to(self, device, dtype):
-        # TODO: https://github.com/pytorch/pytorch/issues/33793
-        if IS_WINDOWS and device.startswith('cuda') and dtype == torch.bfloat16:
-            raise unittest.SkipTest("Crashes with CUDA error: unspecified launch failure")
-
         size = 2000
         alpha = 0.1
 
@@ -17822,10 +17799,6 @@ def test_random_to(self, device, dtype):
 
     @dtypes(*(torch.testing.get_all_int_dtypes() + torch.testing.get_all_fp_dtypes()))
     def test_random_default(self, device, dtype):
-        # TODO: https://github.com/pytorch/pytorch/issues/33793
-        if IS_WINDOWS and device.startswith('cuda') and dtype == torch.bfloat16:
-            raise unittest.SkipTest("Crashes with CUDA error: unspecified launch failure")
-
         size = 2000
         alpha = 0.1
 
```
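With these skips removed, `test_uniform_kstest` and the `test_random_*` family again cover bfloat16 on Windows CUDA. A self-contained sketch of the kstest-style check (sample size, bounds, and p-value threshold are illustrative, not copied from the test body):

```python
import torch
from scipy import stats

t = torch.empty(1000, dtype=torch.bfloat16, device='cuda').uniform_(0, 1)
# Kolmogorov-Smirnov test against U(0, 1); a non-tiny p-value means the
# samples are statistically consistent with the uniform distribution.
res = stats.kstest(t.to(torch.double).cpu().numpy(), 'uniform', args=(0, 1))
assert res.pvalue > 1e-4
```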