Skip to content

Commit f07ac6a

Browse files
gunandrose4u authored and facebook-github-bot committed Sep 25, 2020
Fix Windows build failure after DDP PR merged (pytorch#45335)
Summary: Fixes #{issue number} This is resubmit for PR pytorch#42897 . Together with fix for Windows build issue introduced by PR pytorch#44344 . Pull Request resolved: pytorch#45335 Reviewed By: zou3519 Differential Revision: D23931471 Pulled By: mrshenli fbshipit-source-id: f49b5a114944c1450b32934b3292170be064f494
1 parent c8166d4 commit f07ac6a

39 files changed

+464
-167
lines changed
 

‎.jenkins/pytorch/win-test-helpers/installation-helpers/install_miniconda3.bat

+7
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,11 @@ call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Minic
1212
if "%REBUILD%"=="" (
1313
call conda install -y -q python=%PYTHON_VERSION% numpy cffi pyyaml boto3
1414
call conda install -y -q -c conda-forge cmake
15+
call conda install -y -q -c rdonnelly libuv
1516
)
17+
18+
:: Get installed libuv path
19+
@echo off
20+
set libuv_ROOT=%CONDA_PARENT_DIR%\Miniconda3\Library
21+
@echo on
22+
echo libuv_ROOT=%libuv_ROOT%

‎CMakeLists.txt

+7-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ endif()
103103

104104
# For non-supported platforms, turn USE_DISTRIBUTED off by default.
105105
# It is not tested and likely won't work without additional changes.
106-
if(NOT LINUX)
106+
if(NOT LINUX AND NOT WIN32)
107107
set(USE_DISTRIBUTED OFF CACHE STRING "Use distributed")
108108
# On macOS, if USE_DISTRIBUTED is enabled (specified by the user),
109109
# then make Gloo build with the libuv transport.
@@ -226,6 +226,12 @@ option(USE_TBB "Use TBB" OFF)
226226
option(ONNX_ML "Enable traditional ONNX ML API." ON)
227227
option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
228228

229+
# Since TensorPipe does not support Windows, set it to OFF when WIN32 detected
230+
if(WIN32)
231+
set(USE_TENSORPIPE OFF)
232+
message(WARNING "TensorPipe cannot be used on Windows. Set it to OFF")
233+
endif()
234+
229235
# Linux distributions do not want too many embedded sources, in that sense we
230236
# need to be able to build pytorch with an (almost) empty third_party
231237
# directory.

‎caffe2/CMakeLists.txt

+29-20
Original file line numberDiff line numberDiff line change
@@ -291,26 +291,29 @@ endif()
291291

292292
if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
293293
if(USE_DISTRIBUTED)
294-
add_library(process_group_agent "${TORCH_SRC_DIR}/csrc/distributed/rpc/process_group_agent.cpp" "${TORCH_SRC_DIR}/csrc/distributed/rpc/process_group_agent.h")
295-
target_link_libraries(process_group_agent PRIVATE torch c10d fmt::fmt-header-only)
296-
add_dependencies(process_group_agent torch c10d)
297294

298295
# Define this target even if we're building without TensorPipe, to make life
299296
# easier to other targets that depend on this. However, in that case, by not
300297
# setting the USE_TENSORPIPE compile definition, this target will just end
301298
# up being empty. Downstream targets should also add a #ifdef guard.
302-
add_library(tensorpipe_agent
303-
"${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_agent.cpp"
304-
"${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_agent.h"
305-
"${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_utils.cpp"
306-
"${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_utils.h"
307-
)
308-
target_link_libraries(tensorpipe_agent PRIVATE torch c10d tensorpipe fmt::fmt-header-only)
309-
add_dependencies(tensorpipe_agent torch c10d)
310-
if(USE_TENSORPIPE)
311-
target_compile_definitions(tensorpipe_agent PUBLIC USE_TENSORPIPE)
312-
target_link_libraries(tensorpipe_agent PRIVATE tensorpipe)
313-
add_dependencies(tensorpipe_agent tensorpipe)
299+
if(NOT WIN32)
300+
add_library(process_group_agent "${TORCH_SRC_DIR}/csrc/distributed/rpc/process_group_agent.cpp" "${TORCH_SRC_DIR}/csrc/distributed/rpc/process_group_agent.h")
301+
target_link_libraries(process_group_agent PRIVATE torch c10d fmt::fmt-header-only)
302+
add_dependencies(process_group_agent torch c10d)
303+
304+
add_library(tensorpipe_agent
305+
"${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_agent.cpp"
306+
"${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_agent.h"
307+
"${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_utils.cpp"
308+
"${TORCH_SRC_DIR}/csrc/distributed/rpc/tensorpipe_utils.h"
309+
)
310+
target_link_libraries(tensorpipe_agent PRIVATE torch c10d tensorpipe fmt::fmt-header-only)
311+
add_dependencies(tensorpipe_agent torch c10d)
312+
if(USE_TENSORPIPE)
313+
target_compile_definitions(tensorpipe_agent PUBLIC USE_TENSORPIPE)
314+
target_link_libraries(tensorpipe_agent PRIVATE tensorpipe)
315+
add_dependencies(tensorpipe_agent tensorpipe)
316+
endif()
314317
endif()
315318
endif()
316319

@@ -493,7 +496,7 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
493496
PROPERTIES COMPILE_FLAGS "-DC10_DISABLE_LEGACY_IMPORT"
494497
)
495498
endif()
496-
if(USE_DISTRIBUTED)
499+
if(USE_DISTRIBUTED AND NOT WIN32)
497500
append_filelist("libtorch_distributed_sources" TORCH_SRCS)
498501
endif()
499502
endif()
@@ -841,7 +844,7 @@ endif()
841844
if(BUILD_TEST AND NOT USE_ROCM)
842845
add_subdirectory(${TORCH_ROOT}/test/cpp/jit ${CMAKE_BINARY_DIR}/test_jit)
843846
add_subdirectory(${TORCH_ROOT}/test/cpp/tensorexpr ${CMAKE_BINARY_DIR}/test_tensorexpr)
844-
if(USE_DISTRIBUTED)
847+
if(USE_DISTRIBUTED AND NOT WIN32)
845848
add_subdirectory(${TORCH_ROOT}/test/cpp/rpc ${CMAKE_BINARY_DIR}/test_cpp_rpc)
846849
endif()
847850
endif()
@@ -893,9 +896,7 @@ endif()
893896
DESTINATION share/cmake/Torch)
894897

895898
if(USE_DISTRIBUTED)
896-
if(NOT MSVC)
897-
add_subdirectory(${TORCH_SRC_DIR}/lib/c10d lib_c10d)
898-
endif()
899+
add_subdirectory(${TORCH_SRC_DIR}/lib/c10d lib_c10d)
899900
endif()
900901

901902

@@ -970,6 +971,14 @@ if(USE_DISTRIBUTED)
970971
target_compile_definitions(torch_cpu PRIVATE
971972
USE_DISTRIBUTED
972973
)
974+
# Pass USE_RPC in order to reduce use of
975+
# #if defined(USE_DISTRIBUTED) && !defined(_WIN32)
976+
# need to be removed when RPC is supported
977+
if(NOT WIN32)
978+
target_compile_definitions(torch_cpu PRIVATE
979+
USE_RPC
980+
)
981+
endif()
973982
# Pass USE_TENSORPIPE to torch_cpu as some parts of rpc/utils.cpp
974983
# can only be compiled with USE_TENSORPIPE is set.
975984
if(USE_TENSORPIPE)

‎cmake/Dependencies.cmake

+1-4
Original file line numberDiff line numberDiff line change
@@ -1253,10 +1253,7 @@ if(USE_CUDA)
12531253
endif()
12541254

12551255
if(USE_GLOO)
1256-
if(MSVC)
1257-
message(WARNING "Gloo can not be used on Windows.")
1258-
caffe2_update_option(USE_GLOO OFF)
1259-
elseif(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
1256+
if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
12601257
message(WARNING "Gloo can only be used on 64-bit systems.")
12611258
caffe2_update_option(USE_GLOO OFF)
12621259
else()

‎test/cpp/dist_autograd/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
if(USE_DISTRIBUTED)
1+
if(USE_DISTRIBUTED AND NOT WIN32)
22
set(DIST_AUTOGRAD_TEST_DIR "${TORCH_ROOT}/test/cpp/dist_autograd")
33
set(DIST_AUTOGRAD_TEST_SOURCES
44
${TORCH_ROOT}/test/cpp/common/main.cpp

‎test/distributed/test_c10d.py

+35-14
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from torch.testing._internal.common_distributed import MultiProcessTestCase, \
3030
requires_gloo, requires_nccl, requires_nccl_version, \
3131
skip_if_not_multigpu, skip_if_lt_x_gpu, get_timeout, skip_if_rocm, \
32-
simple_sparse_reduce_tests
32+
simple_sparse_reduce_tests, skip_if_win32, create_device
3333

3434
from torch.testing._internal.common_utils import TestCase, load_tests, run_tests, \
3535
retry_on_connect_failures, ADDRESS_IN_USE, CONNECT_TIMEOUT, TEST_WITH_TSAN
@@ -255,6 +255,7 @@ def create_tcp_store(addr):
255255
raise RuntimeError("Unable to find free port (tried %s)" % ", ".join(ports))
256256

257257

258+
@skip_if_win32()
258259
class TCPStoreTest(TestCase, StoreTestBase):
259260
def _create_store(self):
260261
store = create_tcp_store('localhost')
@@ -273,6 +274,7 @@ def test_address_already_in_use(self):
273274
store2 = c10d.TCPStore(addr, port, 1, True) # noqa: F841
274275

275276

277+
@skip_if_win32()
276278
class PrefixTCPStoreTest(TestCase, StoreTestBase):
277279
def setUp(self):
278280
super(PrefixTCPStoreTest, self).setUp()
@@ -329,6 +331,7 @@ def test_unknown_handler(self):
329331
c10d.rendezvous('invalid://')
330332

331333

334+
@skip_if_win32()
332335
class RendezvousEnvTest(TestCase):
333336
@retry_on_connect_failures
334337
def test_common_errors(self):
@@ -455,7 +458,7 @@ def test_common_errors(self):
455458

456459
def test_nominal(self):
457460
with tempfile.NamedTemporaryFile(delete=False) as file:
458-
url = 'file://%s?world_size=%d' % (file.name, 2)
461+
url = f'file:///{file.name.replace(os.path.sep, "/")}?world_size=2'
459462
gen0 = c10d.rendezvous(url + "&rank=0")
460463
store0, rank0, size0 = next(gen0)
461464
self.assertEqual(0, rank0)
@@ -474,6 +477,7 @@ def test_nominal(self):
474477
self.assertEqual(b"value1", store0.get("key1"))
475478

476479

480+
@skip_if_win32()
477481
class RendezvousTCPTest(TestCase):
478482

479483
def create_tcp_url(self):
@@ -544,9 +548,13 @@ def _test_store_timeout(self, backend, init_method, c2p):
544548

545549
def _init_methods(self):
546550
f = tempfile.NamedTemporaryFile(delete=False)
547-
yield "file://%s" % f.name
548-
f.close()
549-
yield "tcp://127.0.0.1:%d" % common.find_free_port()
551+
if sys.platform == 'win32':
552+
yield "file:///%s" % f.name.replace("\\", "/")
553+
f.close()
554+
else:
555+
yield "file://%s" % f.name
556+
f.close()
557+
yield "tcp://127.0.0.1:%d" % common.find_free_port()
550558

551559
def _test_default_store_timeout(self, backend):
552560
for init_method in self._init_methods():
@@ -584,11 +592,16 @@ def test_default_store_timeout_gloo(self):
584592
class ProcessGroupGlooTest(MultiProcessTestCase):
585593
def setUp(self):
586594
super(ProcessGroupGlooTest, self).setUp()
587-
self._fork_processes()
595+
596+
# For Windows platform, Python does not support fork, change it to spawn here.
597+
if sys.platform == 'win32':
598+
self._spawn_processes()
599+
else:
600+
self._fork_processes()
588601

589602
def opts(self, threads=2):
590603
opts = c10d.ProcessGroupGloo.Options()
591-
opts.devices = [c10d.ProcessGroupGloo.create_device(interface=LOOPBACK)]
604+
opts.devices = [create_device(interface=LOOPBACK)]
592605
opts.timeout = 5.0
593606
opts.threads = threads
594607
return opts
@@ -598,8 +611,8 @@ def test_multi_device_constructor(self):
598611
opts = c10d.ProcessGroupGloo.Options()
599612
opts.timeout = 5.0
600613
opts.devices = [
601-
c10d.ProcessGroupGloo.create_device(interface=LOOPBACK),
602-
c10d.ProcessGroupGloo.create_device(interface=LOOPBACK),
614+
create_device(interface=LOOPBACK),
615+
create_device(interface=LOOPBACK),
603616
]
604617
pg = c10d.ProcessGroupGloo(store, self.rank, self.world_size, opts)
605618

@@ -1514,6 +1527,7 @@ def test_barrier_implies_wait(self):
15141527
for i, tensor in enumerate(tensors):
15151528
self.assertEqual(torch.full(size, float(i * self.world_size)), tensor)
15161529

1530+
@skip_if_win32()
15171531
def test_round_robin(self):
15181532
num_process_groups = 2
15191533
store = c10d.FileStore(self.file_name, self.world_size)
@@ -1531,6 +1545,7 @@ def test_round_robin(self):
15311545
pg.broadcast(tensor, root=0).wait()
15321546
self.assertEqual(torch.full([100, 100], 0.), tensor)
15331547

1548+
@skip_if_win32()
15341549
def test_round_robin_create_destroy(self):
15351550
store = c10d.FileStore(self.file_name, self.world_size)
15361551

@@ -1959,7 +1974,10 @@ def forward(self, x):
19591974
class DistributedDataParallelTest(MultiProcessTestCase):
19601975
def setUp(self):
19611976
super(DistributedDataParallelTest, self).setUp()
1962-
self._fork_processes()
1977+
if sys.platform == 'win32':
1978+
self._spawn_processes()
1979+
else:
1980+
self._fork_processes()
19631981

19641982
def tearDown(self):
19651983
# DistributedDataParallel test doesn't seem to call FileStore destructor
@@ -2068,7 +2086,7 @@ def update_parameters(model):
20682086
def _test_gloo_backend(self, devices, device_ids, multi_device=False, gradient_as_bucket_view=False):
20692087
store = c10d.FileStore(self.file_name, self.world_size)
20702088
options = c10d.ProcessGroupGloo.Options()
2071-
options.devices = [c10d.ProcessGroupGloo.create_device(interface=LOOPBACK)]
2089+
options.devices = [create_device(interface=LOOPBACK)]
20722090
process_group = c10d.ProcessGroupGloo(store, self.rank, self.world_size, options)
20732091
self._test_ddp_with_process_group(process_group, devices, device_ids, multi_device, gradient_as_bucket_view)
20742092

@@ -3947,7 +3965,10 @@ def test_nccl_timeout(self):
39473965
class CommTest(MultiProcessTestCase):
39483966
def setUp(self):
39493967
super(CommTest, self).setUp()
3950-
self._fork_processes()
3968+
if sys.platform == 'win32':
3969+
self._spawn_processes()
3970+
else:
3971+
self._fork_processes()
39513972

39523973
def tearDown(self):
39533974
super(CommTest, self).tearDown()
@@ -4013,7 +4034,7 @@ def test_broadcast_coalesced_nccl(self):
40134034
def test_broadcast_coalesced_gloo_cuda(self):
40144035
store = c10d.FileStore(self.file_name, self.world_size)
40154036
options = c10d.ProcessGroupGloo.Options()
4016-
options.devices = [c10d.ProcessGroupGloo.create_device(interface=LOOPBACK)]
4037+
options.devices = [create_device(interface=LOOPBACK)]
40174038
process_group = c10d.ProcessGroupGloo(store, self.rank, self.world_size, options)
40184039
device = torch.device("cuda:%d" % self.rank)
40194040
ranks = list(range(self.world_size))
@@ -4024,7 +4045,7 @@ def test_broadcast_coalesced_gloo_cuda(self):
40244045
def test_broadcast_coalesced_gloo_cpu(self):
40254046
store = c10d.FileStore(self.file_name, self.world_size)
40264047
options = c10d.ProcessGroupGloo.Options()
4027-
options.devices = [c10d.ProcessGroupGloo.create_device(interface=LOOPBACK)]
4048+
options.devices = [create_device(interface=LOOPBACK)]
40284049
process_group = c10d.ProcessGroupGloo(store, self.rank, self.world_size, options)
40294050
device = torch.device("cpu")
40304051
ranks = list(range(self.world_size))

‎test/distributed/test_c10d_spawn.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010
import torch.nn as nn
1111

1212
from torch.testing._internal.common_cuda import TEST_CUDA, TEST_MULTIGPU
13-
from torch.testing._internal.common_distributed import requires_gloo
14-
from torch.testing._internal.common_utils import TestCase, load_tests, run_tests, skipIfRocm
13+
from torch.testing._internal.common_distributed import requires_gloo, \
14+
create_device
15+
from torch.testing._internal.common_utils import TestCase, load_tests, \
16+
run_tests, skipIfRocm
1517
from torch.testing._internal.common_utils import NO_MULTIPROCESSING_SPAWN, TEST_WITH_TSAN
1618

1719

@@ -39,7 +41,7 @@ class ProcessGroupShareTensorTest(TestCase):
3941
@classmethod
4042
def opts(cls, threads=2):
4143
opts = c10d.ProcessGroupGloo.Options()
42-
opts.devices = [c10d.ProcessGroupGloo.create_device(interface="lo")]
44+
opts.devices = [create_device(interface='lo')]
4345
opts.timeout = 5.0
4446
opts.threads = threads
4547
return opts

‎test/run_test.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import torch
1414
import torch._six
1515
from torch.utils import cpp_extension
16-
from torch.testing._internal.common_utils import TEST_WITH_ROCM, shell
16+
from torch.testing._internal.common_utils import TEST_WITH_ROCM, shell, FILE_SCHEMA
1717
import torch.distributed as dist
1818
from typing import Dict, Optional
1919

@@ -100,7 +100,6 @@
100100
'distributed/rpc/test_process_group_agent',
101101
'distributed/rpc/test_tensorpipe_agent',
102102
'distributed/test_distributed_fork',
103-
'distributed/test_distributed_spawn',
104103
]
105104

106105
ROCM_BLOCKLIST = [
@@ -307,9 +306,13 @@ def test_distributed(test_module, test_directory, options):
307306
'MPI not available -- MPI backend tests will be skipped')
308307
config = DISTRIBUTED_TESTS_CONFIG
309308
for backend, env_vars in config.items():
309+
if sys.platform == 'win32' and backend != 'gloo':
310+
continue
310311
if backend == 'mpi' and not mpi_available:
311312
continue
312313
for with_init_file in {True, False}:
314+
if sys.platform == 'win32' and not with_init_file:
315+
continue
313316
tmp_dir = tempfile.mkdtemp()
314317
if options.verbose:
315318
init_str = "with {} init_method"
@@ -323,9 +326,9 @@ def test_distributed(test_module, test_directory, options):
323326
os.environ.update(env_vars)
324327
if with_init_file:
325328
if test_module in ["test_distributed_fork", "test_distributed_spawn"]:
326-
init_method = 'file://{}/'.format(tmp_dir)
329+
init_method = f'{FILE_SCHEMA}{tmp_dir}/'
327330
else:
328-
init_method = 'file://{}/shared_init_file'.format(tmp_dir)
331+
init_method = f'{FILE_SCHEMA}{tmp_dir}/shared_init_file'
329332
os.environ['INIT_METHOD'] = init_method
330333
try:
331334
os.mkdir(os.path.join(tmp_dir, 'barrier'))

‎tools/build_variables.bzl

+5-2
Original file line numberDiff line numberDiff line change
@@ -542,11 +542,14 @@ libtorch_python_core_sources = [
542542
"torch/csrc/utils/disable_torch_function.cpp",
543543
]
544544

545-
libtorch_python_distributed_sources = [
546-
"torch/csrc/distributed/autograd/init.cpp",
545+
libtorch_python_distributed_core_sources = [
547546
"torch/csrc/distributed/c10d/comm.cpp",
548547
"torch/csrc/distributed/c10d/init.cpp",
549548
"torch/csrc/distributed/c10d/reducer.cpp",
549+
]
550+
551+
libtorch_python_distributed_sources = libtorch_python_distributed_core_sources + [
552+
"torch/csrc/distributed/autograd/init.cpp",
550553
"torch/csrc/distributed/rpc/init.cpp",
551554
"torch/csrc/distributed/rpc/process_group_agent.cpp",
552555
"torch/csrc/distributed/rpc/py_rref.cpp",

0 commit comments

Comments (0)
Please sign in to comment.