@@ -2876,6 +2876,38 @@ def _return_gpu_tensor_list():
2876
2876
def _gpu_tensor_list_arg (tensor_list ):
2877
2877
return torch .rand (3 , 3 )
2878
2878
2879
@skip_if_lt_x_gpu(2)
@dist_init
def test_cuda(self):
    """Check that ``rpc.rpc_sync`` rejects CUDA tensors with a ``RuntimeError``.

    Every case below expects the same error regex, whether the CUDA
    tensor appears in the arguments or comes back from the callee, and
    whether the destination is the next worker or this worker itself.
    """
    dst = worker_name((self.rank + 1) % self.world_size)
    t1 = torch.rand(3, 3).cuda(0)
    t2 = torch.rand(3, 3).cuda(1)
    t3 = torch.rand(3, 3)
    # Shared by all cases so the expected message is spelled out once.
    cpu_only_err = (
        "RPC backend only supports CPU tensors.*Found tensor on device: cuda:0"
    )

    # cuda tensors as args fail.
    with self.assertRaisesRegex(RuntimeError, cpu_only_err):
        rpc.rpc_sync(dst, torch.add, args=(t1, t2))

    # mix of cpu and cuda tensors as args fail.
    with self.assertRaisesRegex(RuntimeError, cpu_only_err):
        rpc.rpc_sync(dst, torch.add, args=(t1, t3))

    # gpu tensor list as args fails.
    # The trailing comma matters: ``([t1, t2])`` is just the list itself,
    # while ``([t1, t2],)`` passes the list as the single argument.
    with self.assertRaisesRegex(RuntimeError, cpu_only_err):
        rpc.rpc_sync(dst, RpcTest._gpu_tensor_list_arg, args=([t1, t2],))

    # cuda tensors as return values fail.
    with self.assertRaisesRegex(RuntimeError, cpu_only_err):
        rpc.rpc_sync(dst, RpcTest._return_gpu_tensor, args=())

    # cuda tensors as a list of return value fails
    with self.assertRaisesRegex(RuntimeError, cpu_only_err):
        rpc.rpc_sync(dst, RpcTest._return_gpu_tensor_list, args=())

    # Sending to self should fail too.
    with self.assertRaisesRegex(RuntimeError, cpu_only_err):
        rpc.rpc_sync(worker_name(self.rank), torch.add, args=(t1, t2))
2910
+
2879
2911
def _create_rref (self ):
2880
2912
owner_rank = (self .rank + 2 ) % self .world_size
2881
2913
return rpc .remote (
@@ -3609,39 +3641,6 @@ def test_logs_deprecation_warning(self):
3609
3641
"\n " .join (cm .output ),
3610
3642
)
3611
3643
3612
@skip_if_lt_x_gpu(2)
@dist_init
def test_cuda(self):
    """Check that ``rpc.rpc_sync`` rejects CUDA tensors with a ``RuntimeError``.

    Every case below expects the same error regex, whether the CUDA
    tensor appears in the arguments or comes back from the callee, and
    whether the destination is the next worker or this worker itself.
    """
    dst = worker_name((self.rank + 1) % self.world_size)
    t1 = torch.rand(3, 3).cuda(0)
    t2 = torch.rand(3, 3).cuda(1)
    t3 = torch.rand(3, 3)
    # Shared by all cases so the expected message is spelled out once.
    cpu_only_err = (
        "RPC backend only supports CPU tensors.*Found tensor on device: cuda:0"
    )

    # cuda tensors as args fail.
    with self.assertRaisesRegex(RuntimeError, cpu_only_err):
        rpc.rpc_sync(dst, torch.add, args=(t1, t2))

    # mix of cpu and cuda tensors as args fail.
    with self.assertRaisesRegex(RuntimeError, cpu_only_err):
        rpc.rpc_sync(dst, torch.add, args=(t1, t3))

    # gpu tensor list as args fails.
    # The trailing comma matters: ``([t1, t2])`` is just the list itself,
    # while ``([t1, t2],)`` passes the list as the single argument.
    with self.assertRaisesRegex(RuntimeError, cpu_only_err):
        rpc.rpc_sync(dst, RpcTest._gpu_tensor_list_arg, args=([t1, t2],))

    # cuda tensors as return values fail.
    with self.assertRaisesRegex(RuntimeError, cpu_only_err):
        rpc.rpc_sync(dst, RpcTest._return_gpu_tensor, args=())

    # cuda tensors as a list of return value fails
    with self.assertRaisesRegex(RuntimeError, cpu_only_err):
        rpc.rpc_sync(dst, RpcTest._return_gpu_tensor_list, args=())

    # Sending to self should fail too.
    with self.assertRaisesRegex(RuntimeError, cpu_only_err):
        rpc.rpc_sync(worker_name(self.rank), torch.add, args=(t1, t2))
3643
-
3644
-
3645
3644
def test_single_threaded_rref_owner (self ):
3646
3645
# We need a process group in order to perform a barrier at the end.
3647
3646
dist .init_process_group (
@@ -4288,319 +4287,3 @@ def test_tensorpipe_options_throw_on_timedelta_timeout(self):
4288
4287
num_worker_threads = self .rpc_backend_options .num_worker_threads ,
4289
4288
rpc_timeout = timeout ,
4290
4289
)
4291
-
4292
def _test_device_maps(self, options, errMsg="Invalid device_map"):
    """Assert that ``rpc.init_rpc`` raises ``ValueError`` for ``options``.

    Args:
        options: backend options forwarded as ``rpc_backend_options``.
        errMsg: regex the ``ValueError`` message has to match.
    """
    with self.assertRaisesRegex(ValueError, errMsg):
        init_kwargs = {
            "name": worker_name(self.rank),
            "backend": self.rpc_backend,
            "rank": self.rank,
            "world_size": self.world_size,
            "rpc_backend_options": options,
        }
        rpc.init_rpc(**init_kwargs)

    # After the rejected init, no current RPC agent may be set.
    self.assertFalse(rpc.api._is_current_rpc_agent_set())
4303
-
4304
@skip_if_lt_x_gpu(2)
def test_device_maps_wrong_worker_name(self):
    """A device map for the name "none_exist" must fail init with "Wrong worker names"."""
    backend_opts = self.rpc_backend_options
    backend_opts.set_device_map("none_exist", {0: 1})
    self._test_device_maps(backend_opts, errMsg="Wrong worker names")
4309
-
4310
@skip_if_lt_x_gpu(1)
def test_device_maps_invalid_max_local_device(self):
    """Using ``torch.cuda.device_count()`` as the map key must be rejected at init."""
    backend_opts = self.rpc_backend_options
    next_worker = worker_name((self.rank + 1) % self.world_size)
    # device_count() is one past the largest valid device index.
    bad_map = {torch.cuda.device_count(): 0}
    backend_opts.set_device_map(next_worker, bad_map)

    self._test_device_maps(backend_opts)
4317
-
4318
@skip_if_lt_x_gpu(1)
def test_device_maps_invalid_max_remote_device(self):
    """Using ``torch.cuda.device_count()`` as the map value must be rejected at init."""
    backend_opts = self.rpc_backend_options
    next_worker = worker_name((self.rank + 1) % self.world_size)
    # device_count() is one past the largest valid device index.
    bad_map = {0: torch.cuda.device_count()}
    backend_opts.set_device_map(next_worker, bad_map)

    self._test_device_maps(backend_opts)
4325
-
4326
@skip_if_lt_x_gpu(2)
def test_device_maps_many_to_one(self):
    """Mapping both device 1 and device 0 onto device 0 must be rejected at init."""
    backend_opts = self.rpc_backend_options
    next_worker = worker_name((self.rank + 1) % self.world_size)
    # Two separate calls, in this order, each adding one entry that targets device 0.
    for mapping in ({1: 0}, {0: 0}):
        backend_opts.set_device_map(next_worker, mapping)

    self._test_device_maps(backend_opts)
4334
-
4335
@skip_if_lt_x_gpu(2)
def test_device_maps_one_to_many(self):
    """On rank 0, remapping device 0 a second time must raise ``ValueError``."""
    # Only rank 0 performs the check; every other rank does nothing.
    if self.rank != 0:
        return

    backend_opts = self.rpc_backend_options
    next_worker = worker_name((self.rank + 1) % self.world_size)
    backend_opts.set_device_map(next_worker, {0: 1})

    expected = "`set_device_map` only supports 1-to-1 mapping"
    with self.assertRaisesRegex(ValueError, expected):
        backend_opts.set_device_map(next_worker, {0: 0})
4345
-
4346
@skip_if_lt_x_gpu(1)
def test_device_maps_invalid_min_device(self):
    """A negative index as map key or map value must raise ``RuntimeError``."""
    backend_opts = self.rpc_backend_options
    next_worker = worker_name((self.rank + 1) % self.world_size)

    # First the negative key, then the negative value.
    for negative_map in ({-1: 0}, {0: -1}):
        with self.assertRaisesRegex(
            RuntimeError, "Device index must not be negative"
        ):
            backend_opts.set_device_map(next_worker, negative_map)
4359
-
4360
@staticmethod
def _gpu_add(x, y):
    """Return ``(x + y).to(0)`` when both inputs are CUDA tensors on device index 1.

    Raises:
        ValueError: if either input is not a CUDA tensor with device index 1.
    """
    placements = (x.is_cuda, x.device.index == 1, y.is_cuda, y.device.index == 1)
    if not all(placements):
        raise ValueError("Wrong device affinity")
    return (x + y).to(0)
4366
-
4367
@skip_if_lt_x_gpu(2)
def test_device_maps_gpu(self):
    """Call ``_gpu_add`` on the next worker with the device map ``{0: 1, 1: 0}``.

    Both arguments are created on device 0; the result is expected on
    device 1 and equal to ``zeros(2) + ones(2)``.
    """
    backend_opts = self.rpc_backend_options
    next_worker = worker_name((self.rank + 1) % self.world_size)
    backend_opts.set_device_map(next_worker, {0: 1, 1: 0})

    rpc.init_rpc(
        name=worker_name(self.rank),
        backend=self.rpc_backend,
        rank=self.rank,
        world_size=self.world_size,
        rpc_backend_options=backend_opts,
    )

    lhs = torch.zeros(2).to(0)
    rhs = torch.ones(2).to(0)
    result = rpc.rpc_sync(
        next_worker, TensorPipeAgentRpcTest._gpu_add, args=(lhs, rhs)
    )
    self.assertEqual(result.device, torch.device(1))
    self.assertEqual(result, (torch.zeros(2) + torch.ones(2)).to(1))
    rpc.shutdown()
4389
-
4390
@staticmethod
def _gpu_add_multi_gpu(x, y):
    """Return ``(x + y.to(0), x.to(1) - y)`` for ``x`` on cuda:0 and ``y`` on cuda:1.

    Raises:
        ValueError: if ``x`` is not a CUDA tensor with device index 0 or
            ``y`` is not a CUDA tensor with device index 1.
    """
    placements = (x.is_cuda, x.device.index == 0, y.is_cuda, y.device.index == 1)
    if not all(placements):
        raise ValueError("Wrong device affinity")
    total = x + y.to(0)
    difference = x.to(1) - y
    return total, difference
4396
-
4397
def _test_device_maps_multi_gpu(self, dst):
    """Call ``_gpu_add_multi_gpu`` on ``dst`` with devices 0 and 1 mapped to each other.

    The arguments are created on devices 1 and 0 respectively; the two
    results are expected on devices 1 and 0.

    Args:
        dst: name of the worker that receives the RPC.
    """
    backend_opts = self.rpc_backend_options
    # Two separate calls, in this order, one map entry each.
    for mapping in ({1: 0}, {0: 1}):
        backend_opts.set_device_map(dst, mapping)

    rpc.init_rpc(
        name=worker_name(self.rank),
        backend=self.rpc_backend,
        rank=self.rank,
        world_size=self.world_size,
        rpc_backend_options=backend_opts,
    )

    lhs = torch.zeros(2).to(1)
    rhs = torch.ones(2).to(0)
    rets = rpc.rpc_sync(
        dst, TensorPipeAgentRpcTest._gpu_add_multi_gpu, args=(lhs, rhs)
    )
    self.assertEqual(rets[0].device, torch.device(1))
    self.assertEqual(rets[1].device, torch.device(0))
    self.assertEqual(rets[0], (torch.zeros(2) + torch.ones(2)).to(1))
    self.assertEqual(rets[1], (torch.zeros(2) - torch.ones(2)).to(0))
    rpc.shutdown()
4420
-
4421
@skip_if_lt_x_gpu(2)
def test_device_maps_multi_gpu(self):
    """Run the multi-GPU device-map check against the next worker."""
    next_rank = (self.rank + 1) % self.world_size
    self._test_device_maps_multi_gpu(worker_name(next_rank))
4425
-
4426
@skip_if_lt_x_gpu(2)
def test_device_maps_multi_gpu_self(self):
    """Run the multi-GPU device-map check with this worker as the destination."""
    self._test_device_maps_multi_gpu(worker_name(self.rank))
4430
-
4431
@staticmethod
def _gpu_add_return_to_gpu(x, y):
    """Return sum, difference, product and quotient of two CPU tensors on devices 0-3.

    Raises:
        ValueError: if either input's device type is not ``'cpu'``.
    """
    if x.device.type != 'cpu' or y.device.type != 'cpu':
        raise ValueError("Wrong device affinity")
    total = (x + y).to(0)
    difference = (x - y).to(1)
    product = (x * y).to(2)
    quotient = (x / y).to(3)
    return total, difference, product, quotient
4437
-
4438
@skip_if_lt_x_gpu(2)
def test_device_maps_in_options(self):
    """Pass the device map via the ``device_maps`` constructor argument.

    Builds a new ``rpc.TensorPipeRpcBackendOptions`` from the existing
    ``init_method`` and ``num_worker_threads``, then runs the same
    ``_gpu_add_multi_gpu`` checks as the ``set_device_map`` variant.
    """
    dst = worker_name((self.rank + 1) % self.world_size)
    base_opts = self.rpc_backend_options
    opts_with_maps = rpc.TensorPipeRpcBackendOptions(
        init_method=base_opts.init_method,
        num_worker_threads=base_opts.num_worker_threads,
        device_maps={dst: {0: 1, 1: 0}},
    )

    rpc.init_rpc(
        name=worker_name(self.rank),
        backend=self.rpc_backend,
        rank=self.rank,
        world_size=self.world_size,
        rpc_backend_options=opts_with_maps,
    )

    lhs = torch.zeros(2).to(1)
    rhs = torch.ones(2).to(0)
    rets = rpc.rpc_sync(
        dst, TensorPipeAgentRpcTest._gpu_add_multi_gpu, args=(lhs, rhs)
    )
    self.assertEqual(rets[0].device, torch.device(1))
    self.assertEqual(rets[1].device, torch.device(0))
    self.assertEqual(rets[0], (torch.zeros(2) + torch.ones(2)).to(1))
    self.assertEqual(rets[1], (torch.zeros(2) - torch.ones(2)).to(0))
    rpc.shutdown()
4465
-
4466
def _test_device_maps_return_to_gpu(self, dst):
    """Send CPU tensors to ``dst`` and check the devices of the four returned tensors.

    The device map is ``0 -> 1, 1 -> 2, 2 -> 3, 3 -> 0``. Result ``i`` is
    expected on device ``(3 + i) % 4``.

    Args:
        dst: name of the worker that receives the RPC.
    """
    backend_opts = self.rpc_backend_options

    # One call per entry, in ascending order: {0: 1}, {1: 2}, {2: 3}, {3: 0}.
    for local_idx in range(4):
        backend_opts.set_device_map(dst, {local_idx: (local_idx + 1) % 4})

    rpc.init_rpc(
        name=worker_name(self.rank),
        backend=self.rpc_backend,
        rank=self.rank,
        world_size=self.world_size,
        rpc_backend_options=backend_opts,
    )

    rets = rpc.rpc_sync(
        dst,
        TensorPipeAgentRpcTest._gpu_add_return_to_gpu,
        args=(torch.zeros(2), torch.ones(2)),
    )
    for position, ret in enumerate(rets):
        self.assertEqual(ret.device, torch.device((3 + position) % 4))

    zeros = torch.zeros(2)
    ones = torch.ones(2)
    self.assertEqual(rets[0], (zeros + ones).to(3))
    self.assertEqual(rets[1], (zeros - ones).to(0))
    self.assertEqual(rets[2], (zeros * ones).to(1))
    self.assertEqual(rets[3], (zeros / ones).to(2))
    rpc.shutdown()
4494
-
4495
@skip_if_lt_x_gpu(4)
def test_device_maps_return_to_gpu(self):
    """Run the return-to-GPU device-map check against the next worker."""
    next_rank = (self.rank + 1) % self.world_size
    self._test_device_maps_return_to_gpu(worker_name(next_rank))
4499
-
4500
@skip_if_lt_x_gpu(4)
def test_device_maps_return_to_gpu_self(self):
    """Run the return-to-GPU device-map check with this worker as the destination."""
    self._test_device_maps_return_to_gpu(worker_name(self.rank))
4504
-
4505
@staticmethod
def _add_to_gpu(x, y):
    """Add ``x`` and ``y`` and return the sum moved to device 0."""
    total = x + y
    return total.to(0)
4508
-
4509
def _test_device_maps_missing_config(self, mode):
    """Send a device-0 tensor to the next worker and expect the CPU-only ``RuntimeError``.

    Afterwards a CPU-only ``rpc_sync`` is issued to confirm RPC still works.

    Args:
        mode: ``RPCExecMode.SYNC`` or ``RPCExecMode.REMOTE``; any other
            value raises ``ValueError``.
    """
    dst = worker_name((self.rank + 1) % self.world_size)
    errMsg = (
        "TensorPipeAgent only supports CPU tensors by default.*"
        "`set_device_map` on `TensorPipeRpcBackendOptions`"
    )

    with self.assertRaisesRegex(RuntimeError, errMsg):
        if mode not in (RPCExecMode.SYNC, RPCExecMode.REMOTE):
            raise ValueError(f"unexpected mode { mode } ")
        cuda_args = (torch.zeros(2).to(0), 1)
        if mode == RPCExecMode.SYNC:
            rpc.rpc_sync(dst, torch.add, args=cuda_args)
        else:
            rpc.remote(dst, torch.add, args=cuda_args).to_here()

    # make sure RPC is still functioning
    cpu_result = rpc.rpc_sync(dst, torch.add, args=(torch.ones(2), 1))
    self.assertEqual(cpu_result, torch.ones(2) + 1)
4527
-
4528
def _test_device_maps_missing_config_response(self, mode):
    """Call ``_add_to_gpu`` with CPU arguments and expect the response-mapping error.

    Afterwards a CPU-only ``rpc_sync`` is issued to confirm RPC still works.

    Args:
        mode: ``RPCExecMode.SYNC`` or ``RPCExecMode.REMOTE``; any other
            value raises ``ValueError``.
    """
    dst = worker_name((self.rank + 1) % self.world_size)
    errMsg = "Response device mapping is not available"

    with self.assertRaisesRegex(RuntimeError, errMsg):
        if mode not in (RPCExecMode.SYNC, RPCExecMode.REMOTE):
            raise ValueError(f"unexpected mode { mode } ")
        cpu_args = (torch.zeros(2), 1)
        if mode == RPCExecMode.SYNC:
            rpc.rpc_sync(dst, TensorPipeAgentRpcTest._add_to_gpu, args=cpu_args)
        else:
            rpc.remote(
                dst, TensorPipeAgentRpcTest._add_to_gpu, args=cpu_args
            ).to_here()

    # make sure RPC is still functioning
    cpu_result = rpc.rpc_sync(dst, torch.add, args=(torch.ones(2), 1))
    self.assertEqual(cpu_result, torch.ones(2) + 1)
4551
-
4552
@skip_if_lt_x_gpu(1)
@dist_init
def test_device_maps_missing_config(self):
    """Run the missing-device-map check once in ``SYNC`` mode."""
    sync_mode = RPCExecMode.SYNC
    self._test_device_maps_missing_config(sync_mode)
4556
-
4557
@skip_if_lt_x_gpu(1)
@dist_init
def test_device_maps_missing_config_loop(self):
    """Repeat the ``SYNC`` missing-device-map check ``num_worker_threads + 5`` times."""
    repetitions = self.rpc_backend_options.num_worker_threads + 5
    for _ in range(repetitions):
        self._test_device_maps_missing_config(RPCExecMode.SYNC)
4562
-
4563
@skip_if_lt_x_gpu(1)
@dist_init
def test_device_maps_missing_config_response(self):
    """Run the missing-response-mapping check once in ``SYNC`` mode."""
    sync_mode = RPCExecMode.SYNC
    self._test_device_maps_missing_config_response(sync_mode)
4567
-
4568
@skip_if_lt_x_gpu(1)
@dist_init
def test_device_maps_missing_config_response_loop(self):
    """Repeat the ``SYNC`` missing-response-mapping check ``num_worker_threads + 5`` times."""
    repetitions = self.rpc_backend_options.num_worker_threads + 5
    for _ in range(repetitions):
        self._test_device_maps_missing_config_response(RPCExecMode.SYNC)
4573
-
4574
@skip_if_lt_x_gpu(1)
@dist_init
def test_device_maps_missing_config_remote(self):
    """Run the missing-device-map check once in ``REMOTE`` mode."""
    remote_mode = RPCExecMode.REMOTE
    self._test_device_maps_missing_config(remote_mode)
4578
-
4579
@skip_if_lt_x_gpu(1)
@dist_init
def test_device_maps_missing_config_remote_response(self):
    """Run the missing-response-mapping check once in ``REMOTE`` mode."""
    remote_mode = RPCExecMode.REMOTE
    self._test_device_maps_missing_config_response(remote_mode)
4583
-
4584
@skip_if_lt_x_gpu(2)
def test_device_maps_remote(self):
    """Use ``rpc.remote`` with the device map ``{1: 0}`` and fetch the result.

    ``_add_to_gpu`` is called with CPU arguments; the value obtained via
    ``to_here()`` is compared against ``torch.ones(2).to(1)``.
    """
    backend_opts = self.rpc_backend_options
    next_worker = worker_name((self.rank + 1) % self.world_size)
    backend_opts.set_device_map(next_worker, {1: 0})

    rpc.init_rpc(
        name=worker_name(self.rank),
        backend=self.rpc_backend,
        rank=self.rank,
        world_size=self.world_size,
        rpc_backend_options=backend_opts,
    )

    remote_sum = rpc.remote(
        next_worker,
        TensorPipeAgentRpcTest._add_to_gpu,
        args=(torch.zeros(2), 1),
    )

    fetched = remote_sum.to_here()
    self.assertEqual(fetched, torch.ones(2).to(1))

    rpc.shutdown()
0 commit comments