
Commit 26001a2

Xinyu Li authored and facebook-github-bot committed on Sep 24, 2020
Revert D23753711: [pytorch][PR] Add foreach APIs for binary ops with ScalarList
Test Plan: revert-hammer

Differential Revision: D23753711 (pytorch@71d1b5b)

Original commit changeset: bf3e8c54bc07

fbshipit-source-id: 192692e0d3fff4cade9983db0a1760fedfc9674c
1 parent c79d493, commit 26001a2

15 files changed: +119 -843 lines
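For orientation (this note and sketch are not part of the commit): the reverted ScalarList overloads took one scalar per tensor, while the single-Scalar overloads listed in native_functions.yaml below remain. A rough Python-level sketch, assuming the pre-revert API surface:

    import torch

    tensors = [torch.ones(2), torch.ones(3)]

    # Scalar overload: still present after this revert; one scalar is applied to every tensor.
    out = torch._foreach_add(tensors, 1.0)

    # ScalarList overload: removed by this revert, e.g. torch._foreach_add(tensors, [1.0, 2.0]).
    # The straightforward per-tensor equivalent is:
    out_per_tensor = [t.add(s) for t, s in zip(tensors, [1.0, 2.0])]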
 

‎aten/src/ATen/native/ForeachOpsKernels.cpp

-24
@@ -24,26 +24,6 @@ std::vector<Tensor> foreach_tensor_##NAME##_scalar_kernel_slow(TensorList tensor
   return result; \
 }
 
-#define FOREACH_BINARY_OP_SCALARLIST(NAME) \
-void foreach_tensor_##NAME##_scalarlist_kernel_slow_(TensorList tensors, at::ArrayRef<double> scalars) { \
-  check_foreach_api_restrictions(tensors, scalars); \
-  \
-  for (int i = 0; i < tensors.size(); i++) { \
-    tensors[i].NAME##_(scalars[i]); \
-  } \
-} \
-\
-std::vector<Tensor> foreach_tensor_##NAME##_scalarlist_kernel_slow(TensorList tensors, at::ArrayRef<double> scalars) { \
-  check_foreach_api_restrictions(tensors, scalars); \
-  std::vector<Tensor> result; \
-  result.reserve(tensors.size()); \
-  for (int i = 0; i < tensors.size(); i++) { \
-    result.emplace_back(tensors[i].NAME(scalars[i])); \
-  } \
-  \
-  return result; \
-}
-
 #define FOREACH_BINARY_OP_LIST(NAME) \
 std::vector<Tensor> foreach_tensor_##NAME##_list_kernel_slow(TensorList tensors1, TensorList tensors2) { \
   check_foreach_api_restrictions(tensors1, tensors2); \
 
@@ -137,10 +117,6 @@ FOREACH_BINARY_OP_SCALAR(add);
 FOREACH_BINARY_OP_SCALAR(sub);
 FOREACH_BINARY_OP_SCALAR(mul);
 FOREACH_BINARY_OP_SCALAR(div);
-FOREACH_BINARY_OP_SCALARLIST(add);
-FOREACH_BINARY_OP_SCALARLIST(sub);
-FOREACH_BINARY_OP_SCALARLIST(mul);
-FOREACH_BINARY_OP_SCALARLIST(div);
 FOREACH_BINARY_OP_LIST(mul);
 FOREACH_BINARY_OP_LIST(div);
 FOREACH_UNARY_OP(sqrt);
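As a reading aid (not part of the diff), a minimal Python sketch of what the removed slow-path ScalarList kernels computed for add, assuming one scalar per tensor:

    import torch

    def foreach_add_scalarlist_slow(tensors, scalars):
        # Out-of-place variant: one new tensor per (tensor, scalar) pair.
        assert len(tensors) > 0 and len(tensors) == len(scalars)
        return [t.add(s) for t, s in zip(tensors, scalars)]

    def foreach_add_scalarlist_slow_(tensors, scalars):
        # In-place variant: each tensor is updated with its matching scalar.
        assert len(tensors) > 0 and len(tensors) == len(scalars)
        for t, s in zip(tensors, scalars):
            t.add_(s)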

‎aten/src/ATen/native/ForeachUtils.h

-14
@@ -31,12 +31,6 @@ void check_foreach_api_restrictions(TensorList tensors1, TensorList tensors2) {
   }
 }
 
-void check_foreach_api_restrictions(TensorList tensors, ArrayRef<double> scalars) {
-  TORCH_CHECK(tensors.size() > 0, "Tensor list must have at least one tensor.");
-  TORCH_CHECK(scalars.size() > 0, "Scalars list must have at least one value.");
-  TORCH_CHECK(tensors.size() == scalars.size(), "Tensor list must have same number of elements as scalar list.");
-}
-
 // To go via 'fast' path, several conditions must be satisfied
 // - All tensors must be on the same device
 // - All tensors must have strided layout
 
@@ -138,13 +132,5 @@ bool can_use_fast_route(TensorList tensors) {
   return true;
 }
 
-bool can_use_fast_route(TensorList tensors, ArrayRef<double> scalars) {
-  TORCH_CHECK(tensors.size() > 0, "Tensor list must have at least one tensor.");
-  TORCH_CHECK(scalars.size() > 0, "Scalars list must have at least one value.");
-  TORCH_CHECK(tensors.size() == scalars.size(), "Tensor list must have same number of elements as scalar list.");
-
-  return can_use_fast_route(tensors);
-}
-
 }
 }} // at::native

‎aten/src/ATen/native/cuda/ForeachBinaryOpScalarList.cu

-60
This file was deleted.

‎aten/src/ATen/native/cuda/ForeachFunctors.cuh

-115
@@ -118,121 +118,6 @@ struct BinaryOpScalarFunctor {
     }
 };
 
-template<typename T, template<class> class Op>
-struct BinaryOpScalarListFunctor_ {
-    __device__ void operator() (
-        int chunk_size,
-        TensorListScalarListMetadata<1>& tl) {
-            int tensor_loc = tl.block_to_tensor[blockIdx.x];
-            int chunk_idx = tl.block_to_chunk[blockIdx.x];
-            int n = tl.sizes[tensor_loc];
-
-            T* x = (T*)tl.addresses[0][tensor_loc];
-            x += chunk_idx * chunk_size;
-
-            double y = tl.scalar_vals[tensor_loc];
-
-            n -= chunk_idx * chunk_size;
-
-            T r_x[kILP];
-
-            // to make things simple, we put aligned case in a different code path
-            if(n % kILP == 0 && chunk_size % kILP == 0 && is_aligned(x)) {
-                for(int i_start = threadIdx.x; i_start * kILP < n && i_start * kILP < chunk_size; i_start += blockDim.x) {
-                    // load
-                    load_store(r_x, x, 0 , i_start);
-#pragma unroll
-                    for(int ii = 0; ii < kILP; ii++) {
-                        r_x[ii] = Op<T>()(static_cast<T>(r_x[ii]), y);
-                    }
-                    // store
-                    load_store(x, r_x, i_start, 0);
-                }
-            }
-            else {
-                for(int i_start = 0; i_start < n && i_start < chunk_size; i_start += blockDim.x * kILP) {
-#pragma unroll
-                    for(int ii = 0; ii < kILP; ii++) {
-                        r_x[ii] = 0;
-                        int i = i_start + threadIdx.x + ii * blockDim.x;
-                        if(i < n && i < chunk_size) {
-                            r_x[ii] = x[i];
-                        }
-                    }
-#pragma unroll
-                    for(int ii = 0; ii < kILP; ii++) {
-                        r_x[ii] = Op<T>()(static_cast<T>(r_x[ii]), y);
-                    }
-#pragma unroll
-                    for(int ii = 0; ii < kILP; ii++) {
-                        int i = i_start + threadIdx.x + ii * blockDim.x;
-                        if(i < n && i < chunk_size)
-                            x[i] = r_x[ii];
-                    }
-                }
-            }
-    }
-};
-
-template<typename T, template<class> class Op>
-struct BinaryOpScalarListFunctor {
-    __device__ void operator() (
-        int chunk_size,
-        TensorListScalarListMetadata<2>& tl) {
-            int tensor_loc = tl.block_to_tensor[blockIdx.x];
-            int chunk_idx = tl.block_to_chunk[blockIdx.x];
-            int n = tl.sizes[tensor_loc];
-
-            T* x = (T*)tl.addresses[0][tensor_loc];
-            x += chunk_idx * chunk_size;
-
-            T* out = (T*)tl.addresses[1][tensor_loc];
-            out += chunk_idx * chunk_size;
-
-            double y = tl.scalar_vals[tensor_loc];
-
-            n -= chunk_idx * chunk_size;
-
-            T r_x[kILP];
-
-            // to make things simple, we put aligned case in a different code path
-            if(n % kILP == 0 && chunk_size % kILP == 0 && is_aligned(x) && is_aligned(out)) {
-                for(int i_start = threadIdx.x; i_start * kILP < n && i_start * kILP < chunk_size; i_start += blockDim.x) {
-                    // load
-                    load_store(r_x, x, 0 , i_start);
-#pragma unroll
-                    for(int ii = 0; ii < kILP; ii++) {
-                        r_x[ii] = Op<T>()(static_cast<T>(r_x[ii]), y);
-                    }
-                    // store
-                    load_store(out, r_x, i_start, 0);
-                }
-            }
-            else {
-                for(int i_start = 0; i_start < n && i_start < chunk_size; i_start += blockDim.x * kILP) {
-#pragma unroll
-                    for(int ii = 0; ii < kILP; ii++) {
-                        r_x[ii] = 0;
-                        int i = i_start + threadIdx.x + ii * blockDim.x;
-                        if(i < n && i < chunk_size) {
-                            r_x[ii] = x[i];
-                        }
-                    }
-#pragma unroll
-                    for(int ii = 0; ii < kILP; ii++) {
-                        r_x[ii] = Op<T>()(static_cast<T>(r_x[ii]), y);
-                    }
-#pragma unroll
-                    for(int ii = 0; ii < kILP; ii++) {
-                        int i = i_start + threadIdx.x + ii * blockDim.x;
-                        if(i < n && i < chunk_size)
-                            out[i] = r_x[ii];
-                    }
-                }
-            }
-    }
-};
-
 template<typename T, template<class> class Op>
 struct BinaryOpListAlphaFunctor_ {
     __device__ void operator() (

‎aten/src/ATen/native/cuda/MultiTensorApply.cuh

-70
@@ -26,7 +26,6 @@ __device__ __forceinline__ void load_store(T* dst, T* src, int dst_offset, int s
 // TensorListMetadata has to be < 4KB - the limit for kernel launch argument
 static constexpr int depth_to_max_tensors[5] = {110, 64, 48, 36, 30};
 static constexpr int depth_to_max_blocks[5] = {320, 320, 320, 320, 320};
-static constexpr int depth_to_max_tensors_scalarlist[5] = {96, 64, 48, 36, 30};
 
 template<int n> struct TensorListMetadata
 {
 
@@ -36,15 +35,6 @@ template<int n> struct TensorListMetadata
   int block_to_chunk[depth_to_max_blocks[n-1]];
 };
 
-template<int n> struct TensorListScalarListMetadata
-{
-  void* addresses[n][depth_to_max_tensors_scalarlist[n-1]];
-  int sizes[depth_to_max_tensors_scalarlist[n-1]];
-  double scalar_vals[depth_to_max_tensors_scalarlist[n-1]];
-  unsigned char block_to_tensor[depth_to_max_blocks[n-1]];
-  int block_to_chunk[depth_to_max_blocks[n-1]];
-};
-
 template<typename T, typename U, typename... ArgTypes>
 C10_LAUNCH_BOUNDS_1(kBlockSize)
 __global__ void
 
@@ -59,71 +49,11 @@ multi_tensor_apply_kernel(
 template<int depth, typename T, typename... ArgTypes>
 void multi_tensor_apply(
     std::vector<std::vector<at::Tensor>>& tensor_lists,
-    at::ArrayRef<double> scalars,
     T callable,
     ArgTypes... args) {
        TORCH_CHECK(tensor_lists.size() == depth, "Number of tensor lists has to match the depth.");
        const cuda::OptionalCUDAGuard device_guard(device_of(tensor_lists[0][0]));
-        size_t n_tensors = tensor_lists[0].size();
-        TensorListScalarListMetadata<depth> tensorListMeta;
-
-        int loc_block_info = 0;
-        int loc_tensor_info = 0;
-        for(size_t t = 0; t < n_tensors; t++) {
-
-            tensorListMeta.scalar_vals[loc_tensor_info] = scalars[t];
-
-            tensorListMeta.sizes[loc_tensor_info] = tensor_lists[0][t].numel();
-            for (int d = 0; d < depth; d++) {
-                tensorListMeta.addresses[d][loc_tensor_info] = tensor_lists[d][t].data_ptr();
-            }
-            loc_tensor_info++;
-
-            int chunks = (tensor_lists[0][t].numel() + kChunkSize - 1)/kChunkSize;
-            for (int chunk = 0; chunk < chunks; chunk++) {
-                tensorListMeta.block_to_tensor[loc_block_info] = loc_tensor_info - 1;
-                tensorListMeta.block_to_chunk[loc_block_info] = chunk;
-                loc_block_info++;
-
-                bool tensors_full = (loc_tensor_info == depth_to_max_tensors_scalarlist[depth-1] &&
-                    chunk == chunks - 1);
-                bool blocks_full = (loc_block_info == depth_to_max_blocks[depth-1]);
-                bool last_chunk = (t == n_tensors - 1 && chunk == chunks - 1);
-
-                if (tensors_full || blocks_full || last_chunk) {
-                    multi_tensor_apply_kernel<<<loc_block_info, kBlockSize, 0, at::cuda::getCurrentCUDAStream()>>>(
-                        tensorListMeta,
-                        callable,
-                        args...);
-
-                    AT_CUDA_CHECK(cudaGetLastError());
-
-                    // Reset.
-                    loc_block_info = 0;
-                    if(chunk == chunks - 1) {
-                        loc_tensor_info = 0;
-                    }
-                    else {
-                        tensorListMeta.sizes[0] = tensorListMeta.sizes[loc_tensor_info-1];
-                        tensorListMeta.scalar_vals[0] = tensorListMeta.scalar_vals[loc_tensor_info-1];
-                        for(int d = 0; d < depth; d++) {
-                            tensorListMeta.addresses[d][0] = tensorListMeta.addresses[d][loc_tensor_info-1];
-                        }
-                        loc_tensor_info = 1;
-                    }
-                }
-            }
-        }
-    }
-
-template<int depth, typename T, typename... ArgTypes>
-void multi_tensor_apply(
-    std::vector<std::vector<at::Tensor>>& tensor_lists,
-    T callable,
-    ArgTypes... args) {
-        TORCH_CHECK(tensor_lists.size() == depth, "Number of tensor lists has to match the depth.");
-        const cuda::OptionalCUDAGuard device_guard(device_of(tensor_lists[0][0]));
        size_t n_tensors = tensor_lists[0].size();
        TensorListMetadata<depth> tensorListMeta;
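For context (not part of the diff), a simplified Python sketch of the chunking bookkeeping in the removed multi_tensor_apply overload: each tensor is split into fixed-size chunks, and a kernel launch is issued whenever the metadata struct runs out of tensor or block slots, or at the last chunk. MAX_TENSORS and MAX_BLOCKS are the depth-1 values from the removed constants; CHUNK_SIZE is a placeholder, since kChunkSize is defined elsewhere in the header.

    MAX_TENSORS = 96     # depth_to_max_tensors_scalarlist[0] in the removed code
    MAX_BLOCKS = 320     # depth_to_max_blocks[0]
    CHUNK_SIZE = 65536   # placeholder value standing in for kChunkSize

    def plan_launches(numels):
        """Return the number of blocks covered by each simulated kernel launch."""
        launches = []
        n_tensors = n_blocks = 0
        for t, numel in enumerate(numels):
            n_tensors += 1
            chunks = (numel + CHUNK_SIZE - 1) // CHUNK_SIZE
            for chunk in range(chunks):
                n_blocks += 1
                tensors_full = n_tensors == MAX_TENSORS and chunk == chunks - 1
                blocks_full = n_blocks == MAX_BLOCKS
                last_chunk = t == len(numels) - 1 and chunk == chunks - 1
                if tensors_full or blocks_full or last_chunk:
                    launches.append(n_blocks)  # one multi_tensor_apply_kernel launch
                    n_blocks = 0
                    # keep the partially processed tensor if its chunks are not done
                    n_tensors = 0 if chunk == chunks - 1 else 1
        return launches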

‎aten/src/ATen/native/native_functions.yaml

+5 -92

@@ -6187,247 +6187,160 @@
     CUDA: foreach_tensor_add_scalar_kernel_cuda
 
 - func: _foreach_add_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_add_scalar_kernel_slow_
     CUDA: foreach_tensor_add_scalar_kernel_cuda_
 
 - func: _foreach_sub.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_sub_scalar_kernel_slow
     CUDA: foreach_tensor_sub_scalar_kernel_cuda
 
 - func: _foreach_sub_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_sub_scalar_kernel_slow_
     CUDA: foreach_tensor_sub_scalar_kernel_cuda_
 
 - func: _foreach_mul.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_mul_scalar_kernel_slow
     CUDA: foreach_tensor_mul_scalar_kernel_cuda
 
 - func: _foreach_mul_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_mul_scalar_kernel_slow_
     CUDA: foreach_tensor_mul_scalar_kernel_cuda_
 
 - func: _foreach_div.Scalar(Tensor[] tensors, Scalar scalar) -> Tensor[]
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_div_scalar_kernel_slow
     CUDA: foreach_tensor_div_scalar_kernel_cuda
 
 - func: _foreach_div_.Scalar(Tensor(a!)[] self, Scalar scalar) -> ()
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_div_scalar_kernel_slow_
     CUDA: foreach_tensor_div_scalar_kernel_cuda_
 
-- func: _foreach_add.List(Tensor[] tensors1, Tensor[] tensors2, *, Scalar alpha=1) -> Tensor[]
-  use_c10_dispatcher: full
+- func: _foreach_add.List(Tensor[] tensors1, Tensor[] tensors2, Scalar alpha=1) -> Tensor[]
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_add_list_kernel_slow
     CUDA: foreach_tensor_add_list_kernel_cuda
 
-- func: _foreach_add_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
-  use_c10_dispatcher: full
+- func: _foreach_add_.List(Tensor(a!)[] self, Tensor[] other, Scalar alpha=1) -> ()
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_add_list_kernel_slow_
     CUDA: foreach_tensor_add_list_kernel_cuda_
 
-- func: _foreach_sub.List(Tensor[] tensors1, Tensor[] tensors2, *, Scalar alpha=1) -> Tensor[]
-  use_c10_dispatcher: full
+- func: _foreach_sub.List(Tensor[] tensors1, Tensor[] tensors2, Scalar alpha=1) -> Tensor[]
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_sub_list_kernel_slow
     CUDA: foreach_tensor_sub_list_kernel_cuda
 
-- func: _foreach_sub_.List(Tensor(a!)[] self, Tensor[] other, *, Scalar alpha=1) -> ()
-  use_c10_dispatcher: full
+- func: _foreach_sub_.List(Tensor(a!)[] self, Tensor[] other, Scalar alpha=1) -> ()
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_sub_list_kernel_slow_
     CUDA: foreach_tensor_sub_list_kernel_cuda_
 
 - func: _foreach_mul.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_mul_list_kernel_slow
     CUDA: foreach_tensor_mul_list_kernel_cuda
 
 - func: _foreach_mul_.List(Tensor(a!)[] self, Tensor[] other) -> ()
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_mul_list_kernel_slow_
     CUDA: foreach_tensor_mul_list_kernel_cuda_
 
-- func: _foreach_div.List(Tensor[] tensors1, Tensor[] tensors2) -> Tensor[]
-  use_c10_dispatcher: full
+- func: _foreach_div.List(Tensor(a!)[] self, Tensor[] other) -> Tensor[]
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_div_list_kernel_slow
     CUDA: foreach_tensor_div_list_kernel_cuda
 
 - func: _foreach_div_.List(Tensor(a!)[] self, Tensor[] other) -> ()
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_div_list_kernel_slow_
     CUDA: foreach_tensor_div_list_kernel_cuda_
 
-- func: _foreach_add.ScalarList(Tensor[] tensors, float[] scalars) -> Tensor[]
-  use_c10_dispatcher: full
-  device_guard: False
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_add_scalarlist_kernel_slow
-    CUDA: foreach_tensor_add_scalarlist_kernel_cuda
-
-- func: _foreach_add_.ScalarList(Tensor(a!)[] self, float[] scalars) -> ()
-  use_c10_dispatcher: full
-  device_guard: False
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_add_scalarlist_kernel_slow_
-    CUDA: foreach_tensor_add_scalarlist_kernel_cuda_
-
-- func: _foreach_sub.ScalarList(Tensor[] tensors, float[] scalars) -> Tensor[]
-  use_c10_dispatcher: full
-  device_guard: False
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_sub_scalarlist_kernel_slow
-    CUDA: foreach_tensor_sub_scalarlist_kernel_cuda
-
-- func: _foreach_sub_.ScalarList(Tensor(a!)[] self, float[] scalars) -> ()
-  use_c10_dispatcher: full
-  device_guard: False
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_sub_scalarlist_kernel_slow_
-    CUDA: foreach_tensor_sub_scalarlist_kernel_cuda_
-
-- func: _foreach_div.ScalarList(Tensor[] tensors, float[] scalars) -> Tensor[]
-  use_c10_dispatcher: full
-  device_guard: False
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_div_scalarlist_kernel_slow
-    CUDA: foreach_tensor_div_scalarlist_kernel_cuda
-
-- func: _foreach_div_.ScalarList(Tensor(a!)[] self, float[] scalars) -> ()
-  use_c10_dispatcher: full
-  device_guard: False
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_div_scalarlist_kernel_slow_
-    CUDA: foreach_tensor_div_scalarlist_kernel_cuda_
-
-- func: _foreach_mul.ScalarList(Tensor[] tensors, float[] scalars) -> Tensor[]
-  use_c10_dispatcher: full
-  device_guard: False
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_mul_scalarlist_kernel_slow
-    CUDA: foreach_tensor_mul_scalarlist_kernel_cuda
-
-- func: _foreach_mul_.ScalarList(Tensor(a!)[] self, float[] scalars) -> ()
-  use_c10_dispatcher: full
-  device_guard: False
-  variants: function
-  dispatch:
-    CPU: foreach_tensor_mul_scalarlist_kernel_slow_
-    CUDA: foreach_tensor_mul_scalarlist_kernel_cuda_
-
 - func: _foreach_exp(Tensor[] tensors) -> Tensor[]
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_exp_slow
     CUDA: foreach_tensor_exp_cuda
 
 - func: _foreach_exp_(Tensor(a!)[] self) -> ()
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_exp_slow_
     CUDA: foreach_tensor_exp_cuda_
 
 - func: _foreach_sqrt(Tensor[] tensors) -> Tensor[]
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_sqrt_slow
     CUDA: foreach_tensor_sqrt_cuda
 
 - func: _foreach_sqrt_(Tensor(a!)[] self) -> ()
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_sqrt_slow_
     CUDA: foreach_tensor_sqrt_cuda_
 
 - func: _foreach_addcdiv_(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_addcdiv_slow_
     CUDA: foreach_tensor_addcdiv_cuda_
 
 - func: _foreach_addcmul_(Tensor(a!)[] self, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> ()
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_addcmul_slow_
     CUDA: foreach_tensor_addcmul_cuda_
 
 - func: _foreach_addcdiv(Tensor[] input, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:
     CPU: foreach_tensor_addcdiv_slow
     CUDA: foreach_tensor_addcdiv_cuda
 
 - func: _foreach_addcmul(Tensor[] input, Tensor[] tensor1, Tensor[] tensor2, Scalar value=1) -> Tensor[]
-  use_c10_dispatcher: full
   device_guard: False
   variants: function
   dispatch:

‎test/backward_compatibility/check_backward_compatibility.py

-4
@@ -99,10 +99,6 @@
     ("preprocess", datetime.date(2020, 10, 1)),
     ("compile", datetime.date(2020, 10, 1)),
     ("execute", datetime.date(2020, 10, 1)),
-    ("aten::_foreach_add", datetime.date(2020, 10, 1)),
-    ("aten::_foreach_sub_", datetime.date(2020, 10, 1)),
-    ("aten::_foreach_div", datetime.date(2020, 10, 1)),
-    ("aten::_foreach_sub", datetime.date(2020, 10, 1)),
 ]
 

‎test/test_foreach.py

+108 -421

Large diffs are not rendered by default.

‎test/test_native_functions.py

+1 -1

@@ -58,7 +58,7 @@ def fake_module(values, const):
         self.do_test_optional_floatlist_with_module(fake_module)
 
     def test_optional_floatlist_invalid(self):
-        with self.assertRaisesRegex(TypeError, "must be tuple of floats, not list"):
+        with self.assertRaisesRegex(TypeError, "must be .* but found"):
            FloatListWrapperModule()(torch.zeros(1), ["hi"])
 
        with self.assertRaisesRegex(RuntimeError, "value of type .* instead found type"):

‎tools/autograd/gen_python_functions.py

-1
@@ -281,7 +281,6 @@ def create_python_bindings(python_functions, is_python_method, module):
         'c10::optional<bool>': 'toBoolOptional',
         'c10::optional<double>': 'toDoubleOptional',
         'c10::optional<ArrayRef<double>>': 'doublelistOptional',
-        'ArrayRef<double>': 'doublelist',
         'IntArrayRef': 'intlist',
         'Scalar': 'scalar',
         'ScalarType': 'scalartype',

‎tools/autograd/templates/python_torch_functions.cpp

-1
@@ -44,7 +44,6 @@ using at::Generator;
 using at::TensorList;
 using at::Dimname;
 using at::DimnameList;
-using at::ArrayRef;
 
 using namespace torch::autograd::utils;

‎tools/codegen/model.py

-4
@@ -304,10 +304,6 @@ def __post_init__(self) -> None:
         # TODO: fixme
         if str(self.name) not in [
                 '_amp_non_finite_check_and_unscale_',
-                '_foreach_add_.ScalarList',
-                '_foreach_sub_.ScalarList',
-                '_foreach_mul_.ScalarList',
-                '_foreach_div_.ScalarList',
                 '_foreach_add_.Scalar',
                 '_foreach_sub_.Scalar',
                 '_foreach_mul_.Scalar',

‎tools/pyi/gen_pyi.py

-1
@@ -146,7 +146,6 @@ def type_to_python(typename, size=None):
         'Dimname': 'Union[str, ellipsis, None]',
         'DimnameList': 'Sequence[Union[str, ellipsis, None]]',
         'QScheme': '_qscheme',
-        'ArrayRef<double>' : 'Sequence[float]'
     }[typename]
 
     return typename

‎torch/csrc/utils/python_arg_parser.cpp

+1 -21

@@ -366,23 +366,6 @@ bool is_tensor_list_and_append_overloaded(PyObject* obj, std::vector<py::handle>
   return true;
 }
 
-bool is_float_list(PyObject* obj) {
-  auto tuple = six::isTuple(obj);
-  if (!(tuple || PyList_Check(obj))) {
-    return false;
-  }
-
-  auto size = tuple ? PyTuple_GET_SIZE(obj) : PyList_GET_SIZE(obj);
-  if (size > 0) {
-    PyObject* iobj = tuple ? PyTuple_GET_ITEM(obj, 0) : PyList_GET_ITEM(obj, 0);
-    if (!THPUtils_checkDouble(iobj) && !PyComplex_Check(iobj)) {
-      return false;
-    }
-  }
-
-  return true;
-}
-
 // argnum is needed for raising the TypeError, it's used in the error message.
 auto FunctionParameter::check(PyObject* obj, std::vector<py::handle> &overloaded_args, int argnum) -> bool
 {
 
@@ -437,9 +420,7 @@ auto FunctionParameter::check(PyObject* obj, std::vector<py::handle> &overloaded
       // if a size is specified (e.g. IntArrayRef[2]) we also allow passing a single int
       return size > 0 && THPUtils_checkLong(obj);
     }
-    case ParameterType::FLOAT_LIST: {
-      return is_float_list(obj);
-    }
+    case ParameterType::FLOAT_LIST: return (PyTuple_Check(obj) || PyList_Check(obj));
     case ParameterType::GENERATOR: return THPGenerator_Check(obj);
     case ParameterType::BOOL: return PyBool_Check(obj);
     case ParameterType::STORAGE: return isStorage(obj);
 
@@ -920,7 +901,6 @@ PythonArgs PythonArgParser::raw_parse(PyObject* self, PyObject* args, PyObject*
     print_error(self, args, kwargs, parsed_args);
   }
 
-
 void PythonArgParser::print_error(PyObject* self, PyObject* args, PyObject* kwargs, PyObject* parsed_args[]) { // NOLINT
   auto num_args = PyTuple_GET_SIZE(args) + (kwargs ? PyDict_Size(kwargs) : 0);
   std::vector<int> plausible_idxs;

‎torch/csrc/utils/python_arg_parser.h

+4 -14

@@ -173,8 +173,6 @@ struct PythonArgs {
   inline c10::optional<bool> toBoolOptional(int i);
   inline c10::optional<double> toDoubleOptional(int i);
   inline c10::OptionalArray<double> doublelistOptional(int i);
-  inline std::vector<double> doublelist(int i);
-  inline std::vector<double> getDoublelist(int i);
   inline at::Layout layout(int i);
   inline at::Layout layoutWithDefault(int i, at::Layout default_layout);
   inline c10::optional<at::Layout> layoutOptional(int i);
 
@@ -371,7 +369,10 @@ inline c10::OptionalArray<int64_t> PythonArgs::intlistOptional(int i) {
   return intlist(i);
 }
 
-inline std::vector<double> PythonArgs::getDoublelist(int i) {
+inline c10::OptionalArray<double> PythonArgs::doublelistOptional(int i) {
+  if (!args[i]) {
+    return {};
+  }
   PyObject* arg = args[i];
   auto tuple = PyTuple_Check(arg);
   auto size = tuple ? PyTuple_GET_SIZE(arg) : PyList_GET_SIZE(arg);
 
@@ -389,17 +390,6 @@ inline std::vector<double> PythonArgs::getDoublelist(int i) {
   return res;
 }
 
-inline c10::OptionalArray<double> PythonArgs::doublelistOptional(int i) {
-  if (!args[i]) {
-    return {};
-  }
-  return this->getDoublelist(i);
-}
-
-inline std::vector<double> PythonArgs::doublelist(int i) {
-  return this->getDoublelist(i);
-}
-
 inline at::ScalarType PythonArgs::scalartypeWithDefault(int i, at::ScalarType default_scalartype) {
   if (!args[i]) return default_scalartype;
   return scalartype(i);
