@@ -178,120 +178,6 @@ Tensor& addmm__cuda(Tensor& self, const Tensor& mat1, const Tensor& mat2,
   return self;
 }
 
-template <typename scalar_t>
-void addr_impl_ger_cuda(Tensor &out, const Tensor &self,
-                        const Tensor& vec1, const Tensor& vec2,
-                        scalar_t alpha, scalar_t beta) {
-  static_assert(std::is_same<scalar_t, float>::value ||
-                std::is_same<scalar_t, double>::value,
-                "addr_impl_ger_cuda: only float and double are supported");
-  if (&out != &self) {
-    at::native::resize_as_(out, self);
-    at::native::copy_(out, self);
-  }
-  if (beta == 0.0) {
-    at::native::zero_(out);
-  }
-  if (beta != 1.0) {
-    at::native::mul_(out, beta);
-  }
-  if (out.stride(0) == 1) {
-    at::cuda::blas::ger<scalar_t>(
-      vec1.size(0), vec2.size(0), alpha,
-      vec1.data_ptr<scalar_t>(), vec1.stride(0),
-      vec2.data_ptr<scalar_t>(), vec2.stride(0),
-      out.data_ptr<scalar_t>(), out.stride(1)
-    );
-  } else if (out.stride(1) == 1) {
-    at::cuda::blas::ger<scalar_t>(
-      vec2.size(0), vec1.size(0), alpha,
-      vec2.data_ptr<scalar_t>(), vec2.stride(0),
-      vec1.data_ptr<scalar_t>(), vec1.stride(0),
-      out.data_ptr<scalar_t>(), out.stride(0)
-    );
-  } else {
-    Tensor cr = out.clone();
-    at::cuda::blas::ger<scalar_t>(
-      vec2.size(0), vec1.size(0), alpha,
-      vec2.data_ptr<scalar_t>(), vec2.stride(0),
-      vec1.data_ptr<scalar_t>(), vec1.stride(0),
-      out.data_ptr<scalar_t>(), out.stride(0)
-    );
-    out.set_(cr);
-  }
-}
-
-template <typename scalar_t>
-void addr_impl_cuda(Tensor &out, const Tensor &self,
-                    const Tensor& vec1, const Tensor& vec2,
-                    scalar_t alpha, scalar_t beta) {
-  // currently no Hger/SgerEx in Cublas.
-  Tensor vec2T = vec2.reshape({1, vec2.size(0)});
-  Tensor vec1M = vec1.reshape({vec1.size(0), 1});
-  addmm_out_cuda(out, self, vec1M, vec2T, beta, alpha);
-}
-template <>
-void addr_impl_cuda<float>(Tensor &out, const Tensor &self,
-                           const Tensor& vec1, const Tensor& vec2,
-                           float alpha, float beta) {
-  addr_impl_ger_cuda<float>(out, self, vec1, vec2, alpha, beta);
-}
-template <>
-void addr_impl_cuda<double>(Tensor &out, const Tensor &self,
-                            const Tensor& vec1, const Tensor& vec2,
-                            double alpha, double beta) {
-  addr_impl_ger_cuda<double>(out, self, vec1, vec2, alpha, beta);
-}
-
-Tensor& addr_out_cuda(Tensor &out, const Tensor& self,
-                      const Tensor& vec1, const Tensor& vec2,
-                      Scalar beta, Scalar alpha) {
-  TORCH_CHECK(vec1.dim() == 1 && vec2.dim() == 1,
-              "vec1 and vec2 should be 1-dimensional vectors. Got dimensions ",
-              vec1.dim(), " and ", vec2.dim());
-
-  Tensor self_;
-  if (&out != &self) {
-    std::tie(self_) = expand_size(self, {vec1.size(0), vec2.size(0)}, "addr");
-  } else {
-    self_ = self;
-  }
-
-  TORCH_CHECK(out.device() == self_.device() &&
-              out.device() == vec1.device() &&
-              out.device() == vec2.device(),
-              "Expected all tensors to be on the same device. Found: ",
-              out.device(), ", ", self_.device(), ", ",
-              vec1.device(), " and ", vec2.device());
-  TORCH_CHECK(self_.dim() == 2,
-              "2D tensor expected, got ", self_.dim(), "D tensor for input");
-  TORCH_CHECK(self_.size(0) == vec1.size(0) && self_.size(1) == vec2.size(0),
-              "size mismatch",
-              ", input: ", self_.sizes(),
-              ", v1: ", vec1.sizes(),
-              ", v2: ", vec2.sizes());
-  AT_DISPATCH_FLOATING_TYPES_AND2(kBFloat16, kHalf, self_.scalar_type(), "addr_out_cuda", [&] {
-    addr_impl_cuda<scalar_t>(out, self_, vec1, vec2,
-                             alpha.to<scalar_t>(), beta.to<scalar_t>());
-  });
-  return out;
-}
-
-Tensor& addr__cuda(Tensor& self,
-                   const Tensor& vec1, const Tensor& vec2,
-                   Scalar beta, Scalar alpha) {
-  addr_out_cuda(self, self, vec1, vec2, beta, alpha);
-  return self;
-}
-
-Tensor addr_cuda(const Tensor& self,
-                 const Tensor& vec1, const Tensor& vec2,
-                 Scalar beta, Scalar alpha) {
-  Tensor out = at::empty({0}, self.options());
-  addr_out_cuda(out, self, vec1, vec2, beta, alpha);
-  return out;
-}
-
 Tensor& addbmm_out_cuda(Tensor& out, const Tensor& self,
                         const Tensor& batch1, const Tensor& batch2,
                         Scalar beta, Scalar alpha) {
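For context, `addr` computes `beta * self + alpha * outer(vec1, vec2)`. The deleted CUDA path dispatched float and double to cuBLAS `ger` via `at::cuda::blas::ger`, and routed every other dtype (half, bfloat16) through `addmm` on reshaped vectors. The sketch below is not part of this diff; the helper name `addr_via_addmm` is made up for illustration, and it only restates that dtype-generic fallback in terms of public ATen ops.

#include <ATen/ATen.h>

// Minimal sketch of the fallback the removed addr_impl_cuda<scalar_t> used:
// express the outer product as a single matrix multiply and let addmm apply
// the beta/alpha scaling. Helper name is hypothetical, not a PyTorch API.
at::Tensor addr_via_addmm(const at::Tensor& self,
                          const at::Tensor& vec1,
                          const at::Tensor& vec2,
                          const at::Scalar& beta,
                          const at::Scalar& alpha) {
  // (n x 1) column times (1 x m) row gives the n x m outer product.
  at::Tensor vec1M = vec1.reshape({vec1.size(0), 1});
  at::Tensor vec2T = vec2.reshape({1, vec2.size(0)});
  // out = beta * self + alpha * (vec1M @ vec2T)
  return at::addmm(self, vec1M, vec2T, beta, alpha);
}

Reshaping the vectors this way lets one GEMM call cover the dtypes for which cuBLAS has no `ger` kernel, which is why the removed code only specialized the `ger` path for float and double.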