Skip to content

Commit 6248266

Browse files
Sandeep Kumar, facebook-github-bot
Sandeep Kumar
authored and committed Mar 12, 2019
Enable detectron on AMD GPU
Summary: Pull Request resolved: pytorch#17862
Differential Revision: D14429234
Pulled By: bddppq
fbshipit-source-id: 5cb8750bd9db0ff8a179977d2bfbb180265cce81
1 parent 1cfb503 commit 6248266

15 files changed

+45 -32 lines changed
 

‎modules/CMakeLists.txt

+1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
project(modules CXX C)
12
add_subdirectory(detectron)
23
add_subdirectory(module_test)
34
add_subdirectory(observers)

‎modules/detectron/CMakeLists.txt

+11
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
file(GLOB Detectron_CPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/*.cc)
22
file(GLOB Detectron_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/*.cu)
3+
file(GLOB_RECURSE Detectron_HIP_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/*.hip)
34

45
if (BUILD_CAFFE2_OPS)
56
if (USE_OPENMP AND OPENMP_FOUND)
@@ -19,6 +20,16 @@ if (BUILD_CAFFE2_OPS)
1920
if (MSVC)
2021
install(FILES $<TARGET_PDB_FILE:caffe2_detectron_ops_gpu> DESTINATION lib OPTIONAL)
2122
endif()
23+
elseif(USE_ROCM)
24+
hip_include_directories(${Caffe2_HIP_INCLUDES})
25+
set_source_files_properties(${Detectron_HIP_SRCS} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
26+
HIP_ADD_LIBRARY(
27+
caffe2_detectron_ops_hip SHARED
28+
${Detectron_CPU_SRCS}
29+
${Detectron_HIP_SRCS})
30+
target_compile_options(caffe2_detectron_ops_hip PRIVATE ${HIP_CXX_FLAGS})
31+
target_link_libraries(caffe2_detectron_ops_hip caffe2_hip)
32+
install(TARGETS caffe2_detectron_ops_hip DESTINATION lib)
2233
elseif(NOT IOS_PLATFORM)
2334
add_library(caffe2_detectron_ops SHARED ${Detectron_CPU_SRCS})
2435
target_link_libraries(caffe2_detectron_ops caffe2 ${OpenMP_link})

‎modules/detectron/batch_permutation_op.cu

+1-1
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
* limitations under the License.
1515
*/
1616

17-
#include "batch_permutation_op.h"
17+
#include "modules/detectron/batch_permutation_op.h"
1818
#include "caffe2/core/context_gpu.h"
1919

2020
namespace caffe2 {

‎modules/detectron/group_spatial_softmax_op.cu

+1-1
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
#include <cfloat>
1818

1919
#include "caffe2/core/context_gpu.h"
20-
#include "group_spatial_softmax_op.h"
20+
#include "modules/detectron/group_spatial_softmax_op.h"
2121

2222
namespace caffe2 {
2323

‎modules/detectron/ps_roi_pool_op.cu

+5-5
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@
7373
#include <cfloat>
7474

7575
#include "caffe2/core/context_gpu.h"
76-
#include "ps_roi_pool_op.h"
76+
#include "modules/detectron/ps_roi_pool_op.h"
7777

7878
namespace caffe2 {
7979

@@ -123,8 +123,8 @@ __global__ void PSRoIPoolForward(
123123
roundf(offset_bottom_rois[4]) + 1.) * spatial_scale;
124124

125125
// Force too small ROIs to be at least 0.1 x 0.1
126-
T roi_width = max(roi_end_w - roi_start_w, 0.1); // avoid 0
127-
T roi_height = max(roi_end_h - roi_start_h, 0.1);
126+
T roi_width = c10::cuda::compat::max(roi_end_w - roi_start_w, static_cast<T>(0.1)); // avoid 0
127+
T roi_height = c10::cuda::compat::max(roi_end_h - roi_start_h, static_cast<T>(0.1));
128128

129129
// Compute w and h at bottom
130130
T bin_size_h = roi_height / static_cast<T>(pooled_height);
@@ -200,8 +200,8 @@ __global__ void PSRoIPoolBackward(
200200
roundf(offset_bottom_rois[4]) + 1.) * spatial_scale;
201201

202202
// Force too small ROIs to be at least 0.1 x 0.1
203-
T roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0
204-
T roi_height = max(roi_end_h - roi_start_h, 0.1);
203+
T roi_width = c10::cuda::compat::max(roi_end_w - roi_start_w, static_cast<T>(0.1)); //avoid 0
204+
T roi_height = c10::cuda::compat::max(roi_end_h - roi_start_h, static_cast<T>(0.1));
205205

206206
// Compute w and h at bottom
207207
T bin_size_h = roi_height / static_cast<T>(pooled_height);

‎modules/detectron/roi_pool_f_op.cu

+1-1
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
#include <cfloat>
1818

1919
#include "caffe2/core/context_gpu.h"
20-
#include "roi_pool_f_op.h"
20+
#include "modules/detectron/roi_pool_f_op.h"
2121

2222
namespace caffe2 {
2323

‎modules/detectron/sample_as_op.cu

+1-1
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ Y's output samples are the samples of X for which L > 0.
2121
#include <cfloat>
2222

2323
#include "caffe2/core/context_gpu.h"
24-
#include "sample_as_op.h"
24+
#include "modules/detectron/sample_as_op.h"
2525

2626
#include <stdio.h>
2727

‎modules/detectron/select_smooth_l1_loss_op.cu

+7-7
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
*/
1616

1717
#include "caffe2/core/context_gpu.h"
18-
#include "select_smooth_l1_loss_op.h"
18+
#include "modules/detectron/select_smooth_l1_loss_op.h"
1919

2020
namespace caffe2 {
2121

@@ -38,11 +38,11 @@ __global__ void SelectSmoothL1Kernel(
3838
float y_hat = Y_hat[ind];
3939
float y = Y[i * 4 + j];
4040
float val = y_hat - y;
41-
float abs_val = abs(val);
41+
float abs_val = c10::cuda::compat::abs(val);
4242
if (abs_val < beta) {
43-
out[ind] = (0.5 * val * val / beta) / max(S[0], 1.0);
43+
out[ind] = (0.5 * val * val / beta) / c10::cuda::compat::max(S[0], static_cast<float>(1.0));
4444
} else {
45-
out[ind] = (abs_val - 0.5 * beta) / max(S[0], 1.0);
45+
out[ind] = (abs_val - 0.5 * beta) / c10::cuda::compat::max(S[0], static_cast<float>(1.0));
4646
}
4747
}
4848
}
@@ -75,11 +75,11 @@ __global__ void SelectSmoothL1GradientKernel(
7575
float y_hat = Y_hat[ind];
7676
float y = Y[i * 4 + j];
7777
float val = y_hat - y;
78-
float abs_val = abs(val);
78+
float abs_val = c10::cuda::compat::abs(val);
7979
if (abs_val < beta) {
80-
out[ind] = norm * d_loss * val / beta / max(S[0], 1.0);
80+
out[ind] = norm * d_loss * val / beta / c10::cuda::compat::max(S[0], static_cast<float>(1.0));
8181
} else {
82-
out[ind] = norm * d_loss * ((float(0) < val) - (val < float(0))) / max(S[0], 1.0);
82+
out[ind] = norm * d_loss * ((float(0) < val) - (val < float(0))) / c10::cuda::compat::max(S[0], static_cast<float>(1.0));
8383
}
8484
}
8585
}

‎modules/detectron/sigmoid_cross_entropy_loss_op.cu

+1-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
*/
1616

1717
#include "caffe2/core/context_gpu.h"
18-
#include "sigmoid_cross_entropy_loss_op.h"
18+
#include "modules/detectron/sigmoid_cross_entropy_loss_op.h"
1919

2020
namespace caffe2 {
2121

‎modules/detectron/sigmoid_focal_loss_op.cu

+5-5
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
#include <cfloat>
1818

1919
#include "caffe2/core/context_gpu.h"
20-
#include "sigmoid_focal_loss_op.h"
20+
#include "modules/detectron/sigmoid_focal_loss_op.h"
2121

2222
namespace caffe2 {
2323

@@ -45,15 +45,15 @@ __global__ void SigmoidFocalLossKernel(
4545
float c1 = (t == (d + 1));
4646
float c2 = (t != -1 & t != (d + 1));
4747

48-
float Np = max(weight_pos[0], 1.0);
48+
float Np = c10::cuda::compat::max(weight_pos[0], static_cast<float>(1.0));
4949
float zn = (1.0 - alpha) / Np;
5050
float zp = alpha / Np;
5151

5252
// p = 1. / (1. + expf(-x))
5353
float p = 1. / (1. + expf(-logits[i]));
5454

5555
// (1 - p)**gamma * log(p) where
56-
float term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN));
56+
float term1 = powf((1. - p), gamma) * logf(c10::cuda::compat::max(p, FLT_MIN));
5757
// p**gamma * log(1 - p)
5858
float term2 =
5959
powf(p, gamma) *
@@ -82,7 +82,7 @@ __global__ void SigmoidFocalLossGradientKernel(
8282
int a = c / num_classes; // current anchor
8383
int d = c % num_classes; // current class
8484

85-
float Np = max(weight_pos[0], 1.0);
85+
float Np = c10::cuda::compat::max(weight_pos[0], static_cast<float>(1.0));
8686
float zn = (1.0 - alpha) / Np;
8787
float zp = alpha / Np;
8888
int t = targets[n * (H * W * A) + a * (H * W) + y * W + x];
@@ -94,7 +94,7 @@ __global__ void SigmoidFocalLossGradientKernel(
9494
// (1-p)**g * (1 - p - g*p*log(p))
9595
float term1 =
9696
powf((1. - p), gamma) *
97-
(1. - p - (p * gamma * logf(max(p, FLT_MIN))));
97+
(1. - p - (p * gamma * logf(c10::cuda::compat::max(p, FLT_MIN))));
9898
// (p**g) * (g*(1-p)*log(1-p) - p)
9999
float term2 =
100100
powf(p, gamma) *

‎modules/detectron/smooth_l1_loss_op.cu

+3-3
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
*/
1616

1717
#include "caffe2/core/context_gpu.h"
18-
#include "smooth_l1_loss_op.h"
18+
#include "modules/detectron/smooth_l1_loss_op.h"
1919

2020
namespace caffe2 {
2121

@@ -27,7 +27,7 @@ __global__ void SmoothL1Kernel(
2727
// |x| - 0.5 * beta otherwise
2828
CUDA_1D_KERNEL_LOOP(index, n) {
2929
T val = in[index];
30-
T abs_val = abs(val);
30+
T abs_val = c10::cuda::compat::abs(val);
3131
if (abs_val < beta) {
3232
out[index] = 0.5 * val * val / beta;
3333
} else {
@@ -49,7 +49,7 @@ __global__ void SmoothL1GradientKernel(
4949
// We also scale by norm * d_loss in this kernel for convenience
5050
CUDA_1D_KERNEL_LOOP(index, n) {
5151
T val = in[index];
52-
T abs_val = abs(val);
52+
T abs_val = c10::cuda::compat::abs(val);
5353
T d_loss = *d_loss_data;
5454
if (abs_val < beta) {
5555
out[index] = norm * d_loss * val / beta;

‎modules/detectron/softmax_focal_loss_op.cu

+5-5
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
#include <cfloat>
1818

1919
#include "caffe2/core/context_gpu.h"
20-
#include "softmax_focal_loss_op.h"
20+
#include "modules/detectron/softmax_focal_loss_op.h"
2121

2222
namespace caffe2 {
2323

@@ -69,7 +69,7 @@ __global__ void SoftmaxFocalLossKernel(
6969
int n = i / (W * H * A);
7070
const int label = static_cast<int>(targets[i]);
7171

72-
float Np = max(weight_pos[0], 1.0);
72+
float Np = c10::cuda::compat::max(weight_pos[0], static_cast<float>(1.0));
7373
float z = (label == 0) * (1 - alpha) / Np +
7474
(label >= 1) * alpha / Np;
7575

@@ -79,7 +79,7 @@ __global__ void SoftmaxFocalLossKernel(
7979
int idx = n * (H * W * D) + (offset + label) * (H * W) + y * W + x;
8080
losses[i] =
8181
-(pow(1.0f - Pdata[idx], gamma) *
82-
log(max(Pdata[idx], FLT_MIN))) * z;
82+
log(c10::cuda::compat::max(Pdata[idx], FLT_MIN))) * z;
8383
}
8484
}
8585
}
@@ -97,7 +97,7 @@ __global__ void SoftmaxFocalLossGradientWeightKernel(
9797
int a = (i / (W * H)) % A;
9898
int n = i / (W * H * A);
9999
const int label = static_cast<int>(targets[i]);
100-
float Np = max(weight_pos[0], 1.0);
100+
float Np = c10::cuda::compat::max(weight_pos[0], static_cast<float>(1.0));
101101
float z = (label == 0) * (1 - alpha) / Np +
102102
(label >= 1) * alpha / Np;
103103

@@ -109,7 +109,7 @@ __global__ void SoftmaxFocalLossGradientWeightKernel(
109109
float p = Pdata[idx];
110110
buff[i] =
111111
(-pow(onemp, gamma) +
112-
gamma * pow(onemp, gamma - 1) * p * log(max(p, FLT_MIN))) * z;
112+
gamma * pow(onemp, gamma - 1) * p * log(c10::cuda::compat::max(p, FLT_MIN))) * z;
113113
}
114114
}
115115
}

‎modules/detectron/spatial_narrow_as_op.cu

+1-1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
#include "caffe2/core/context_gpu.h"
1818
#include "caffe2/core/operator.h"
19-
#include "spatial_narrow_as_op.h"
19+
#include "modules/detectron/spatial_narrow_as_op.h"
2020

2121
namespace caffe2 {
2222

‎modules/detectron/upsample_nearest_op.cu

+1-1
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@
5858

5959

6060
#include "caffe2/core/context_gpu.h"
61-
#include "upsample_nearest_op.h"
61+
#include "modules/detectron/upsample_nearest_op.h"
6262

6363
namespace caffe2 {
6464

‎tools/amd_build/build_amd.py

+1
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@
6363
"caffe2/utils/*",
6464
"c10/cuda/*",
6565
"c10/cuda/test/CMakeLists.txt",
66+
"modules/*",
6667
# PyTorch paths
6768
# Keep this synchronized with is_pytorch_file in hipify_python.py
6869
"aten/src/ATen/cuda/*",

0 commit comments

Comments
 (0)
Please sign in to comment.