Skip to content

Commit 35ac188

Browse files
authored
Merge pull request #2905 from stan-dev/feature/threadsafe-matrixcl
Make matrix_cl thread safe
2 parents 8062e06 + baffed6 commit 35ac188

24 files changed

+228
-186
lines changed

stan/math/opencl/copy.hpp

+16 -9
Original file line number | Diff line number | Diff line change
@@ -97,10 +97,12 @@ inline auto from_matrix_cl(const T& src) {
9797
} else {
9898
try {
9999
cl::Event copy_event;
100-
const cl::CommandQueue queue = opencl_context.queue();
100+
const cl::CommandQueue& queue = opencl_context.queue();
101+
std::vector<cl::Event> copy_write_events(src.write_events().begin(),
102+
src.write_events().end());
101103
queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,
102104
sizeof(T_val) * dst.size(), dst.data(),
103-
&src.write_events(), &copy_event);
105+
&copy_write_events, &copy_event);
104106
copy_event.wait();
105107
src.clear_write_events();
106108
} catch (const cl::Error& e) {
@@ -150,9 +152,11 @@ inline T_dst from_matrix_cl(const matrix_cl<T>& src) {
150152
"dst.cols()", 1);
151153
try {
152154
cl::Event copy_event;
153-
const cl::CommandQueue queue = opencl_context.queue();
155+
const cl::CommandQueue& queue = opencl_context.queue();
156+
std::vector<cl::Event> copy_write_events(src.write_events().begin(),
157+
src.write_events().end());
154158
queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,
155-
sizeof(T), &dst, &src.write_events(), &copy_event);
159+
sizeof(T), &dst, &copy_write_events, &copy_event);
156160
copy_event.wait();
157161
src.clear_write_events();
158162
} catch (const cl::Error& e) {
@@ -182,10 +186,12 @@ inline T_dst from_matrix_cl(const matrix_cl<T>& src) {
182186
}
183187
try {
184188
cl::Event copy_event;
185-
const cl::CommandQueue queue = opencl_context.queue();
189+
const cl::CommandQueue& queue = opencl_context.queue();
190+
std::vector<cl::Event> copy_write_events(src.write_events().begin(),
191+
src.write_events().end());
186192
queue.enqueueReadBuffer(src.buffer(), opencl_context.in_order(), 0,
187193
sizeof(T) * src.rows(), dst.data(),
188-
&src.write_events(), &copy_event);
194+
&copy_write_events, &copy_event);
189195
copy_event.wait();
190196
src.clear_write_events();
191197
} catch (const cl::Error& e) {
@@ -251,13 +257,14 @@ inline auto packed_copy(const T& src) {
251257
return dst;
252258
}
253259
try {
254-
const cl::CommandQueue queue = opencl_context.queue();
260+
const cl::CommandQueue& queue = opencl_context.queue();
255261
matrix_cl<T_val> packed(packed_size, 1);
256262
stan::math::opencl_kernels::pack(cl::NDRange(src.rows(), src.rows()),
257263
packed, src, src.rows(), src.rows(),
258264
src.view());
259265
const std::vector<cl::Event> mat_events
260-
= vec_concat(packed.read_write_events(), src.write_events());
266+
= vec_concat(std::vector<cl::Event>{}, packed.read_write_events(),
267+
src.write_events());
261268
cl::Event copy_event;
262269
queue.enqueueReadBuffer(packed.buffer(), opencl_context.in_order(), 0,
263270
sizeof(T_val) * packed_size, dst.data(),
@@ -303,7 +310,7 @@ inline matrix_cl<Vec_scalar> packed_copy(Vec&& src, int rows) {
303310
try {
304311
matrix_cl<Vec_scalar> packed(packed_size, 1);
305312
cl::Event packed_event;
306-
const cl::CommandQueue queue = opencl_context.queue();
313+
const cl::CommandQueue& queue = opencl_context.queue();
307314
queue.enqueueWriteBuffer(
308315
packed.buffer(),
309316
opencl_context.in_order() || std::is_rvalue_reference<Vec&&>::value, 0,

stan/math/opencl/kernel_cl.hpp

+13 -11
Original file line number | Diff line number | Diff line change
@@ -109,17 +109,17 @@ inline void assign_events(const cl::Event& new_event, CallArg& m,
109109
* @return A vector of OpenCL events.
110110
*/
111111
template <typename T, require_not_matrix_cl_t<T>* = nullptr>
112-
inline std::vector<cl::Event> select_events(const T& m) {
113-
return {};
112+
inline tbb::concurrent_vector<cl::Event> select_events(const T& m) {
113+
return tbb::concurrent_vector<cl::Event>{};
114114
}
115115
template <typename T, typename K, require_matrix_cl_t<K>* = nullptr,
116116
require_same_t<T, in_buffer>* = nullptr>
117-
inline const std::vector<cl::Event>& select_events(const K& m) {
117+
inline const tbb::concurrent_vector<cl::Event>& select_events(const K& m) {
118118
return m.write_events();
119119
}
120120
template <typename T, typename K, require_matrix_cl_t<K>* = nullptr,
121121
require_any_same_t<T, out_buffer, in_out_buffer>* = nullptr>
122-
inline std::vector<cl::Event> select_events(K& m) {
122+
inline tbb::concurrent_vector<cl::Event> select_events(K& m) {
123123
static_assert(!std::is_const<K>::value, "Can not write to const matrix_cl!");
124124
return m.read_write_events();
125125
}
@@ -133,9 +133,9 @@ inline std::vector<cl::Event> select_events(K& m) {
133133
* @param sources A std::vector of strings containing the code for the kernel.
134134
* @param options The values of macros to be passed at compile time.
135135
*/
136-
inline auto compile_kernel(const char* name,
137-
const std::vector<std::string>& sources,
138-
const std::map<std::string, int>& options) {
136+
inline auto compile_kernel(
137+
const char* name, const std::vector<std::string>& sources,
138+
const std::unordered_map<std::string, int>& options) {
139139
auto base_opts = opencl_context.base_opts();
140140
for (auto& it : options) {
141141
if (base_opts[it.first] > it.second) {
@@ -175,7 +175,7 @@ struct kernel_cl {
175175
private:
176176
const char* name_;
177177
std::vector<std::string> sources_;
178-
std::map<std::string, int> opts_;
178+
std::unordered_map<std::string, int> opts_;
179179
mutable cl::Kernel kernel_;
180180

181181
public:
@@ -187,7 +187,7 @@ struct kernel_cl {
187187
* @param options The values of macros to be passed at compile time.
188188
*/
189189
kernel_cl(const char* name, std::vector<std::string> sources,
190-
std::map<std::string, int> options = {})
190+
std::unordered_map<std::string, int> options = {})
191191
: name_(name), sources_(std::move(sources)), opts_(std::move(options)) {}
192192

193193
/** \ingroup kernel_executor_opencl
@@ -205,7 +205,8 @@ struct kernel_cl {
205205
opencl_context.register_kernel_cache(&kernel_);
206206
}
207207
cl::EnqueueArgs eargs(opencl_context.queue(),
208-
vec_concat(internal::select_events<Args>(args)...),
208+
vec_concat(std::vector<cl::Event>{},
209+
internal::select_events<Args>(args)...),
209210
global_thread_size);
210211
cl::KernelFunctor<internal::to_const_buffer_t<Args>&...> kernel_functor(
211212
kernel_);
@@ -232,7 +233,8 @@ struct kernel_cl {
232233
opencl_context.register_kernel_cache(&kernel_);
233234
}
234235
cl::EnqueueArgs eargs(opencl_context.queue(),
235-
vec_concat(internal::select_events<Args>(args)...),
236+
vec_concat(std::vector<cl::Event>{},
237+
internal::select_events<Args>(args)...),
236238
global_thread_size, thread_block_size);
237239
cl::KernelFunctor<internal::to_const_buffer_t<Args>&...> kernel_functor(
238240
kernel_);

stan/math/opencl/kernel_generator/append.hpp

+16 -14
Original file line number | Diff line number | Diff line change
@@ -88,8 +88,8 @@ class append_row_ : public operation_cl<append_row_<T_a, T_b>,
8888
* @return part of kernel with code for this and nested expressions
8989
*/
9090
inline kernel_parts get_kernel_parts(
91-
std::map<const void*, const char*>& generated,
92-
std::map<const void*, const char*>& generated_all,
91+
std::unordered_map<const void*, const char*>& generated,
92+
std::unordered_map<const void*, const char*>& generated_all,
9393
name_generator& name_gen, const std::string& row_index_name,
9494
const std::string& col_index_name, bool view_handled) const {
9595
kernel_parts res{};
@@ -101,7 +101,7 @@ class append_row_ : public operation_cl<append_row_<T_a, T_b>,
101101
true);
102102
std::string row_index_name_b
103103
= "(" + row_index_name + " - " + var_name_ + "_first_rows)";
104-
std::map<const void*, const char*> generated_b;
104+
std::unordered_map<const void*, const char*> generated_b;
105105
kernel_parts parts_b = this->template get_arg<1>().get_kernel_parts(
106106
generated_b, generated_all, name_gen, row_index_name_b,
107107
col_index_name, true);
@@ -129,14 +129,15 @@ class append_row_ : public operation_cl<append_row_<T_a, T_b>,
129129
* @param[in,out] arg_num consecutive number of the first argument to set.
130130
* This is incremented for each argument set by this function.
131131
*/
132-
inline void set_args(std::map<const void*, const char*>& generated,
133-
std::map<const void*, const char*>& generated_all,
134-
cl::Kernel& kernel, int& arg_num) const {
132+
inline void set_args(
133+
std::unordered_map<const void*, const char*>& generated,
134+
std::unordered_map<const void*, const char*>& generated_all,
135+
cl::Kernel& kernel, int& arg_num) const {
135136
if (generated.count(this) == 0) {
136137
generated[this] = "";
137138
this->template get_arg<0>().set_args(generated, generated_all, kernel,
138139
arg_num);
139-
std::map<const void*, const char*> generated_b;
140+
std::unordered_map<const void*, const char*> generated_b;
140141
this->template get_arg<1>().set_args(generated_b, generated_all, kernel,
141142
arg_num);
142143
kernel.setArg(arg_num++, this->template get_arg<0>().rows());
@@ -250,8 +251,8 @@ class append_col_ : public operation_cl<append_col_<T_a, T_b>,
250251
* @return part of kernel with code for this and nested expressions
251252
*/
252253
inline kernel_parts get_kernel_parts(
253-
std::map<const void*, const char*>& generated,
254-
std::map<const void*, const char*>& generated_all,
254+
std::unordered_map<const void*, const char*>& generated,
255+
std::unordered_map<const void*, const char*>& generated_all,
255256
name_generator& name_gen, const std::string& row_index_name,
256257
const std::string& col_index_name, bool view_handled) const {
257258
kernel_parts res{};
@@ -263,7 +264,7 @@ class append_col_ : public operation_cl<append_col_<T_a, T_b>,
263264
true);
264265
std::string col_index_name_b
265266
= "(" + col_index_name + " - " + var_name_ + "_first_cols)";
266-
std::map<const void*, const char*> generated_b;
267+
std::unordered_map<const void*, const char*> generated_b;
267268
kernel_parts parts_b = this->template get_arg<1>().get_kernel_parts(
268269
generated_b, generated_all, name_gen, row_index_name,
269270
col_index_name_b, true);
@@ -291,14 +292,15 @@ class append_col_ : public operation_cl<append_col_<T_a, T_b>,
291292
* @param[in,out] arg_num consecutive number of the first argument to set.
292293
* This is incremented for each argument set by this function.
293294
*/
294-
inline void set_args(std::map<const void*, const char*>& generated,
295-
std::map<const void*, const char*>& generated_all,
296-
cl::Kernel& kernel, int& arg_num) const {
295+
inline void set_args(
296+
std::unordered_map<const void*, const char*>& generated,
297+
std::unordered_map<const void*, const char*>& generated_all,
298+
cl::Kernel& kernel, int& arg_num) const {
297299
if (generated.count(this) == 0) {
298300
generated[this] = "";
299301
this->template get_arg<0>().set_args(generated, generated_all, kernel,
300302
arg_num);
301-
std::map<const void*, const char*> generated_b;
303+
std::unordered_map<const void*, const char*> generated_b;
302304
this->template get_arg<1>().set_args(generated_b, generated_all, kernel,
303305
arg_num);
304306
kernel.setArg(arg_num++, this->template get_arg<0>().cols());

stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp

+11 -10
Original file line number | Diff line number | Diff line change
@@ -71,8 +71,8 @@ class as_column_vector_or_scalar_
7171
* @return part of kernel with code for this and nested expressions
7272
*/
7373
inline kernel_parts get_kernel_parts(
74-
std::map<const void*, const char*>& generated,
75-
std::map<const void*, const char*>& generated_all,
74+
std::unordered_map<const void*, const char*>& generated,
75+
std::unordered_map<const void*, const char*>& generated_all,
7676
name_generator& name_gen, const std::string& row_index_name,
7777
const std::string& col_index_name, bool view_handled) const {
7878
kernel_parts res{};
@@ -82,7 +82,7 @@ class as_column_vector_or_scalar_
8282
std::string row_index_name_arg = row_index_name;
8383
std::string col_index_name_arg = col_index_name;
8484
modify_argument_indices(row_index_name_arg, col_index_name_arg);
85-
std::map<const void*, const char*> generated2;
85+
std::unordered_map<const void*, const char*> generated2;
8686
res = this->template get_arg<0>().get_kernel_parts(
8787
generated2, generated_all, name_gen, row_index_name_arg,
8888
col_index_name_arg, view_handled);
@@ -134,8 +134,8 @@ class as_column_vector_or_scalar_
134134
* @return part of kernel with code for this expressions
135135
*/
136136
inline kernel_parts get_kernel_parts_lhs(
137-
std::map<const void*, const char*>& generated,
138-
std::map<const void*, const char*>& generated_all,
137+
std::unordered_map<const void*, const char*>& generated,
138+
std::unordered_map<const void*, const char*>& generated_all,
139139
name_generator& name_gen, const std::string& row_index_name,
140140
const std::string& col_index_name) const {
141141
if (generated.count(this) == 0) {
@@ -145,7 +145,7 @@ class as_column_vector_or_scalar_
145145
std::string row_index_name_arg = row_index_name;
146146
std::string col_index_name_arg = col_index_name;
147147
modify_argument_indices(row_index_name_arg, col_index_name_arg);
148-
std::map<const void*, const char*> generated2;
148+
std::unordered_map<const void*, const char*> generated2;
149149
kernel_parts res = this->template get_arg<0>().get_kernel_parts_lhs(
150150
generated2, generated_all, name_gen, row_index_name_arg,
151151
col_index_name_arg);
@@ -185,12 +185,13 @@ class as_column_vector_or_scalar_
185185
* @param[in,out] arg_num consecutive number of the first argument to set.
186186
* This is incremented for each argument set by this function.
187187
*/
188-
inline void set_args(std::map<const void*, const char*>& generated,
189-
std::map<const void*, const char*>& generated_all,
190-
cl::Kernel& kernel, int& arg_num) const {
188+
inline void set_args(
189+
std::unordered_map<const void*, const char*>& generated,
190+
std::unordered_map<const void*, const char*>& generated_all,
191+
cl::Kernel& kernel, int& arg_num) const {
191192
if (generated.count(this) == 0) {
192193
generated[this] = "";
193-
std::map<const void*, const char*> generated2;
194+
std::unordered_map<const void*, const char*> generated2;
194195
this->template get_arg<0>().set_args(generated2, generated_all, kernel,
195196
arg_num);
196197
if (generated_all.count(this) == 0) {

stan/math/opencl/kernel_generator/block_zero_based.hpp

+11 -10
Original file line number | Diff line number | Diff line change
@@ -101,8 +101,8 @@ class block_
101101
* @return part of kernel with code for this and nested expressions
102102
*/
103103
inline kernel_parts get_kernel_parts(
104-
std::map<const void*, const char*>& generated,
105-
std::map<const void*, const char*>& generated_all,
104+
std::unordered_map<const void*, const char*>& generated,
105+
std::unordered_map<const void*, const char*>& generated_all,
106106
name_generator& name_gen, const std::string& row_index_name,
107107
const std::string& col_index_name, bool view_handled) const {
108108
kernel_parts res{};
@@ -112,7 +112,7 @@ class block_
112112
std::string row_index_name_arg = row_index_name;
113113
std::string col_index_name_arg = col_index_name;
114114
modify_argument_indices(row_index_name_arg, col_index_name_arg);
115-
std::map<const void*, const char*> generated2;
115+
std::unordered_map<const void*, const char*> generated2;
116116
res = this->template get_arg<0>().get_kernel_parts(
117117
generated2, generated_all, name_gen, row_index_name_arg,
118118
col_index_name_arg, view_handled);
@@ -175,8 +175,8 @@ class block_
175175
* @return part of kernel with code for this expressions
176176
*/
177177
inline kernel_parts get_kernel_parts_lhs(
178-
std::map<const void*, const char*>& generated,
179-
std::map<const void*, const char*>& generated_all,
178+
std::unordered_map<const void*, const char*>& generated,
179+
std::unordered_map<const void*, const char*>& generated_all,
180180
name_generator& name_gen, const std::string& row_index_name,
181181
const std::string& col_index_name) const {
182182
if (generated.count(this) == 0) {
@@ -186,7 +186,7 @@ class block_
186186
std::string row_index_name_arg = row_index_name;
187187
std::string col_index_name_arg = col_index_name;
188188
modify_argument_indices(row_index_name_arg, col_index_name_arg);
189-
std::map<const void*, const char*> generated2;
189+
std::unordered_map<const void*, const char*> generated2;
190190
kernel_parts res = this->template get_arg<0>().get_kernel_parts_lhs(
191191
generated2, generated_all, name_gen, row_index_name_arg,
192192
col_index_name_arg);
@@ -226,12 +226,13 @@ class block_
226226
* @param[in,out] arg_num consecutive number of the first argument to set.
227227
* This is incremented for each argument set by this function.
228228
*/
229-
inline void set_args(std::map<const void*, const char*>& generated,
230-
std::map<const void*, const char*>& generated_all,
231-
cl::Kernel& kernel, int& arg_num) const {
229+
inline void set_args(
230+
std::unordered_map<const void*, const char*>& generated,
231+
std::unordered_map<const void*, const char*>& generated_all,
232+
cl::Kernel& kernel, int& arg_num) const {
232233
if (generated.count(this) == 0) {
233234
generated[this] = "";
234-
std::map<const void*, const char*> generated2;
235+
std::unordered_map<const void*, const char*> generated2;
235236
this->template get_arg<0>().set_args(generated2, generated_all, kernel,
236237
arg_num);
237238
if (generated_all.count(this) == 0) {

stan/math/opencl/kernel_generator/calc_if.hpp

+8 -7
Original file line number | Diff line number | Diff line change
@@ -66,10 +66,10 @@ class calc_if_
6666
*/
6767
template <typename T_result>
6868
kernel_parts get_whole_kernel_parts(
69-
std::map<const void*, const char*>& generated,
70-
std::map<const void*, const char*>& generated_all, name_generator& ng,
71-
const std::string& row_index_name, const std::string& col_index_name,
72-
const T_result& result) const {
69+
std::unordered_map<const void*, const char*>& generated,
70+
std::unordered_map<const void*, const char*>& generated_all,
71+
name_generator& ng, const std::string& row_index_name,
72+
const std::string& col_index_name, const T_result& result) const {
7373
if (Do_Calculate) {
7474
return this->template get_arg<0>().get_whole_kernel_parts(
7575
generated, generated_all, ng, row_index_name, col_index_name, result);
@@ -88,9 +88,10 @@ class calc_if_
8888
* @param[in,out] arg_num consecutive number of the first argument to set.
8989
* This is incremented for each argument set by this function.
9090
*/
91-
inline void set_args(std::map<const void*, const char*>& generated,
92-
std::map<const void*, const char*>& generated_all,
93-
cl::Kernel& kernel, int& arg_num) const {
91+
inline void set_args(
92+
std::unordered_map<const void*, const char*>& generated,
93+
std::unordered_map<const void*, const char*>& generated_all,
94+
cl::Kernel& kernel, int& arg_num) const {
9495
if (Do_Calculate) {
9596
this->template get_arg<0>().set_args(generated, generated_all, kernel,
9697
arg_num);

0 commit comments

Comments
 (0)