stan-dev · t4c1 · May 5, 2021 · Apr 16, 2021 · Apr 16, 2021 · Apr 16, 2021
diff --git a/stan/math/opencl/kernel_generator.hpp b/stan/math/opencl/kernel_generator.hpp
@@ -131,6 +131,7 @@
 #include <stan/math/opencl/kernel_generator/index.hpp>
 #include <stan/math/opencl/kernel_generator/indexing.hpp>
 #include <stan/math/opencl/kernel_generator/opencl_code.hpp>
+#include <stan/math/opencl/kernel_generator/cast.hpp>
 
 #include <stan/math/opencl/kernel_generator/multi_result_kernel.hpp>
 #include <stan/math/opencl/kernel_generator/get_kernel_source_for_evaluating_into.hpp>

diff --git a/stan/math/opencl/kernel_generator/cast.hpp b/stan/math/opencl/kernel_generator/cast.hpp
@@ -0,0 +1,101 @@
+#ifndef STAN_MATH_OPENCL_KERNEL_GENERATOR_CAST_HPP
+#define STAN_MATH_OPENCL_KERNEL_GENERATOR_CAST_HPP
+#ifdef STAN_OPENCL
+
+#include <stan/math/prim/meta.hpp>
+#include <stan/math/opencl/matrix_cl_view.hpp>
+#include <stan/math/opencl/kernel_generator/common_return_scalar.hpp>
+#include <stan/math/opencl/kernel_generator/type_str.hpp>
+#include <stan/math/opencl/kernel_generator/name_generator.hpp>
+#include <stan/math/opencl/kernel_generator/operation_cl.hpp>
+#include <stan/math/opencl/kernel_generator/as_operation_cl.hpp>
+#include <array>
+#include <string>
+#include <type_traits>
+#include <set>
+#include <utility>
+
+namespace stan {
+namespace math {
+
+/** \addtogroup opencl_kernel_generator
+ *  @{
+ */
+
+/**
+ * Represents a typecast of the scalar in kernel generator expressions.
+ * The generated kernel code converts the argument with a C-style cast.
+ * @tparam Scal type of the scalar of result
+ * @tparam T type of argument
+ */
+template <typename Scal, typename T>
+class cast_ : public operation_cl<cast_<Scal, T>, Scal, T> {
+ public:
+  using Scalar = Scal;
+  using base = operation_cl<cast_<Scal, T>, Scalar, T>;
+  using base::var_name_;
+
+  /**
+   * Constructor
+   * @param arg argument expression
+   */
+  explicit cast_(T&& arg) : base(std::forward<T>(arg)) {}
+
+  /**
+   * Generates kernel code that casts the argument variable to `Scalar`.
+   * @param row_index_name row index variable name (not used here)
+   * @param col_index_name column index variable name (not used here)
+   * @param view_handled whether caller already handled matrix view
+   * @param var_name_arg variable name of the nested expression
+   * @return part of kernel with code for this expression
+   */
+  inline kernel_parts generate(const std::string& row_index_name,
+                               const std::string& col_index_name,
+                               const bool view_handled,
+                               const std::string& var_name_arg) const {
+    kernel_parts res{};
+
+    res.body = type_str<Scalar>() + " " + var_name_ + " = ("
+               + type_str<Scalar>() + ")" + var_name_arg + ";\n";
+    return res;
+  }
+
+  inline auto deep_copy() const {
+    auto&& arg_copy = this->template get_arg<0>().deep_copy();
+    return cast_<Scalar, std::remove_reference_t<decltype(arg_copy)>>{
+        std::move(arg_copy)};
+  }
+};
+
+/**
+ * Typecast the scalar of a kernel generator expression.
+ * @tparam Scalar type of the scalar to cast to
+ * @tparam T type of argument
+ * @param a input argument
+ * @return Typecast of given expression
+ */
+template <typename Scalar, typename T,
+          require_all_kernel_expressions_and_none_scalar_t<T>* = nullptr>
+inline auto cast(T&& a) {
+  auto&& a_operation = as_operation_cl(std::forward<T>(a)).deep_copy();
+  return cast_<Scalar, std::remove_reference_t<decltype(a_operation)>>(
+      std::move(a_operation));
+}
+
+/**
+ * Typecast a scalar.
+ * @tparam Scalar type to cast to
+ * @tparam T type of argument
+ * @param a input argument
+ * @return the argument converted to `Scalar`
+ */
+template <typename Scalar, typename T, require_stan_scalar_t<T>* = nullptr>
+inline Scalar cast(T a) {
+  return a;
+}
+
+/** @}*/
+}  // namespace math
+}  // namespace stan
+#endif
+#endif
diff --git a/stan/math/opencl/prim/bernoulli_cdf.hpp b/stan/math/opencl/prim/bernoulli_cdf.hpp
@@ -49,13 +49,13 @@ return_type_t<T_prob_cl> bernoulli_cdf(const T_n_cl& n,
                                       theta_val, "in the interval [0, 1]");
   auto theta_bounded_expr = 0.0 <= theta_val && theta_val <= 1.0;
 
-  auto any_n_negative = colwise_max(constant(0, N, 1) + (n < 0));
+  auto any_n_negative = colwise_max(cast<char>(n < 0));
   auto cond = n >= 1;
   auto Pi_uncond = 1.0 - theta_val;
   auto Pi = select(cond, INFTY, Pi_uncond);
   auto P_expr = colwise_prod(select(cond, 1.0, Pi_uncond));
 
-  matrix_cl<double> any_n_negative_cl;
+  matrix_cl<char> any_n_negative_cl;
   matrix_cl<double> Pi_cl;
   matrix_cl<double> P_cl;
 

diff --git a/stan/math/opencl/prim/bernoulli_lccdf.hpp b/stan/math/opencl/prim/bernoulli_lccdf.hpp
@@ -50,13 +50,13 @@ return_type_t<T_prob_cl> bernoulli_lccdf(const T_n_cl& n,
                                       theta_val, "in the interval [0, 1]");
   auto theta_bounded_expr = 0.0 <= theta_val && theta_val <= 1.0;
 
-  auto any_n_negative = colwise_max(0 + (n < 0));
-  auto any_n_over_one = colwise_max(constant(0, N, 1) + (n >= 1));
+  auto any_n_negative = colwise_max(cast<char>(n < 0));
+  auto any_n_over_one = colwise_max(cast<char>(n >= 1));
   auto P_expr = colwise_sum(log(theta_val));
   auto deriv = elt_divide(1.0, theta_val);
 
-  matrix_cl<double> any_n_negative_cl;
-  matrix_cl<double> any_n_over_one_cl;
+  matrix_cl<char> any_n_negative_cl;
+  matrix_cl<char> any_n_over_one_cl;
   matrix_cl<double> P_cl;
   matrix_cl<double> deriv_cl;
 

diff --git a/stan/math/opencl/prim/bernoulli_lcdf.hpp b/stan/math/opencl/prim/bernoulli_lcdf.hpp
@@ -50,13 +50,13 @@ return_type_t<T_prob_cl> bernoulli_lcdf(const T_n_cl& n,
                                       theta_val, "in the interval [0, 1]");
   auto theta_bounded_expr = 0.0 <= theta_val && theta_val <= 1.0;
 
-  auto any_n_negative = colwise_max(0 + (n < 0));
+  auto any_n_negative = colwise_max(cast<char>(n < 0));
   auto Pi = 1.0 - theta_val;
   auto cond = n >= 1;
   auto P_expr = colwise_sum(select(cond, 0.0, log(Pi)));
   auto deriv = select(cond, 0.0, elt_divide(-1.0, Pi));
 
-  matrix_cl<double> any_n_negative_cl;
+  matrix_cl<char> any_n_negative_cl;
   matrix_cl<double> P_cl;
   matrix_cl<double> deriv_cl;
 

diff --git a/stan/math/opencl/prim/cauchy_cdf.hpp b/stan/math/opencl/prim/cauchy_cdf.hpp
@@ -63,8 +63,7 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> cauchy_cdf(
       = check_cl(function, "Scale parameter", sigma_val, "positive finite");
   auto sigma_positive_finite_expr = 0 < sigma_val && isfinite(sigma_val);
 
-  auto any_y_neg_inf
-      = colwise_max(constant(0, N, 1) + (y_val == NEGATIVE_INFTY));
+  auto any_y_neg_inf = colwise_max(cast<char>(y_val == NEGATIVE_INFTY));
   auto cond = y_val == INFTY;
   auto sigma_inv = elt_divide(1.0, sigma_val);
   auto z = elt_multiply(y_val - mu_val, sigma_inv);
@@ -76,7 +75,7 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> cauchy_cdf(
       elt_divide(sigma_inv, -pi() * elt_multiply(1.0 + square(z), Pn)));
   auto sigma_deriv_tmp = elt_multiply(z, mu_deriv_tmp);
 
-  matrix_cl<double> any_y_neg_inf_cl;
+  matrix_cl<char> any_y_neg_inf_cl;
   matrix_cl<double> P_cl;
   matrix_cl<double> mu_deriv_cl;
   matrix_cl<double> y_deriv_cl;

diff --git a/stan/math/opencl/prim/exp_mod_normal_cdf.hpp b/stan/math/opencl/prim/exp_mod_normal_cdf.hpp
@@ -72,8 +72,7 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl, T_inv_scale_cl> exp_mod_normal_cdf(
       = check_cl(function, "Inv_cale parameter", lambda_val, "positive finite");
   auto lambda_positive_finite_expr = 0 < lambda_val && isfinite(lambda_val);
 
-  auto any_y_neg_inf
-      = colwise_max(constant(0, N, 1) + (y_val == NEGATIVE_INFTY));
+  auto any_y_neg_inf = colwise_max(cast<char>(y_val == NEGATIVE_INFTY));
   auto inv_sigma = elt_divide(1.0, sigma_val);
   auto diff = y_val - mu_val;
   auto v = elt_multiply(lambda_val, sigma_val);
@@ -102,7 +101,7 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl, T_inv_scale_cl> exp_mod_normal_cdf(
               - elt_multiply(elt_multiply(v, sigma_val) - diff, erf_calc)),
       cdf_n);
 
-  matrix_cl<double> any_y_neg_inf_cl;
+  matrix_cl<char> any_y_neg_inf_cl;
   matrix_cl<double> cdf_cl;
   matrix_cl<double> y_deriv_cl;
   matrix_cl<double> mu_deriv_cl;

diff --git a/stan/math/opencl/prim/exp_mod_normal_lccdf.hpp b/stan/math/opencl/prim/exp_mod_normal_lccdf.hpp
@@ -73,9 +73,8 @@ exp_mod_normal_lccdf(const T_y_cl& y, const T_loc_cl& mu,
       = check_cl(function, "Inv_cale parameter", lambda_val, "positive finite");
   auto lambda_positive_finite_expr = 0 < lambda_val && isfinite(lambda_val);
 
-  auto any_y_neg_inf
-      = colwise_max(constant(0, N, 1) + (y_val == NEGATIVE_INFTY));
-  auto any_y_pos_inf = colwise_max(constant(0, N, 1) + (y_val == INFTY));
+  auto any_y_neg_inf = colwise_max(cast<char>(y_val == NEGATIVE_INFTY));
+  auto any_y_pos_inf = colwise_max(cast<char>(y_val == INFTY));
   auto inv_sigma = elt_divide(1.0, sigma_val);
   auto diff = y_val - mu_val;
   auto scaled_diff = elt_multiply(diff, inv_sigma * INV_SQRT_TWO);
@@ -104,8 +103,8 @@ exp_mod_normal_lccdf(const T_y_cl& y, const T_loc_cl& mu,
                        - INV_SQRT_TWO_PI * elt_multiply(sigma_val, exp_term_2)),
       ccdf_n);
 
-  matrix_cl<double> any_y_neg_inf_cl;
-  matrix_cl<double> any_y_pos_inf_cl;
+  matrix_cl<char> any_y_neg_inf_cl;
+  matrix_cl<char> any_y_pos_inf_cl;
   matrix_cl<double> ccdf_log_cl;
   matrix_cl<double> mu_deriv_cl;
   matrix_cl<double> y_deriv_cl;

diff --git a/stan/math/opencl/prim/exp_mod_normal_lcdf.hpp b/stan/math/opencl/prim/exp_mod_normal_lcdf.hpp
@@ -73,9 +73,8 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl, T_inv_scale_cl> exp_mod_normal_lcdf(
       = check_cl(function, "Inv_cale parameter", lambda_val, "positive finite");
   auto lambda_positive_finite_expr = 0 < lambda_val && isfinite(lambda_val);
 
-  auto any_y_neg_inf
-      = colwise_max(constant(0, N, 1) + (y_val == NEGATIVE_INFTY));
-  auto any_y_pos_inf = colwise_max(constant(0, N, 1) + (y_val == INFTY));
+  auto any_y_neg_inf = colwise_max(cast<char>(y_val == NEGATIVE_INFTY));
+  auto any_y_pos_inf = colwise_max(cast<char>(y_val == INFTY));
   auto sigma_inv = elt_divide(1.0, sigma_val);
   auto diff = y_val - mu_val;
   auto scaled_diff = elt_multiply(diff * INV_SQRT_TWO, sigma_inv);
@@ -105,8 +104,8 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl, T_inv_scale_cl> exp_mod_normal_lcdf(
               - elt_multiply(elt_multiply(v, sigma_val) - diff, erf_calc)),
       cdf_n);
 
-  matrix_cl<double> any_y_neg_inf_cl;
-  matrix_cl<double> any_y_pos_inf_cl;
+  matrix_cl<char> any_y_neg_inf_cl;
+  matrix_cl<char> any_y_pos_inf_cl;
   matrix_cl<double> cdf_log_cl;
   matrix_cl<double> mu_deriv_cl;
   matrix_cl<double> y_deriv_cl;

diff --git a/stan/math/opencl/prim/gamma_lpdf.hpp b/stan/math/opencl/prim/gamma_lpdf.hpp
@@ -80,7 +80,7 @@ return_type_t<T_y_cl, T_shape_cl, T_inv_scale_cl> gamma_lpdf(
                                         beta_val, "positive finite");
   auto beta_pos_finite_expr = beta_val > 0 && isfinite(beta_val);
 
-  auto any_y_negative_expr = colwise_max(constant(0, N, 1) + (y_val < 0));
+  auto any_y_negative_expr = colwise_max(cast<char>(y_val < 0));
   auto log_y_expr = log(y_val);
   auto log_beta_expr = log(beta_val);
   auto logp1_expr = static_select<include_summand<propto, T_shape_cl>::value>(
@@ -99,7 +99,7 @@ return_type_t<T_y_cl, T_shape_cl, T_inv_scale_cl> gamma_lpdf(
   auto alpha_deriv_expr = log_beta_expr + log_y_expr - digamma(alpha_val);
   auto beta_deriv_expr = elt_divide(alpha_val, beta_val) - y_val;
 
-  matrix_cl<int> any_y_negative_cl;
+  matrix_cl<char> any_y_negative_cl;
   matrix_cl<double> logp_cl;
   matrix_cl<double> y_deriv_cl;
   matrix_cl<double> alpha_deriv_cl;

diff --git a/stan/math/opencl/prim/inv_chi_square_lpdf.hpp b/stan/math/opencl/prim/inv_chi_square_lpdf.hpp
@@ -74,7 +74,7 @@ return_type_t<T_y_cl, T_dof_cl> inv_chi_square_lpdf(const T_y_cl& y,
       = check_cl(function, "Random variable", y_val, "not NaN");
   auto y_not_nan = !isnan(y_val);
 
-  auto any_y_nonpositive = colwise_max(constant(0, N, 1) + (y_val <= 0));
+  auto any_y_nonpositive = colwise_max(cast<char>(y_val <= 0));
   auto log_y = log(y_val);
   auto half_nu = nu_val * 0.5;
   auto two_over_y = elt_divide(0.5, y_val);
@@ -89,7 +89,7 @@ return_type_t<T_y_cl, T_dof_cl> inv_chi_square_lpdf(const T_y_cl& y,
   auto y_deriv = elt_divide(two_over_y - half_nu - 1.0, y_val);
   auto nu_deriv = -HALF_LOG_TWO - (digamma(half_nu) + log_y) * 0.5;
 
-  matrix_cl<int> any_y_nonpositive_cl;
+  matrix_cl<char> any_y_nonpositive_cl;
   matrix_cl<double> logp_cl;
   matrix_cl<double> y_deriv_cl;
   matrix_cl<double> nu_deriv_cl;

diff --git a/stan/math/opencl/prim/inv_gamma_lpdf.hpp b/stan/math/opencl/prim/inv_gamma_lpdf.hpp
@@ -76,7 +76,7 @@ return_type_t<T_y_cl, T_shape_cl, T_scale_cl> inv_gamma_lpdf(
       = check_cl(function, "Scale parameter", beta_val, "positive finite");
   auto beta_pos_finite = beta_val > 0 && isfinite(beta_val);
 
-  auto any_y_nonpositive = colwise_max(constant(0, N, 1) + (y_val <= 0));
+  auto any_y_nonpositive = colwise_max(cast<char>(y_val <= 0));
   auto log_y = log(y_val);
   auto log_beta = log(beta_val);
   auto inv_y = elt_divide(1.0, y_val);
@@ -98,20 +98,22 @@ return_type_t<T_y_cl, T_shape_cl, T_scale_cl> inv_gamma_lpdf(
   auto alpha_deriv = log_beta - digamma(alpha_val) - log_y;
   auto beta_deriv = elt_divide(alpha_val, beta_val) - inv_y;
 
-  matrix_cl<int> any_y_nonpositive_cl;
+  matrix_cl<char> any_y_nonpositive_cl;
   matrix_cl<double> logp_cl;
   matrix_cl<double> y_deriv_cl;
   matrix_cl<double> alpha_deriv_cl;
   matrix_cl<double> beta_deriv_cl;
 
   results(check_alpha_pos_finite, check_beta_pos_finite, check_y_not_nan,
-          logp_cl, y_deriv_cl, alpha_deriv_cl, beta_deriv_cl)
-      = expressions(alpha_pos_finite, beta_pos_finite, y_not_nan, logp_expr,
+          any_y_nonpositive_cl, logp_cl, y_deriv_cl, alpha_deriv_cl,
+          beta_deriv_cl)
+      = expressions(alpha_pos_finite, beta_pos_finite, y_not_nan,
+                    any_y_nonpositive, logp_expr,
                     calc_if<!is_constant<T_y_cl>::value>(y_deriv),
                     calc_if<!is_constant<T_shape_cl>::value>(alpha_deriv),
                     calc_if<!is_constant<T_scale_cl>::value>(beta_deriv));
 
-  if (from_matrix_cl(any_y_nonpositive).any()) {
+  if (from_matrix_cl(any_y_nonpositive_cl).any()) {
     return LOG_ZERO;
   }
 

diff --git a/stan/math/opencl/prim/logistic_cdf.hpp b/stan/math/opencl/prim/logistic_cdf.hpp
@@ -63,8 +63,7 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> logistic_cdf(
       = check_cl(function, "Scale parameter", sigma_val, "positive finite");
   auto sigma_positive_finite_expr = 0 < sigma_val && isfinite(sigma_val);
 
-  auto any_y_neg_inf
-      = colwise_max(constant(0, N, 1) + (y_val == NEGATIVE_INFTY));
+  auto any_y_neg_inf = colwise_max(cast<char>(y_val == NEGATIVE_INFTY));
   auto cond = y_val == INFTY;
   auto inv_sigma = elt_divide(1.0, sigma_val);
   auto mu_minus_y_div_sigma = elt_multiply(mu_val - y_val, inv_sigma);
@@ -78,7 +77,7 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> logistic_cdf(
                                        Pn));
   auto sigma_deriv_tmp = elt_multiply(y_deriv_tmp, mu_minus_y_div_sigma);
 
-  matrix_cl<double> any_y_neg_inf_cl;
+  matrix_cl<char> any_y_neg_inf_cl;
   matrix_cl<double> P_cl;
   matrix_cl<double> mu_deriv_cl;
   matrix_cl<double> y_deriv_cl;

diff --git a/stan/math/opencl/prim/logistic_lccdf.hpp b/stan/math/opencl/prim/logistic_lccdf.hpp
@@ -63,9 +63,8 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> logistic_lccdf(
       = check_cl(function, "Scale parameter", sigma_val, "positive finite");
   auto sigma_positive_finite_expr = 0 < sigma_val && isfinite(sigma_val);
 
-  auto any_y_neg_inf
-      = colwise_max(constant(0, N, 1) + (y_val == NEGATIVE_INFTY));
-  auto any_y_pos_inf = colwise_max(constant(0, N, 1) + (y_val == INFTY));
+  auto any_y_neg_inf = colwise_max(cast<char>(y_val == NEGATIVE_INFTY));
+  auto any_y_pos_inf = colwise_max(cast<char>(y_val == INFTY));
   auto inv_sigma = elt_divide(1.0, sigma_val);
   auto mu_minus_y_div_sigma = elt_multiply(mu_val - y_val, inv_sigma);
   auto exp_scaled_diff = exp(mu_minus_y_div_sigma);
@@ -78,8 +77,8 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> logistic_lccdf(
   auto y_deriv = -mu_deriv;
   auto sigma_deriv = elt_multiply(-mu_deriv, mu_minus_y_div_sigma);
 
-  matrix_cl<double> any_y_neg_inf_cl;
-  matrix_cl<double> any_y_pos_inf_cl;
+  matrix_cl<char> any_y_neg_inf_cl;
+  matrix_cl<char> any_y_pos_inf_cl;
   matrix_cl<double> P_cl;
   matrix_cl<double> mu_deriv_cl;
   matrix_cl<double> y_deriv_cl;

diff --git a/stan/math/opencl/prim/logistic_lcdf.hpp b/stan/math/opencl/prim/logistic_lcdf.hpp
@@ -63,8 +63,7 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> logistic_lcdf(
       = check_cl(function, "Scale parameter", sigma_val, "positive finite");
   auto sigma_positive_finite_expr = 0 < sigma_val && isfinite(sigma_val);
 
-  auto any_y_neg_inf
-      = colwise_max(constant(0, N, 1) + (y_val == NEGATIVE_INFTY));
+  auto any_y_neg_inf = colwise_max(cast<char>(y_val == NEGATIVE_INFTY));
   auto cond = y_val == INFTY;
   auto inv_sigma = elt_divide(1.0, sigma_val);
   auto mu_minus_y_div_sigma = elt_multiply(mu_val - y_val, inv_sigma);
@@ -78,7 +77,7 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> logistic_lcdf(
   auto mu_deriv = -y_deriv;
   auto sigma_deriv = elt_multiply(y_deriv, mu_minus_y_div_sigma);
 
-  matrix_cl<double> any_y_neg_inf_cl;
+  matrix_cl<char> any_y_neg_inf_cl;
   matrix_cl<double> P_cl;
   matrix_cl<double> mu_deriv_cl;
   matrix_cl<double> y_deriv_cl;

diff --git a/stan/math/opencl/prim/lognormal_cdf.hpp b/stan/math/opencl/prim/lognormal_cdf.hpp
@@ -63,7 +63,7 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> lognormal_cdf(
       = check_cl(function, "Scale parameter", sigma_val, "positive finite");
   auto sigma_positive_finite_expr = 0 < sigma_val && isfinite(sigma_val);
 
-  auto any_y_zero = colwise_max(constant(0, N, 1) + (y_val == 0.0));
+  auto any_y_zero = colwise_max(cast<char>(y_val == 0.0));
   auto log_y = log(y_val);
   auto scaled_diff = elt_divide(log_y - mu_val, sigma_val * SQRT_TWO);
   auto erfc_m_diff = erfc(-scaled_diff);
@@ -75,7 +75,7 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> lognormal_cdf(
   auto y_deriv_tmp = elt_divide(-mu_deriv_tmp, y_val);
   auto sigma_deriv_tmp = elt_multiply(mu_deriv_tmp, scaled_diff * SQRT_TWO);
 
-  matrix_cl<double> any_y_zero_cl;
+  matrix_cl<char> any_y_zero_cl;
   matrix_cl<double> cdf_cl;
   matrix_cl<double> mu_deriv_cl;
   matrix_cl<double> y_deriv_cl;