|
86 | 86 | // PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED
|
87 | 87 | // HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
|
88 | 88 | // MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
// Reports whether `val` has no bits set inside the mask `pos - 1`, i.e.
// (val & (pos - 1)) == 0. For a power-of-two `pos` this is the usual
// "val is a multiple of pos" test. A `pos` of zero imposes no constraint and
// always yields true.
static inline bool is_aligned(const unsigned val, const unsigned pos) {
  if (pos == 0) return true;
  return (val & (pos - 1)) == 0;
}
| 92 | + |
// Reports whether the ranges [astart, astart + asize) and
// [bstart, bstart + bsize) share at least one index (for positive sizes).
// A size of zero is treated as a size of one.
static inline bool is_overlapped(const int astart, int asize, const int bstart,
                                 int bsize) {
  asize = asize == 0 ? 1 : asize;
  bsize = bsize == 0 ? 1 : bsize;

  const int aend = astart + asize;
  const int bend = bstart + bsize;

  // The ranges intersect exactly when the span that covers both of them is
  // shorter than their two lengths laid end to end.
  const int covering_span = std::max(aend, bend) - std::min(astart, bstart);
  return covering_span < asize + bsize;
}

// Variant of is_overlapped() with one exemption. The pair is reported as NOT
// overlapping when all of the following hold:
//   - range a starts below range b,
//   - a and b overlap, and
//   - a does not overlap the range of the same size that immediately follows
//     b, i.e. [bstart + bsize, bstart + 2 * bsize).
// In every other case the result is that of is_overlapped(). A size of zero
// is treated as a size of one.
static inline bool is_overlapped_widen(const int astart, int asize,
                                       const int bstart, int bsize) {
  // Normalise here as well: the "range following b" below is offset by the
  // normalised size of b.
  asize = asize == 0 ? 1 : asize;
  bsize = bsize == 0 ? 1 : bsize;

  if (!is_overlapped(astart, asize, bstart, bsize)) return false;

  const bool exempt =
      astart < bstart && !is_overlapped(astart, asize, bstart + bsize, bsize);
  return !exempt;
}
| 118 | + |
// require_align(val, pos): asserts through CHECK_EQ that is_aligned(val, pos)
// holds. Use it as a single statement followed by a semicolon.
//
// With DEBUG defined, the two values are first written to std::cout when the
// test fails. That variant is wrapped in do { } while (0) so the whole
// expansion is one statement and stays intact inside an unbraced if/else.
// Note that `val` and `pos` may be evaluated more than once.
#ifdef DEBUG
#define require_align(val, pos)                        \
  do {                                                 \
    if (!is_aligned(val, pos)) {                       \
      std::cout << (val) << " " << (pos) << std::endl; \
    }                                                  \
    CHECK_EQ(is_aligned(val, pos), true);              \
  } while (0)
#else
#define require_align(val, pos) CHECK_EQ(is_aligned(val, pos), true)
#endif
| 128 | + |
| 129 | +// RVV |
| 130 | +// The following code about RVV is based on: |
| 131 | +// https://github.com/riscv/riscv-isa-sim |
| 132 | +// Copyright (c) 2010-2017, The Regents of the University of California |
| 133 | +// (Regents). All Rights Reserved. |
| 134 | + |
| 135 | +// Redistribution and use in source and binary forms, with or without |
| 136 | +// modification, are permitted provided that the following conditions are met: |
| 137 | +// 1. Redistributions of source code must retain the above copyright |
| 138 | +// notice, this list of conditions and the following disclaimer. |
| 139 | +// 2. Redistributions in binary form must reproduce the above copyright |
| 140 | +// notice, this list of conditions and the following disclaimer in the |
| 141 | +// documentation and/or other materials provided with the distribution. |
| 142 | +// 3. Neither the name of the Regents nor the |
| 143 | +// names of its contributors may be used to endorse or promote products |
| 144 | +// derived from this software without specific prior written permission. |
| 145 | + |
| 146 | +// IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, |
| 147 | +// SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, |
| 148 | +// ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF |
| 149 | +// REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 150 | + |
| 151 | +// REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED |
| 152 | +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 153 | +// PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED |
| 154 | +// HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE |
| 155 | +// MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
// Maps an element width in bits to the unsigned integer type of that width.
// Only the widths 8, 16, 32, 64 and 128 are specialised here; the primary
// template is declared but not defined, so any other width is a compile
// error at the point of use.
template <uint64_t N>
struct type_usew_t;

template <>
struct type_usew_t<8> {
  using type = uint8_t;
};

template <>
struct type_usew_t<16> {
  using type = uint16_t;
};

template <>
struct type_usew_t<32> {
  using type = uint32_t;
};

template <>
struct type_usew_t<64> {
  using type = uint64_t;
};

// 128-bit elements rely on the compiler-provided __uint128_t.
template <>
struct type_usew_t<128> {
  using type = __uint128_t;
};
// Maps an element width in bits to the signed integer type of that width.
// Only the widths 8, 16, 32, 64 and 128 are specialised here; the primary
// template is declared but not defined, so any other width is a compile
// error at the point of use.
template <uint64_t N>
struct type_sew_t;

template <>
struct type_sew_t<8> {
  using type = int8_t;
};

template <>
struct type_sew_t<16> {
  using type = int16_t;
};

template <>
struct type_sew_t<32> {
  using type = int32_t;
};

template <>
struct type_sew_t<64> {
  using type = int64_t;
};

// 128-bit elements rely on the compiler-provided __int128_t.
template <>
struct type_sew_t<128> {
  using type = __int128_t;
};
| 209 | + |
// Operand declarations for a vector-vector operation on signed x-bit
// elements (the element type is type_sew_t<x>::type). Expands, in the
// enclosing scope, to three declarations:
//   vd  - reference bound to Rvvelt<T>(rvv_vd_reg(), i, true)
//   vs1 - copy of Rvvelt<T>(rvv_vs1_reg(), i)
//   vs2 - copy of Rvvelt<T>(rvv_vs2_reg(), i)
// An index named `i` must be visible where the macro is expanded.
#define VV_PARAMS(x)                                  \
  type_sew_t<(x)>::type& vd =                         \
      Rvvelt<type_sew_t<(x)>::type>(rvv_vd_reg(), i, true); \
  type_sew_t<(x)>::type vs1 =                         \
      Rvvelt<type_sew_t<(x)>::type>(rvv_vs1_reg(), i); \
  type_sew_t<(x)>::type vs2 =                         \
      Rvvelt<type_sew_t<(x)>::type>(rvv_vs2_reg(), i);
| 215 | + |
// Operand declarations for a vector-vector operation on unsigned x-bit
// elements (the element type is type_usew_t<x>::type). Expands, in the
// enclosing scope, to three declarations:
//   vd  - reference bound to Rvvelt<T>(rvv_vd_reg(), i, true)
//   vs1 - copy of Rvvelt<T>(rvv_vs1_reg(), i)
//   vs2 - copy of Rvvelt<T>(rvv_vs2_reg(), i)
// An index named `i` must be visible where the macro is expanded.
#define VV_UPARAMS(x)                                  \
  type_usew_t<(x)>::type& vd =                         \
      Rvvelt<type_usew_t<(x)>::type>(rvv_vd_reg(), i, true); \
  type_usew_t<(x)>::type vs1 =                         \
      Rvvelt<type_usew_t<(x)>::type>(rvv_vs1_reg(), i); \
  type_usew_t<(x)>::type vs2 =                         \
      Rvvelt<type_usew_t<(x)>::type>(rvv_vs2_reg(), i);
| 221 | + |
// Operand declarations for a vector-scalar operation on signed x-bit
// elements (the element type is type_sew_t<x>::type). Expands, in the
// enclosing scope, to:
//   vd  - reference bound to Rvvelt<T>(rvv_vd_reg(), i, true)
//   rs1 - get_register(rs1_reg()) converted to the element type
//   vs2 - copy of Rvvelt<T>(rvv_vs2_reg(), i)
// An index named `i` must be visible where the macro is expanded.
#define VX_PARAMS(x)                                             \
  type_sew_t<(x)>::type& vd =                                    \
      Rvvelt<type_sew_t<(x)>::type>(rvv_vd_reg(), i, true);      \
  type_sew_t<(x)>::type rs1 =                                    \
      static_cast<type_sew_t<(x)>::type>(get_register(rs1_reg())); \
  type_sew_t<(x)>::type vs2 =                                    \
      Rvvelt<type_sew_t<(x)>::type>(rvv_vs2_reg(), i);
| 227 | + |
// Operand declarations for a vector-scalar operation on unsigned x-bit
// elements (the element type is type_usew_t<x>::type). Expands, in the
// enclosing scope, to:
//   vd  - reference bound to Rvvelt<T>(rvv_vd_reg(), i, true)
//   rs1 - get_register(rs1_reg()) converted to the element type
//   vs2 - copy of Rvvelt<T>(rvv_vs2_reg(), i)
// An index named `i` must be visible where the macro is expanded.
#define VX_UPARAMS(x)                                             \
  type_usew_t<(x)>::type& vd =                                    \
      Rvvelt<type_usew_t<(x)>::type>(rvv_vd_reg(), i, true);      \
  type_usew_t<(x)>::type rs1 =                                    \
      static_cast<type_usew_t<(x)>::type>(get_register(rs1_reg())); \
  type_usew_t<(x)>::type vs2 =                                    \
      Rvvelt<type_usew_t<(x)>::type>(rvv_vs2_reg(), i);
| 233 | + |
// Operand declarations for a vector-immediate operation on signed x-bit
// elements (the element type is type_sew_t<x>::type). Expands, in the
// enclosing scope, to:
//   vd    - reference bound to Rvvelt<T>(rvv_vd_reg(), i, true)
//   simm5 - instr_.RvvSimm5() converted to the element type
//   vs2   - copy of Rvvelt<T>(rvv_vs2_reg(), i)
// An index named `i` must be visible where the macro is expanded.
#define VI_PARAMS(x)                                        \
  type_sew_t<(x)>::type& vd =                               \
      Rvvelt<type_sew_t<(x)>::type>(rvv_vd_reg(), i, true); \
  type_sew_t<(x)>::type simm5 =                             \
      static_cast<type_sew_t<(x)>::type>(instr_.RvvSimm5()); \
  type_sew_t<(x)>::type vs2 =                               \
      Rvvelt<type_sew_t<(x)>::type>(rvv_vs2_reg(), i);
| 239 | + |
// Operand declarations for a vector-immediate operation on unsigned x-bit
// elements (the element type is type_usew_t<x>::type). Expands, in the
// enclosing scope, to:
//   vd    - reference bound to Rvvelt<T>(rvv_vd_reg(), i, true)
//   uimm5 - instr_.RvvUimm5() converted to the element type
//   vs2   - copy of Rvvelt<T>(rvv_vs2_reg(), i)
// An index named `i` must be visible where the macro is expanded.
#define VI_UPARAMS(x)                                        \
  type_usew_t<(x)>::type& vd =                               \
      Rvvelt<type_usew_t<(x)>::type>(rvv_vd_reg(), i, true); \
  type_usew_t<(x)>::type uimm5 =                             \
      static_cast<type_usew_t<(x)>::type>(instr_.RvvUimm5()); \
  type_usew_t<(x)>::type vs2 =                               \
      Rvvelt<type_usew_t<(x)>::type>(rvv_vs2_reg(), i);
| 245 | + |
// Operand declarations for an operation whose source elements are x bits
// wide and whose destination elements are half that width, signed flavour.
// Expands, in the enclosing scope, to:
//   half_x - compile-time constant (x) >> 1
//   vd     - reference to a type_sew_t<half_x> element obtained from
//            Rvvelt(rvv_vd_reg(), i, true)
//   uimm5  - instr_.RvvUimm5() converted to type_sew_t<x>::type
//   vs2    - copy of the type_sew_t<x> element Rvvelt(rvv_vs2_reg(), i)
// An index named `i` must be visible where the macro is expanded.
#define VN_PARAMS(x)                                             \
  constexpr int half_x = (x) >> 1;                               \
  type_sew_t<half_x>::type& vd =                                 \
      Rvvelt<type_sew_t<half_x>::type>(rvv_vd_reg(), i, true);   \
  type_sew_t<(x)>::type uimm5 =                                  \
      static_cast<type_sew_t<(x)>::type>(instr_.RvvUimm5());     \
  type_sew_t<(x)>::type vs2 =                                    \
      Rvvelt<type_sew_t<(x)>::type>(rvv_vs2_reg(), i);
| 252 | + |
// Operand declarations for an operation whose source elements are x bits
// wide and whose destination elements are half that width, unsigned flavour.
// Expands, in the enclosing scope, to:
//   half_x - compile-time constant (x) >> 1
//   vd     - reference to a type_usew_t<half_x> element obtained from
//            Rvvelt(rvv_vd_reg(), i, true)
//   uimm5  - instr_.RvvUimm5() converted to type_usew_t<x>::type
//   vs2    - copy of the type_sew_t<x> element Rvvelt(rvv_vs2_reg(), i)
// An index named `i` must be visible where the macro is expanded.
// NOTE(review): vs2 is read through the signed type_sew_t even though this is
// the unsigned variant; kept as is -- confirm against the users of this macro
// whether type_usew_t was intended.
#define VN_UPARAMS(x)                                            \
  constexpr int half_x = (x) >> 1;                               \
  type_usew_t<half_x>::type& vd =                                \
      Rvvelt<type_usew_t<half_x>::type>(rvv_vd_reg(), i, true);  \
  type_usew_t<(x)>::type uimm5 =                                 \
      static_cast<type_usew_t<(x)>::type>(instr_.RvvUimm5());    \
  type_sew_t<(x)>::type vs2 =                                    \
      Rvvelt<type_sew_t<(x)>::type>(rvv_vs2_reg(), i);
| 259 | + |
// Declares every operand flavour at once for signed x-bit elements (the
// element type is type_sew_t<x>::type). Expands, in the enclosing scope, to:
//   vd    - reference bound to Rvvelt<T>(rvv_vd_reg(), i, true)
//   vs1   - copy of Rvvelt<T>(rvv_vs1_reg(), i)
//   vs2   - copy of Rvvelt<T>(rvv_vs2_reg(), i)
//   rs1   - get_register(rs1_reg()) converted to the element type
//   simm5 - instr_.RvvSimm5() converted to the element type
// An index named `i` must be visible where the macro is expanded.
#define VXI_PARAMS(x)                                            \
  type_sew_t<(x)>::type& vd =                                    \
      Rvvelt<type_sew_t<(x)>::type>(rvv_vd_reg(), i, true);      \
  type_sew_t<(x)>::type vs1 =                                    \
      Rvvelt<type_sew_t<(x)>::type>(rvv_vs1_reg(), i);           \
  type_sew_t<(x)>::type vs2 =                                    \
      Rvvelt<type_sew_t<(x)>::type>(rvv_vs2_reg(), i);           \
  type_sew_t<(x)>::type rs1 =                                    \
      static_cast<type_sew_t<(x)>::type>(get_register(rs1_reg())); \
  type_sew_t<(x)>::type simm5 =                                  \
      static_cast<type_sew_t<(x)>::type>(instr_.RvvSimm5());
| 267 | + |
// Operand declarations for a slide-down step on signed x-bit elements.
// Expands, in the enclosing scope, to:
//   vd  - reference bound to Rvvelt<T>(rvv_vd_reg(), i, true)
//   vs2 - copy of Rvvelt<T>(rvv_vs2_reg(), i + off)
// `off` is parenthesised so an expression argument keeps its own grouping.
// An index named `i` must be visible where the macro is expanded.
#define VI_XI_SLIDEDOWN_PARAMS(x, off)                              \
  auto& vd = Rvvelt<type_sew_t<(x)>::type>(rvv_vd_reg(), i, true);  \
  auto vs2 = Rvvelt<type_sew_t<(x)>::type>(rvv_vs2_reg(), i + (off));
| 271 | + |
// Operand declarations for a slide-up step on signed x-bit elements.
// Expands, in the enclosing scope, to:
//   vd  - reference bound to Rvvelt<T>(rvv_vd_reg(), i, true)
//   vs2 - copy of Rvvelt<T>(rvv_vs2_reg(), i - offset)
// `offset` is parenthesised: without that, an argument such as `a + b` would
// expand to `i - a + b`.
// An index named `i` must be visible where the macro is expanded.
#define VI_XI_SLIDEUP_PARAMS(x, offset)                             \
  auto& vd = Rvvelt<type_sew_t<(x)>::type>(rvv_vd_reg(), i, true);  \
  auto vs2 = Rvvelt<type_sew_t<(x)>::type>(rvv_vs2_reg(), i - (offset));
| 275 | + |
/* Vector Integer Extension */
// Operand declarations for an integer-extension step, signed source.
// The destination element is x bits wide and the source element is
// x / scale bits wide. Expands, in the enclosing scope, to:
//   - a guard that hits UNREACHABLE() when x / scale is below 8
//   vd  - reference bound to Rvvelt<type_sew_t<x>::type>(rvv_vd_reg(), i,
//         true)
//   vs2 - copy of Rvvelt<type_sew_t<x / scale>::type>(rvv_vs2_reg(), i)
// An index named `i` must be visible where the macro is expanded.
#define VI_VIE_PARAMS(x, scale)                                     \
  if (((x) / (scale)) < 8) UNREACHABLE();                           \
  auto& vd = Rvvelt<type_sew_t<(x)>::type>(rvv_vd_reg(), i, true);  \
  auto vs2 = Rvvelt<type_sew_t<(x) / (scale)>::type>(rvv_vs2_reg(), i);
| 281 | + |
// Operand declarations for an integer-extension step, unsigned source.
// The destination element is x bits wide and the source element is
// x / scale bits wide. Expands, in the enclosing scope, to:
//   - a guard that hits UNREACHABLE() when x / scale is below 8
//   vd  - reference bound to Rvvelt<type_sew_t<x>::type>(rvv_vd_reg(), i,
//         true); note the destination still uses the signed type_sew_t
//   vs2 - copy of Rvvelt<type_usew_t<x / scale>::type>(rvv_vs2_reg(), i)
// An index named `i` must be visible where the macro is expanded.
#define VI_VIE_UPARAMS(x, scale)                                    \
  if (((x) / (scale)) < 8) UNREACHABLE();                           \
  auto& vd = Rvvelt<type_sew_t<(x)>::type>(rvv_vd_reg(), i, true);  \
  auto vs2 = Rvvelt<type_usew_t<(x) / (scale)>::type>(rvv_vs2_reg(), i);
| 286 | + |
// require_noover: asserts through CHECK_EQ that is_overlapped() reports no
// overlap between the range (astart, asize) and the range (bstart, bsize).
#define require_noover(astart, asize, bstart, bsize) \
  CHECK_EQ(!is_overlapped(astart, asize, bstart, bsize), true)

// require_noover_widen: same assertion, using is_overlapped_widen() as the
// overlap test.
#define require_noover_widen(astart, asize, bstart, bsize) \
  CHECK_EQ(!is_overlapped_widen(astart, asize, bstart, bsize), true)
| 291 | + |
89 | 292 | #define RVV_VI_GENERAL_LOOP_BASE \
|
90 | 293 | for (uint64_t i = rvv_vstart(); i < rvv_vl(); i++) {
|
91 | 294 | #define RVV_VI_LOOP_END \
|
|
0 commit comments