From c7a099caa0be6d2ae646aac295df34dbf2262dc3 Mon Sep 17 00:00:00 2001 From: surechen Date: Fri, 2 Apr 2021 12:04:27 +0800 Subject: [PATCH 1/3] add neon instruction vmaxnm_f* vpmaxnm_f* vminnm_f* vpminnm_f* --- .../core_arch/src/aarch64/neon/generated.rs | 308 ++++++++++++++++++ crates/stdarch-gen/neon.spec | 56 ++++ 2 files changed, 364 insertions(+) diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 1c7ddff7f9..6855c23ad9 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -2510,6 +2510,97 @@ pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { vmaxq_f64_(a, b) } +/// Floating-point Maximun Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnm))] +pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v1f64")] + fn vmaxnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t; + } + vmaxnm_f64_(a, b) +} + +/// Floating-point Maximun Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnm))] +pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f64")] + fn vmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + vmaxnmq_f64_(a, b) +} + +/// Floating-point Maximun Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnm))] +pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f32")] + fn vmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + 
vmaxnm_f32_(a, b) +} + +/// Floating-point Maximun Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnm))] +pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v4f32")] + fn vmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + vmaxnmq_f32_(a, b) +} + +/// Floating-point Maximum Number Pairwise (vector). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +pub unsafe fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f32")] + fn vpmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + vpmaxnm_f32_(a, b) +} + +/// Floating-point Maximum Number Pairwise (vector). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +pub unsafe fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f64")] + fn vpmaxnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + vpmaxnmq_f64_(a, b) +} + +/// Floating-point Maximum Number Pairwise (vector). 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmaxnmp))] +pub unsafe fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v4f32")] + fn vpmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + vpmaxnmq_f32_(a, b) +} + /// Minimum (vector) #[inline] #[target_feature(enable = "neon")] @@ -2536,6 +2627,97 @@ pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { vminq_f64_(a, b) } +/// Floating-point Minimun Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnm))] +pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v1f64")] + fn vminnm_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t; + } + vminnm_f64_(a, b) +} + +/// Floating-point Minimun Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnm))] +pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f64")] + fn vminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + vminnmq_f64_(a, b) +} + +/// Floating-point Minimun Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnm))] +pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f32")] + fn vminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + vminnm_f32_(a, b) +} + +/// Floating-point Minimun Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnm))] 
+pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v4f32")] + fn vminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + vminnmq_f32_(a, b) +} + +/// Floating-point Minimum Number Pairwise (vector). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnmp))] +pub unsafe fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f32")] + fn vpminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } + vpminnm_f32_(a, b) +} + +/// Floating-point Minimum Number Pairwise (vector). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnmp))] +pub unsafe fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f64")] + fn vpminnmq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; + } + vpminnmq_f64_(a, b) +} + +/// Floating-point Minimum Number Pairwise (vector). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fminnmp))] +pub unsafe fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v4f32")] + fn vpminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } + vpminnmq_f32_(a, b) +} + /// Calculates the square root of each lane. 
#[inline] #[target_feature(enable = "neon")] @@ -6301,6 +6483,69 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vmaxnm_f64() { + let a: f64 = 1.0; + let b: f64 = 8.0; + let e: f64 = 8.0; + let r: f64 = transmute(vmaxnm_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmaxnmq_f64() { + let a: f64x2 = f64x2::new(1.0, 2.0); + let b: f64x2 = f64x2::new(8.0, 16.0); + let e: f64x2 = f64x2::new(8.0, 16.0); + let r: f64x2 = transmute(vmaxnmq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmaxnm_f32() { + let a: f32x2 = f32x2::new(1.0, 2.0); + let b: f32x2 = f32x2::new(8.0, 16.0); + let e: f32x2 = f32x2::new(8.0, 16.0); + let r: f32x2 = transmute(vmaxnm_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmaxnmq_f32() { + let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0); + let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0); + let e: f32x4 = f32x4::new(8.0, 16.0, 3.0, 6.0); + let r: f32x4 = transmute(vmaxnmq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmaxnm_f32() { + let a: f32x2 = f32x2::new(1.0, 2.0); + let b: f32x2 = f32x2::new(6.0, -3.0); + let e: f32x2 = f32x2::new(2.0, 6.0); + let r: f32x2 = transmute(vpmaxnm_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmaxnmq_f64() { + let a: f64x2 = f64x2::new(1.0, 2.0); + let b: f64x2 = f64x2::new(6.0, -3.0); + let e: f64x2 = f64x2::new(2.0, 6.0); + let r: f64x2 = transmute(vpmaxnmq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmaxnmq_f32() { + let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0); + let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0); + let e: f32x4 = f32x4::new(2.0, 3.0, 16.0, 6.0); + let r: f32x4 = 
transmute(vpmaxnmq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmin_f64() { let a: f64 = 1.0; @@ -6319,6 +6564,69 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vminnm_f64() { + let a: f64 = 1.0; + let b: f64 = 8.0; + let e: f64 = 1.0; + let r: f64 = transmute(vminnm_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vminnmq_f64() { + let a: f64x2 = f64x2::new(1.0, 2.0); + let b: f64x2 = f64x2::new(8.0, 16.0); + let e: f64x2 = f64x2::new(1.0, 2.0); + let r: f64x2 = transmute(vminnmq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vminnm_f32() { + let a: f32x2 = f32x2::new(1.0, 2.0); + let b: f32x2 = f32x2::new(8.0, 16.0); + let e: f32x2 = f32x2::new(1.0, 2.0); + let r: f32x2 = transmute(vminnm_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vminnmq_f32() { + let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0); + let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0); + let e: f32x4 = f32x4::new(1.0, 2.0, -1.0, -4.0); + let r: f32x4 = transmute(vminnmq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpminnm_f32() { + let a: f32x2 = f32x2::new(1.0, 2.0); + let b: f32x2 = f32x2::new(6.0, -3.0); + let e: f32x2 = f32x2::new(1.0, -3.0); + let r: f32x2 = transmute(vpminnm_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpminnmq_f64() { + let a: f64x2 = f64x2::new(1.0, 2.0); + let b: f64x2 = f64x2::new(6.0, -3.0); + let e: f64x2 = f64x2::new(1.0, -3.0); + let r: f64x2 = transmute(vpminnmq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpminnmq_f32() { + let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0); + let b: f32x4 = f32x4::new(8.0, 
16.0, -1.0, 6.0); + let e: f32x4 = f32x4::new(1.0, -4.0, 8.0, -1.0); + let r: f32x4 = transmute(vpminnmq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vsqrt_f32() { let a: f32x2 = f32x2::new(4.0, 9.0); diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec index e9f82943f1..d87fb14316 100644 --- a/crates/stdarch-gen/neon.spec +++ b/crates/stdarch-gen/neon.spec @@ -1471,6 +1471,34 @@ link-arm = vmaxs._EXT_ link-aarch64 = fmax._EXT_ generate float*_t +/// Floating-point Maximun Number (vector) +name = vmaxnm +a = 1.0, 2.0, 3.0, -4.0 +b = 8.0, 16.0, -1.0, 6.0 +validate 8.0, 16.0, 3.0, 6.0 + +aarch64 = fmaxnm +link-aarch64 = fmaxnm._EXT_ +generate float64x*_t, float*_t + +/// Floating-point Maximum Number Pairwise (vector). +name = vpmaxnm +a = 1.0, 2.0 +b = 6.0, -3.0 +validate 2.0, 6.0 +aarch64 = fmaxnmp +link-aarch64 = fmaxnmp._EXT_ +generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t + +/// Floating-point Maximum Number Pairwise (vector). +name = vpmaxnm +a = 1.0, 2.0, 3.0, -4.0 +b = 8.0, 16.0, -1.0, 6.0 +validate 2.0, 3.0, 16.0, 6.0 +aarch64 = fmaxnmp +link-aarch64 = fmaxnmp._EXT_ +generate float32x4_t:float32x4_t:float32x4_t + /// Minimum (vector) name = vmin a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 @@ -1505,6 +1533,34 @@ link-arm = vmins._EXT_ link-aarch64 = fmin._EXT_ generate float*_t +/// Floating-point Minimun Number (vector) +name = vminnm +a = 1.0, 2.0, 3.0, -4.0 +b = 8.0, 16.0, -1.0, 6.0 +validate 1.0, 2.0, -1.0, -4.0 + +aarch64 = fminnm +link-aarch64 = fminnm._EXT_ +generate float64x*_t, float*_t + +/// Floating-point Minimum Number Pairwise (vector). +name = vpminnm +a = 1.0, 2.0 +b = 6.0, -3.0 +validate 1.0, -3.0 +aarch64 = fminnmp +link-aarch64 = fminnmp._EXT_ +generate float32x2_t:float32x2_t:float32x2_t, float64x2_t:float64x2_t:float64x2_t + +/// Floating-point Minimum Number Pairwise (vector). 
+name = vpminnm +a = 1.0, 2.0, 3.0, -4.0 +b = 8.0, 16.0, -1.0, 6.0 +validate 1.0, -4.0, 8.0, -1.0 +aarch64 = fminnmp +link-aarch64 = fminnmp._EXT_ +generate float32x4_t:float32x4_t:float32x4_t + /// Calculates the square root of each lane. name = vsqrt fn = simd_fsqrt From 73c5a58f8408eaf243a10fd3a36f2dc6e344f4c2 Mon Sep 17 00:00:00 2001 From: surechen Date: Fri, 2 Apr 2021 16:03:12 +0800 Subject: [PATCH 2/3] edit for v8 --- .../core_arch/src/aarch64/neon/generated.rs | 88 --------------- crates/core_arch/src/arm/neon/generated.rs | 100 ++++++++++++++++++ crates/stdarch-gen/neon.spec | 18 +++- crates/stdarch-gen/src/main.rs | 27 ++++- 4 files changed, 142 insertions(+), 91 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 6855c23ad9..4fcc4f4ea8 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -2536,32 +2536,6 @@ pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { vmaxnmq_f64_(a, b) } -/// Floating-point Maximun Number (vector) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnm))] -pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f32")] - fn vmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - vmaxnm_f32_(a, b) -} - -/// Floating-point Maximun Number (vector) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fmaxnm))] -pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v4f32")] - fn vmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } - vmaxnmq_f32_(a, b) -} - /// Floating-point Maximum Number Pairwise (vector). 
#[inline] #[target_feature(enable = "neon")] @@ -2653,32 +2627,6 @@ pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { vminnmq_f64_(a, b) } -/// Floating-point Minimun Number (vector) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnm))] -pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f32")] - fn vminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; - } - vminnm_f32_(a, b) -} - -/// Floating-point Minimun Number (vector) -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(fminnm))] -pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v4f32")] - fn vminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; - } - vminnmq_f32_(a, b) -} - /// Floating-point Minimum Number Pairwise (vector). 
#[inline] #[target_feature(enable = "neon")] @@ -6501,24 +6449,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vmaxnm_f32() { - let a: f32x2 = f32x2::new(1.0, 2.0); - let b: f32x2 = f32x2::new(8.0, 16.0); - let e: f32x2 = f32x2::new(8.0, 16.0); - let r: f32x2 = transmute(vmaxnm_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmaxnmq_f32() { - let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0); - let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0); - let e: f32x4 = f32x4::new(8.0, 16.0, 3.0, 6.0); - let r: f32x4 = transmute(vmaxnmq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vpmaxnm_f32() { let a: f32x2 = f32x2::new(1.0, 2.0); @@ -6582,24 +6512,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vminnm_f32() { - let a: f32x2 = f32x2::new(1.0, 2.0); - let b: f32x2 = f32x2::new(8.0, 16.0); - let e: f32x2 = f32x2::new(1.0, 2.0); - let r: f32x2 = transmute(vminnm_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vminnmq_f32() { - let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0); - let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0); - let e: f32x4 = f32x4::new(1.0, 2.0, -1.0, -4.0); - let r: f32x4 = transmute(vminnmq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vpminnm_f32() { let a: f32x2 = f32x2::new(1.0, 2.0); diff --git a/crates/core_arch/src/arm/neon/generated.rs b/crates/core_arch/src/arm/neon/generated.rs index f01a244354..c0c730929e 100644 --- a/crates/core_arch/src/arm/neon/generated.rs +++ b/crates/core_arch/src/arm/neon/generated.rs @@ -4496,6 +4496,38 @@ pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { vmaxq_f32_(a, b) } +/// Floating-point Maximun Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))] +pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f32")] + fn vmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } +vmaxnm_f32_(a, b) +} + +/// Floating-point Maximun Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))] +pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v4f32")] + fn vmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } +vmaxnmq_f32_(a, b) +} + /// Minimum (vector) #[inline] #[target_feature(enable = "neon")] @@ -4720,6 +4752,38 @@ pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { vminq_f32_(a, b) } +/// Floating-point Minimun Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))] +pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f32")] + fn vminnm_f32_(a: 
float32x2_t, b: float32x2_t) -> float32x2_t; + } +vminnm_f32_(a, b) +} + +/// Floating-point Minimun Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))] +pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v4f32")] + fn vminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } +vminnmq_f32_(a, b) +} + /// Reciprocal square-root estimate. #[inline] #[target_feature(enable = "neon")] @@ -10702,6 +10766,24 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vmaxnm_f32() { + let a: f32x2 = f32x2::new(1.0, 2.0); + let b: f32x2 = f32x2::new(8.0, 16.0); + let e: f32x2 = f32x2::new(8.0, 16.0); + let r: f32x2 = transmute(vmaxnm_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmaxnmq_f32() { + let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0); + let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0); + let e: f32x4 = f32x4::new(8.0, 16.0, 3.0, 6.0); + let r: f32x4 = transmute(vmaxnmq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmin_s8() { let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); @@ -10828,6 +10910,24 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vminnm_f32() { + let a: f32x2 = f32x2::new(1.0, 2.0); + let b: f32x2 = f32x2::new(8.0, 16.0); + let e: f32x2 = f32x2::new(1.0, 2.0); + let r: f32x2 = transmute(vminnm_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vminnmq_f32() { + let a: f32x4 = 
f32x4::new(1.0, 2.0, 3.0, -4.0); + let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0); + let e: f32x4 = f32x4::new(1.0, 2.0, -1.0, -4.0); + let r: f32x4 = transmute(vminnmq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vrsqrte_f32() { let a: f32x2 = f32x2::new(1.0, 2.0); diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec index d87fb14316..250fbcbb31 100644 --- a/crates/stdarch-gen/neon.spec +++ b/crates/stdarch-gen/neon.spec @@ -1479,7 +1479,14 @@ validate 8.0, 16.0, 3.0, 6.0 aarch64 = fmaxnm link-aarch64 = fmaxnm._EXT_ -generate float64x*_t, float*_t +generate float64x*_t + +target = v8 +arm = vmaxnm +aarch64 = fmaxnm +link-arm = vmaxnm._EXT_ +link-aarch64 = fmaxnm._EXT_ +generate float*_t /// Floating-point Maximum Number Pairwise (vector). name = vpmaxnm @@ -1541,7 +1548,14 @@ validate 1.0, 2.0, -1.0, -4.0 aarch64 = fminnm link-aarch64 = fminnm._EXT_ -generate float64x*_t, float*_t +generate float64x*_t + +target = v8 +arm = vminnm +aarch64 = fminnm +link-arm = vminnm._EXT_ +link-aarch64 = fminnm._EXT_ +generate float*_t /// Floating-point Minimum Number Pairwise (vector). 
name = vpminnm diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs index 3251b9f6bf..bbeb7c80a5 100644 --- a/crates/stdarch-gen/src/main.rs +++ b/crates/stdarch-gen/src/main.rs @@ -1,4 +1,5 @@ use self::Suffix::*; +use self::TargetFeature::*; use std::env; use std::fs::File; use std::io::prelude::*; @@ -210,6 +211,12 @@ enum Suffix { NoQDouble, } +#[derive(Clone, Copy)] +enum TargetFeature { + ArmV7, + ArmV8, +} + fn type_to_global_type(t: &str) -> &str { match t { "int8x8_t" => "i8x8", @@ -800,6 +807,7 @@ fn gen_arm( current_tests: &[(Vec, Vec, Vec, Vec)], suffix: Suffix, para_num: i32, + target: TargetFeature, fixed: &Vec, multi_fn: &Vec, ) -> (String, String) { @@ -823,6 +831,11 @@ fn gen_arm( .clone() .unwrap_or_else(|| current_arm.to_string()); + let current_target = match target { + ArmV7 => "v7", + ArmV8 => "v8", + }; + let current_fn = if let Some(current_fn) = current_fn.clone() { if link_aarch64.is_some() || link_arm.is_some() { panic!( @@ -965,12 +978,13 @@ fn gen_arm( {} #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "{}"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr({}))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}))] {} "#, current_comment, + current_target, expand_intrinsic(&current_arm, in_t[1]), expand_intrinsic(&current_aarch64, in_t[1]), call, @@ -1233,6 +1247,7 @@ fn main() -> io::Result<()> { let mut fixed: Vec = Vec::new(); let mut current_tests: Vec<(Vec, Vec, Vec, Vec)> = Vec::new(); let mut multi_fn: Vec = Vec::new(); + let mut target: TargetFeature = ArmV7; // // THIS FILE IS GENERATED FORM neon.spec DO NOT CHANGE IT MANUALLY @@ -1309,6 +1324,7 @@ mod test { c = Vec::new(); fixed = Vec::new(); multi_fn = Vec::new(); + target = ArmV7; } else if line.starts_with("//") { } else if line.starts_with("name = ") { current_name = Some(String::from(&line[7..])); @@ -1341,6 +1357,14 @@ 
mod test { link_aarch64 = Some(String::from(&line[15..])); } else if line.starts_with("link-arm = ") { link_arm = Some(String::from(&line[11..])); + } else if line.starts_with("target = ") { + target = match Some(String::from(&line[9..])) { + Some(input) => match input.as_str() { + "v8" => ArmV8, + _ => ArmV7, + }, + _ => ArmV7, + } } else if line.starts_with("generate ") { let line = &line[9..]; let types: Vec = line @@ -1396,6 +1420,7 @@ mod test { &current_tests, suffix, para_num, + target, &fixed, &multi_fn, ); From 5428d5d0b4f1b5d820d74f80d6a55d569e007f8c Mon Sep 17 00:00:00 2001 From: surechen Date: Mon, 5 Apr 2021 21:00:11 +0800 Subject: [PATCH 3/3] edit target --- crates/core_arch/src/arm/neon/generated.rs | 8 ++++---- crates/stdarch-gen/src/main.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/core_arch/src/arm/neon/generated.rs b/crates/core_arch/src/arm/neon/generated.rs index 59447f06ab..b538450d3d 100644 --- a/crates/core_arch/src/arm/neon/generated.rs +++ b/crates/core_arch/src/arm/neon/generated.rs @@ -4909,7 +4909,7 @@ vmaxq_f32_(a, b) /// Floating-point Maximun Number (vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))] pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { @@ -4925,7 +4925,7 @@ vmaxnm_f32_(a, b) /// Floating-point Maximun Number (vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))] pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: 
float32x4_t) -> float32x4_t { @@ -5165,7 +5165,7 @@ vminq_f32_(a, b) /// Floating-point Minimun Number (vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))] pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { @@ -5181,7 +5181,7 @@ vminnm_f32_(a, b) /// Floating-point Minimun Number (vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8"))] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))] pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs index cad7ba78af..adf8f4a219 100644 --- a/crates/stdarch-gen/src/main.rs +++ b/crates/stdarch-gen/src/main.rs @@ -995,7 +995,7 @@ fn gen_arm( let current_target = match target { ArmV7 => "v7", - FPArmV8 => "fp-armv8", + FPArmV8 => "fp-armv8,v8", }; let current_fn = if let Some(current_fn) = current_fn.clone() {