From b31d11b379b717d4d9dfb963ca44396b731da324 Mon Sep 17 00:00:00 2001 From: Paolo Teti Date: Sat, 23 Feb 2019 20:09:17 +0100 Subject: [PATCH 1/2] ACLE/SIMD32: add `ssub8` and `usub8` - add `ssub8` and `usub8` - bump instruction limit to 29 --- crates/core_arch/src/acle/simd32.rs | 67 +++++++++++++++++++++++++++-- crates/stdsimd-test/src/lib.rs | 2 +- 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/crates/core_arch/src/acle/simd32.rs b/crates/core_arch/src/acle/simd32.rs index 59a1f0a47d..e5d00e1451 100644 --- a/crates/core_arch/src/acle/simd32.rs +++ b/crates/core_arch/src/acle/simd32.rs @@ -16,13 +16,13 @@ //! - [x] __sadd8 //! - [x] __shadd8 //! - [x] __shsub8 -//! - [ ] __ssub8 +//! - [x] __ssub8 //! - [ ] __uadd8 //! - [ ] __uhadd8 //! - [ ] __uhsub8 //! - [ ] __uqadd8 //! - [ ] __uqsub8 -//! - [ ] __usub8 +//! - [x] __usub8 //! - [x] __usad8 //! - [x] __usada8 //! - [x] __qadd16 @@ -130,6 +130,12 @@ extern "C" { #[link_name = "llvm.arm.shsub8"] fn arm_shsub8(a: i32, b: i32) -> i32; + #[link_name = "llvm.arm.ssub8"] + fn arm_ssub8(a: i32, b: i32) -> i32; + + #[link_name = "llvm.arm.usub8"] + fn arm_usub8(a: i32, b: i32) -> i32; + #[link_name = "llvm.arm.shsub16"] fn arm_shsub16(a: i32, b: i32) -> i32; @@ -337,6 +343,39 @@ pub unsafe fn __shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_shsub8, a, b) } +/// Inserts a `USUB8` instruction. +/// +/// Returns the 8-bit unsigned equivalent of +/// +/// res\[0\] = a\[0\] - a\[0\] +/// res\[1\] = a\[1\] - a\[1\] +/// res\[2\] = a\[2\] - a\[2\] +/// res\[3\] = a\[3\] - a\[3\] +/// +/// where [0] is the lower 8 bits and [3] is the upper 8 bits. +#[inline] +#[cfg_attr(test, assert_instr(usub8))] +pub unsafe fn __usub8(a: uint8x4_t, b: uint8x4_t) -> uint8x4_t { + dsp_call!(arm_usub8, a, b) +} + +/// Inserts a `SSUB8` instruction. 
+/// +/// Returns the 8-bit signed equivalent of +/// +/// res\[0\] = a\[0\] - b\[0\] +/// res\[1\] = a\[1\] - b\[1\] +/// res\[2\] = a\[2\] - b\[2\] +/// res\[3\] = a\[3\] - b\[3\] +/// +/// where [0] is the lower 8 bits and [3] is the upper 8 bits. +/// The GE bits of the APSR are set. +#[inline] +#[cfg_attr(test, assert_instr(ssub8))] +pub unsafe fn __ssub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { + dsp_call!(arm_ssub8, a, b) +} + /// Signed halving parallel halfword-wise subtraction. /// /// Returns the 16-bit signed equivalent of @@ -427,7 +466,7 @@ pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 { #[cfg(test)] mod tests { - use crate::core_arch::simd::{i16x2, i8x4}; + use crate::core_arch::simd::{i16x2, i8x4, u8x4}; use std::mem::transmute; use stdsimd_test::simd_test; @@ -596,6 +635,28 @@ mod tests { } } + #[test] + fn ssub8() { + unsafe { + let a = i8x4::new(1, 2, 3, 4); + let b = i8x4::new(5, 4, 3, 2); + let c = i8x4::new(-4, -2, 0, 2); + let r: i8x4 = dsp_call!(super::__ssub8, a, b); + assert_eq!(r, c); + } + } + + #[test] + fn usub8() { + unsafe { + let a = u8x4::new(1, 2, 3, 4); + let b = u8x4::new(5, 4, 3, 2); + let c = u8x4::new(252, 254, 0, 2); + let r: u8x4 = dsp_call!(super::__usub8, a, b); + assert_eq!(r, c); + } + } + #[test] fn shsub16() { unsafe { diff --git a/crates/stdsimd-test/src/lib.rs b/crates/stdsimd-test/src/lib.rs index 05d9199933..af1a9a2806 100644 --- a/crates/stdsimd-test/src/lib.rs +++ b/crates/stdsimd-test/src/lib.rs @@ -153,7 +153,7 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { // core_arch/src/acle/simd32 "usad8" => 27, - "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" => 29, + "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29, // Original limit was 20 instructions, but ARM DSP Intrinsics // are exactly 20 instructions long. 
So, bump the limit to 22 From 53115977197f526225f6cf35ebcabc234e095802 Mon Sep 17 00:00:00 2001 From: Paolo Teti Date: Mon, 25 Feb 2019 19:26:32 +0100 Subject: [PATCH 2/2] The GE bits of the APSR are set also by USUB8 --- crates/core_arch/src/acle/simd32.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/core_arch/src/acle/simd32.rs b/crates/core_arch/src/acle/simd32.rs index e5d00e1451..1c64edee92 100644 --- a/crates/core_arch/src/acle/simd32.rs +++ b/crates/core_arch/src/acle/simd32.rs @@ -353,6 +353,7 @@ pub unsafe fn __shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { /// res\[3\] = a\[3\] - a\[3\] /// /// where [0] is the lower 8 bits and [3] is the upper 8 bits. +/// The GE bits of the APSR are set. #[inline] #[cfg_attr(test, assert_instr(usub8))] pub unsafe fn __usub8(a: uint8x4_t, b: uint8x4_t) -> uint8x4_t {