Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: RustCrypto/universal-hashes
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 202ecd05fe3f0377a8ee3cdb5563637520393d3a
Choose a base ref
..
head repository: RustCrypto/universal-hashes
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: dd31c6894348667599070082442402fd1fcc3a28
Choose a head ref
Showing with 28 additions and 45 deletions.
  1. +27 −44 polyval/src/field/backend/pclmulqdq.rs
  2. +1 −1 polyval/src/field/backend/soft.rs
71 changes: 27 additions & 44 deletions polyval/src/field/backend/pclmulqdq.rs
Original file line number Diff line number Diff line change
@@ -38,12 +38,6 @@ impl From<M128i> for Block {
}
}

impl From<u128> for M128i {
    /// Loads the 16 bytes of `x` into an `__m128i` and wraps the result in `M128i`.
    fn from(x: u128) -> M128i {
        let src = &x as *const u128 as *const __m128i;
        // SAFETY: `src` points at the live local `x`, which is 16 bytes wide;
        // `_mm_loadu_si128` is the unaligned load, so `u128`'s alignment is sufficient.
        let loaded = unsafe { _mm_loadu_si128(src) };
        M128i(loaded)
    }
}

impl Add for M128i {
type Output = Self;

@@ -57,48 +51,35 @@ impl Add for M128i {
impl Mul for M128i {
type Output = Self;

// NOTE(review): this hunk is rendered without +/- markers, so two versions
// of the doc line and of the body of `mul` appear back to back below.
/// Computes POLYVAL multiplication over GF(2^128).
/// Computes carryless POLYVAL multiplication over GF(2^128).
fn mul(self, rhs: Self) -> Self {
// Method-based form: four `clmul` products of `self` and `rhs`, one per
// immediate selector (0x00, 0x01, 0x10, 0x11).
let t1 = self.clmul(rhs, 0x00);
let t2 = self.clmul(rhs, 0x01);
let t3 = self.clmul(rhs, 0x10);
let t4 = self.clmul(rhs, 0x11);
// Combine the two products taken with the mixed selectors 0x01 and 0x10.
let t5 = t2 + t3;
// `t5.shr64()` is combined with `t4` and `t5.shl64()` with `t1`; only the
// `t1` side is passed through `reduce()` before the final `+`.
(t4 + t5.shr64()) + (t1 + t5.shl64()).reduce()
// Intrinsic-based form: the same sequence of steps written against the raw
// `__m128i` helpers, with `xor` in the positions where `+` is used above and
// `psrldq8` / `pslldq8` in place of `shr64()` / `shl64()`.
unsafe {
let t1 = pclmulqdq(self.0, rhs.0, 0x00);
let t2 = pclmulqdq(self.0, rhs.0, 0x01);
let t3 = pclmulqdq(self.0, rhs.0, 0x10);
let t4 = pclmulqdq(self.0, rhs.0, 0x11);
let t5 = xor(t2, t3);
let t6 = xor(t4, psrldq8(t5));
let t7 = xor(t1, pslldq8(t5));
// Only `t7` is reduced; the result is wrapped back into `M128i`.
M128i(xor(t6, reduce(t7)))
}
}
}

impl M128i {
/// Wrapper for PCLMULQDQ: forwards both wrapped values and `imm` to
/// `pclmulqdq` and re-wraps the product.
fn clmul(self, rhs: Self, imm: u8) -> Self {
    let product = unsafe { pclmulqdq(self.0, rhs.0, imm) };
    M128i(product)
}

/// Fast reduction modulo x^128 + x^127 + x^126 + x^121 + 1 (Gueron 2012)
/// Algorithm 4: "Montgomery reduction"
///
/// See: <https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf>
fn reduce(self) -> Self {
    // POLYVAL's polynomial with its highest bit unset; it is the fixed
    // operand of both carryless multiplications below.
    let poly = Self::from(1 << 127 | 1 << 126 | 1 << 121 | 1);

    // First step: multiply, then combine with the shuffled input.
    let first_product = poly.clmul(self, 0x01);
    let first_fold = self.shuffle() + first_product;

    // Second step: the same operation applied to the first step's result.
    let second_product = poly.clmul(first_fold, 0x01);
    first_fold.shuffle() + second_product
}

/// Applies the `shufpd1` helper to the wrapped value.
fn shuffle(self) -> Self {
    let shuffled = unsafe { shufpd1(self.0) };
    M128i(shuffled)
}

/// Applies the `pslldq8` helper to the wrapped value.
fn shl64(self) -> Self {
    let shifted = unsafe { pslldq8(self.0) };
    M128i(shifted)
}

/// Applies the `psrldq8` helper to the wrapped value.
fn shr64(self) -> Self {
    let shifted = unsafe { psrldq8(self.0) };
    M128i(shifted)
}
/// Mask value used when performing Montgomery fast reduction.
///
/// This is POLYVAL's polynomial with the highest bit unset, i.e. the terms
/// x^127, x^126, x^121 and 1.
const MASK: u128 = (1 << 127) | (1 << 126) | (1 << 121) | 1;

/// Fast reduction modulo x^128 + x^127 + x^126 + x^121 + 1 (Gueron 2012)
/// Algorithm 4: "Montgomery reduction"
///
/// See: <https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf>
///
/// # Safety
///
/// Calls the `pclmulqdq` helper, which is compiled with the `pclmulqdq`,
/// `sse2` and `sse4.1` target features; the caller must ensure the running
/// CPU supports them.
unsafe fn reduce(x: __m128i) -> __m128i {
    // `MASK` is a 16-byte constant; the unaligned load reads it as a vector.
    let poly = _mm_loadu_si128(&MASK as *const u128 as *const __m128i);

    // First step: multiply, then combine with the shuffled input.
    let first_product = pclmulqdq(poly, x, 0x01);
    let first_fold = xor(shufpd1(x), first_product);

    // Second step: the same operation applied to the first step's result.
    let second_product = pclmulqdq(poly, first_fold, 0x01);
    xor(shufpd1(first_fold), second_product)
}

#[target_feature(enable = "sse2", enable = "sse4.1")]
@@ -125,6 +106,8 @@ unsafe fn psrldq8(a: __m128i) -> __m128i {
// TODO(tarcieri): _mm256_clmulepi64_epi128 (vpclmulqdq)
#[target_feature(enable = "pclmulqdq", enable = "sse2", enable = "sse4.1")]
unsafe fn pclmulqdq(a: __m128i, b: __m128i, imm: u8) -> __m128i {
// The `imm` value passed to `_mm_clmulepi64_si128` needs to be a literal
// value since it ends up being encoded into the CPU instruction.
match imm {
// Low-Low: `clmul(a[0..8], b[0..8])` (PCLMULLQLQDQ)
0x00 => _mm_clmulepi64_si128(a, b, 0x00),
2 changes: 1 addition & 1 deletion polyval/src/field/backend/soft.rs
Original file line number Diff line number Diff line change
@@ -54,7 +54,7 @@ impl Add for U64x2 {
impl Mul for U64x2 {
type Output = Self;

/// Computes POLYVAL multiplication over GF(2^128) in constant time.
/// Computes carryless POLYVAL multiplication over GF(2^128) in constant time.
///
/// Method described at:
/// <https://www.bearssl.org/constanttime.html#ghash-for-gcm>