Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 43c6622

Browse files
committedSep 3, 2019
polyval: Refactor in prep for soft backend rewrite
This incrementally incorporates some of the refactoring changes from #7 in order to simplify the diff for that PR (and because those changes seem generally good regardless).
1 parent d0b54b0 commit 43c6622

File tree

8 files changed

+82
-142
lines changed

8 files changed

+82
-142
lines changed
 

‎poly1305/benches/poly1305.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
extern crate test;
44

5-
use poly1305::{Poly1305, universal_hash::UniversalHash};
5+
use poly1305::{universal_hash::UniversalHash, Poly1305};
66
use test::Bencher;
77

88
// TODO(tarcieri): move this into the `universal-hash` crate

‎polyval/benches/polyval.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
extern crate test;
44

5-
use polyval::{Polyval, universal_hash::UniversalHash};
5+
use polyval::{universal_hash::UniversalHash, Polyval};
66
use test::Bencher;
77

88
// TODO(tarcieri): move this into the `universal-hash` crate

‎polyval/src/field.rs

+6-22
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,7 @@
1414
//!
1515
//! [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
1616
17-
pub(crate) mod backend;
18-
mod clmul;
17+
pub mod backend;
1918

2019
use self::backend::Backend;
2120
use core::ops::{Add, Mul};
@@ -26,12 +25,6 @@ pub const FIELD_SIZE: usize = 16;
2625
/// POLYVAL field element bytestrings (16-bytes)
2726
pub type Block = [u8; FIELD_SIZE];
2827

29-
/// Mask value used when performing Montgomery fast reduction.
30-
/// This corresponds to POLYVAL's polynomial with the highest bit unset.
31-
///
32-
/// See: <https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf>
33-
const MASK: u128 = 1 << 127 | 1 << 126 | 1 << 121 | 1;
34-
3528
/// POLYVAL field element.
3629
#[derive(Copy, Clone)]
3730
pub struct Element<B: Backend>(B);
@@ -46,17 +39,6 @@ impl<B: Backend> Element<B> {
4639
pub fn to_bytes(self) -> Block {
4740
self.0.into()
4841
}
49-
50-
/// Fast reduction modulo x^128 + x^127 + x^126 +x^121 + 1 (Gueron 2012)
51-
/// Algorithm 4: "Montgomery reduction"
52-
fn reduce(self) -> Self {
53-
let mask = B::from(MASK);
54-
let a = mask.clmul(self.0, 0x01);
55-
let b = self.0.shuffle() ^ a;
56-
let c = mask.clmul(b, 0x01);
57-
let d = b.shuffle() ^ c;
58-
Element(d)
59-
}
6042
}
6143

6244
impl<B: Backend> Default for Element<B> {
@@ -77,7 +59,7 @@ impl<B: Backend> Add for Element<B> {
7759
///
7860
/// [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
7961
fn add(self, rhs: Self) -> Self {
80-
Element(self.0 ^ rhs.0)
62+
Element(self.0 + rhs.0)
8163
}
8264
}
8365

@@ -99,8 +81,10 @@ impl<B: Backend> Mul for Element<B> {
9981
let t2 = self.0.clmul(rhs.0, 0x01);
10082
let t3 = self.0.clmul(rhs.0, 0x10);
10183
let t4 = self.0.clmul(rhs.0, 0x11);
102-
let t5 = t2 ^ t3;
103-
Element(t4 ^ t5.shr64()) + Element(t1 ^ t5.shl64()).reduce()
84+
let t5 = t2 + t3;
85+
let t6 = t4 + t5.shr64();
86+
let t7 = (t1 + t5.shl64()).reduce();
87+
Element(t6 + t7)
10488
}
10589
}
10690

‎polyval/src/field/backend.rs

+21-7
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,10 @@
99
mod pclmulqdq;
1010

1111
#[cfg(feature = "insecure-soft")]
12-
mod soft;
12+
pub mod soft;
1313

14-
use super::clmul::Clmul;
1514
use super::Block;
16-
use core::ops::BitXor;
15+
use core::ops::Add;
1716

1817
#[cfg(not(any(
1918
all(
@@ -49,10 +48,25 @@ pub(crate) use self::pclmulqdq::M128i;
4948
))]
5049
pub(crate) use self::soft::U64x2 as M128i;
5150

52-
/// Trait representing the arithmetic operations we expect on the XMM registers
53-
pub trait Backend:
54-
BitXor<Output = Self> + Clmul + Copy + From<Block> + Into<Block> + From<u128>
55-
{
51+
/// Field arithmetic backend
52+
pub trait Backend: Add<Output = Self> + Copy + From<Block> + Into<Block> + From<u128> {
53+
/// Fast reduction modulo x^128 + x^127 + x^126 +x^121 + 1 (Gueron 2012)
54+
/// Algorithm 4: "Montgomery reduction"
55+
///
56+
/// See: <https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf>
57+
fn reduce(self) -> Self {
58+
// Mask value used when performing Montgomery fast reduction.
59+
// This corresponds to POLYVAL's polynomial with the highest bit unset.
60+
let mask = Self::from(1 << 127 | 1 << 126 | 1 << 121 | 1);
61+
let a = mask.clmul(self, 0x01);
62+
let b = self.shuffle() + a;
63+
let c = mask.clmul(b, 0x01);
64+
b.shuffle() + c
65+
}
66+
67+
/// Carryless multiplication
68+
fn clmul(self, rhs: Self, imm: u8) -> Self;
69+
5670
/// Swap the hi and low 64-bit halves of the register
5771
fn shuffle(self) -> Self;
5872

‎polyval/src/field/backend/pclmulqdq.rs

+24-21
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,8 @@ use core::arch::x86::*;
1010
use core::arch::x86_64::*;
1111

1212
use super::Backend;
13-
use crate::field::{
14-
clmul::{self, Clmul},
15-
Block,
16-
};
17-
use core::ops::BitXor;
13+
use crate::field::Block;
14+
use core::ops::Add;
1815

1916
/// Wrapper for `__m128i` - a 128-bit XMM register (SSE2)
2017
#[repr(align(16))]
@@ -45,24 +42,21 @@ impl From<u128> for M128i {
4542
}
4643
}
4744

48-
impl BitXor for M128i {
45+
impl Add for M128i {
4946
type Output = Self;
5047

51-
fn bitxor(self, rhs: Self) -> Self::Output {
48+
/// Adds two POLYVAL field elements.
49+
fn add(self, rhs: Self) -> Self {
5250
M128i(unsafe { xor(self.0, rhs.0) })
5351
}
5452
}
5553

56-
impl Clmul for M128i {
57-
fn clmul<I>(self, rhs: Self, imm: I) -> Self
58-
where
59-
I: Into<clmul::PseudoOp>,
60-
{
61-
M128i(unsafe { pclmulqdq(self.0, rhs.0, imm.into()) })
54+
impl Backend for M128i {
55+
/// Wrapper for PCLMULQDQ
56+
fn clmul(self, rhs: Self, imm: u8) -> Self {
57+
M128i(unsafe { pclmulqdq(self.0, rhs.0, imm) })
6258
}
63-
}
6459

65-
impl Backend for M128i {
6660
fn shuffle(self) -> Self {
6761
M128i(unsafe { shufpd1(self.0) })
6862
}
@@ -99,11 +93,20 @@ unsafe fn psrldq8(a: __m128i) -> __m128i {
9993

10094
// TODO(tarcieri): _mm256_clmulepi64_epi128 (vpclmulqdq)
10195
#[target_feature(enable = "pclmulqdq", enable = "sse2", enable = "sse4.1")]
102-
unsafe fn pclmulqdq(a: __m128i, b: __m128i, op: clmul::PseudoOp) -> __m128i {
103-
match op {
104-
clmul::PseudoOp::PCLMULLQLQDQ => _mm_clmulepi64_si128(a, b, 0x00),
105-
clmul::PseudoOp::PCLMULHQLQDQ => _mm_clmulepi64_si128(a, b, 0x01),
106-
clmul::PseudoOp::PCLMULLQHQDQ => _mm_clmulepi64_si128(a, b, 0x10),
107-
clmul::PseudoOp::PCLMULHQHQDQ => _mm_clmulepi64_si128(a, b, 0x11),
96+
unsafe fn pclmulqdq(a: __m128i, b: __m128i, imm: u8) -> __m128i {
97+
match imm {
98+
// Low-Low: `clmul(a[0..8], b[0..8])` (PCLMULLQLQDQ)
99+
0x00 => _mm_clmulepi64_si128(a, b, 0x00),
100+
101+
// High-Low: `clmul(a[8..16], b[0..8])` (PCLMULHQLQDQ)
102+
0x01 => _mm_clmulepi64_si128(a, b, 0x01),
103+
104+
// Low-High: `clmul(a[0..8], b[8..16])` (PCLMULLQHQDQ)
105+
0x10 => _mm_clmulepi64_si128(a, b, 0x10),
106+
107+
// High-High: `clmul(a[8..16], b[8..16])` (PCLMULHQHQDQ)
108+
0x11 => _mm_clmulepi64_si128(a, b, 0x11),
109+
110+
_ => unreachable!(),
108111
}
109112
}

‎polyval/src/field/backend/soft.rs

+28-34
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,19 @@
66
// See: <https://bearssl.org/gitweb/?p=BearSSL;a=blob;f=src/hash/ghash_ctmul64.c>
77

88
use super::Backend;
9-
use crate::field::{
10-
clmul::{self, Clmul},
11-
Block,
12-
};
13-
use core::{convert::TryInto, ops::BitXor};
9+
use crate::field::Block;
10+
use core::{convert::TryInto, ops::Add};
1411

1512
/// 2 x `u64` values emulating an XMM register
1613
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
17-
pub struct U64x2([u64; 2]);
14+
pub struct U64x2(u64, u64);
1815

1916
impl From<Block> for U64x2 {
2017
fn from(bytes: Block) -> U64x2 {
21-
U64x2([
18+
U64x2(
2219
u64::from_le_bytes(bytes[..8].try_into().unwrap()),
2320
u64::from_le_bytes(bytes[8..].try_into().unwrap()),
24-
])
21+
)
2522
}
2623
}
2724

@@ -36,66 +33,63 @@ impl From<u128> for U64x2 {
3633
fn from(x: u128) -> U64x2 {
3734
let lo = (x & 0xFFFF_FFFFF) as u64;
3835
let hi = (x >> 64) as u64;
39-
U64x2([lo, hi])
36+
U64x2(lo, hi)
4037
}
4138
}
4239

4340
impl From<U64x2> for u128 {
4441
fn from(u64x2: U64x2) -> u128 {
45-
u128::from(u64x2.0[0]) | (u128::from(u64x2.0[1]) << 64)
42+
u128::from(u64x2.0) | (u128::from(u64x2.1) << 64)
4643
}
4744
}
4845

49-
impl BitXor for U64x2 {
46+
impl Add for U64x2 {
5047
type Output = Self;
5148

52-
fn bitxor(self, rhs: Self) -> Self::Output {
53-
U64x2([self.0[0] ^ rhs.0[0], self.0[1] ^ rhs.0[1]])
49+
/// Adds two POLYVAL field elements.
50+
fn add(self, rhs: Self) -> Self {
51+
U64x2(self.0 ^ rhs.0, self.1 ^ rhs.1)
5452
}
5553
}
5654

57-
impl Clmul for U64x2 {
58-
fn clmul<I>(self, other: Self, imm: I) -> Self
59-
where
60-
I: Into<clmul::PseudoOp>,
61-
{
55+
impl Backend for U64x2 {
56+
fn clmul(self, other: Self, imm: u8) -> Self {
6257
let (a, b) = match imm.into() {
63-
clmul::PseudoOp::PCLMULLQLQDQ => (self.0[0], other.0[0]),
64-
clmul::PseudoOp::PCLMULHQLQDQ => (self.0[1], other.0[0]),
65-
clmul::PseudoOp::PCLMULLQHQDQ => (self.0[0], other.0[1]),
66-
clmul::PseudoOp::PCLMULHQHQDQ => (self.0[1], other.0[1]),
58+
0x00 => (self.0, other.0),
59+
0x01 => (self.1, other.0),
60+
0x10 => (self.0, other.1),
61+
0x11 => (self.1, other.1),
62+
_ => unreachable!(),
6763
};
6864

69-
let mut result = [0u64; 2];
65+
let mut result = U64x2(0, 0);
7066

7167
for i in 0..64 {
7268
if b & (1 << i) != 0 {
73-
result[1] ^= a;
69+
result.1 ^= a;
7470
}
7571

76-
result[0] >>= 1;
72+
result.0 >>= 1;
7773

78-
if result[1] & 1 != 0 {
79-
result[0] ^= 1 << 63;
74+
if result.1 & 1 != 0 {
75+
result.0 ^= 1 << 63;
8076
}
8177

82-
result[1] >>= 1;
78+
result.1 >>= 1;
8379
}
8480

85-
U64x2(result)
81+
result
8682
}
87-
}
8883

89-
impl Backend for U64x2 {
9084
fn shuffle(self) -> Self {
91-
U64x2([self.0[1], self.0[0]])
85+
U64x2(self.1, self.0)
9286
}
9387

9488
fn shl64(self) -> Self {
95-
U64x2([0, self.0[0]])
89+
U64x2(0, self.0)
9690
}
9791

9892
fn shr64(self) -> Self {
99-
U64x2([self.0[1], 0])
93+
U64x2(self.1, 0)
10094
}
10195
}

‎polyval/src/field/clmul.rs

-55
This file was deleted.

‎polyval/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
//! ## Requirements
1515
//!
1616
//! - Rust 1.34.0 or newer
17-
//! - `RUSTFLAGS` with `-Ctarget-cpu` and `-Ctarget-feature`:
17+
//! - Recommended: `RUSTFLAGS` with `-Ctarget-cpu` and `-Ctarget-feature`:
1818
//! - x86(-64) CPU: `target-cpu=sandybridge` or newer
1919
//! - SSE2 + SSE4.1: `target-feature=+sse2,+sse4.1`
2020
//!

0 commit comments

Comments
 (0)
Please sign in to comment.