Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit a5b36f7

Browse files
committed Sep 3, 2019
polyval: Constant-time software implementation
Adapts BearSSL's `ghash_ctmul64.c` into a constant-time software backend for POLYVAL.
1 parent fc963a5 commit a5b36f7

File tree

8 files changed

+181
-102
lines changed

8 files changed

+181
-102
lines changed
 

‎.travis.yml

+6-4
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ rust:
66
- nightly
77

88
script:
9-
- cargo test --all --exclude polyval --release
9+
- cargo test --all --release
1010
- cargo test --all --all-features --release
1111

1212
env:
@@ -23,13 +23,15 @@ matrix:
2323
rust: 1.34.0
2424
env: {} # clear `-D warnings` above; allow warnings
2525

26-
# polyval presently needs either RUSTFLAGS or non-default features
26+
# Test `polyval` with the PCLMULQDQ-accelerated backend
2727
- name: "Rust: 1.32.0 (polyval)"
2828
rust: 1.34.0
29-
script: ./test_polyval.sh
29+
env: RUSTFLAGS="-Ctarget-cpu=sandybridge -Ctarget-feature=+sse2,+sse4.1"
30+
script: cd polyval && cargo test --no-default-features --release --tests
3031
- name: "Rust: stable (polyval)"
3132
rust: stable
32-
script: ./test_polyval.sh
33+
env: RUSTFLAGS="-Ctarget-cpu=sandybridge -Ctarget-feature=+sse2,+sse4.1"
34+
script: cd polyval && cargo test --no-default-features --release --tests
3335

3436
# no_std build
3537
- name: "Rust: stable (thumbv7em-none-eabihf)"

‎polyval/Cargo.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ zeroize = { version = "0.10", optional = true, default-features = false }
2222
hex-literal = "0.1"
2323

2424
[features]
25-
default = []
25+
default = ["soft"]
2626
std = ["universal-hash/std"]
27-
insecure-soft = []
27+
soft = []
2828

2929
[badges]
3030
maintenance = { status = "experimental" }

‎polyval/src/field.rs

+1-8
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,7 @@ impl<B: Backend> Mul for Element<B> {
7777
///
7878
/// [RFC 8452 Section 3]: https://tools.ietf.org/html/rfc8452#section-3
7979
fn mul(self, rhs: Self) -> Self {
80-
let t1 = self.0.clmul(rhs.0, 0x00);
81-
let t2 = self.0.clmul(rhs.0, 0x01);
82-
let t3 = self.0.clmul(rhs.0, 0x10);
83-
let t4 = self.0.clmul(rhs.0, 0x11);
84-
let t5 = t2 + t3;
85-
let t6 = t4 + t5.shr64();
86-
let t7 = (t1 + t5.shl64()).reduce();
87-
Element(t6 + t7)
80+
Element(self.0 * rhs.0)
8881
}
8982
}
9083

‎polyval/src/field/backend.rs

+8-31
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88
))]
99
mod pclmulqdq;
1010

11-
#[cfg(feature = "insecure-soft")]
11+
#[cfg(feature = "soft")]
1212
pub mod soft;
1313

1414
use super::Block;
15-
use core::ops::Add;
15+
use core::ops::{Add, Mul};
1616

1717
#[cfg(not(any(
1818
all(
@@ -21,12 +21,12 @@ use core::ops::Add;
2121
target_feature = "sse4.1",
2222
any(target_arch = "x86", target_arch = "x86_64")
2323
),
24-
feature = "insecure-soft"
24+
feature = "soft"
2525
)))]
2626
compile_error!(
2727
"no backends available! On x86/x86-64 platforms, enable intrinsics with \
2828
RUSTFLAGS=\"-Ctarget-cpu=sandybridge -Ctarget-feature=+sse2,+sse4.1\" or \
29-
enable **INSECURE** portable emulation with the `insecure-soft` feature"
29+
enable portable emulation with the `soft` Cargo feature"
3030
);
3131

3232
#[cfg(all(
@@ -44,35 +44,12 @@ pub(crate) use self::pclmulqdq::M128i;
4444
target_feature = "sse4.1",
4545
any(target_arch = "x86", target_arch = "x86_64")
4646
)),
47-
feature = "insecure-soft"
47+
feature = "soft"
4848
))]
4949
pub(crate) use self::soft::U64x2 as M128i;
5050

5151
/// Field arithmetic backend
52-
pub trait Backend: Add<Output = Self> + Copy + From<Block> + Into<Block> + From<u128> {
53-
/// Fast reduction modulo x^128 + x^127 + x^126 + x^121 + 1 (Gueron 2012)
54-
/// Algorithm 4: "Montgomery reduction"
55-
///
56-
/// See: <https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf>
57-
fn reduce(self) -> Self {
58-
// Mask value used when performing Montgomery fast reduction.
59-
// This corresponds to POLYVAL's polynomial with the highest bit unset.
60-
let mask = Self::from(1 << 127 | 1 << 126 | 1 << 121 | 1);
61-
let a = mask.clmul(self, 0x01);
62-
let b = self.shuffle() + a;
63-
let c = mask.clmul(b, 0x01);
64-
b.shuffle() + c
65-
}
66-
67-
/// Carryless multiplication
68-
fn clmul(self, rhs: Self, imm: u8) -> Self;
69-
70-
/// Swap the hi and low 64-bit halves of the register
71-
fn shuffle(self) -> Self;
72-
73-
/// Shift the contents of the register left by 64-bits
74-
fn shl64(self) -> Self;
75-
76-
/// Shift the contents of the register right by 64-bits
77-
fn shr64(self) -> Self;
52+
pub trait Backend:
53+
Add<Output = Self> + Mul<Output = Self> + Copy + From<Block> + Into<Block> + From<u128>
54+
{
7855
}

‎polyval/src/field/backend/pclmulqdq.rs

+33-2
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,15 @@ use core::arch::x86_64::*;
1111

1212
use super::Backend;
1313
use crate::field::Block;
14-
use core::ops::Add;
14+
use core::ops::{Add, Mul};
1515

1616
/// Wrapper for `__m128i` - a 128-bit XMM register (SSE2)
1717
#[repr(align(16))]
1818
#[derive(Copy, Clone)]
1919
pub struct M128i(__m128i);
2020

21+
impl Backend for M128i {}
22+
2123
impl From<Block> for M128i {
2224
fn from(bytes: Block) -> M128i {
2325
M128i(unsafe { _mm_loadu_si128(bytes.as_ptr() as *const __m128i) })
@@ -51,12 +53,41 @@ impl Add for M128i {
5153
}
5254
}
5355

54-
impl Backend for M128i {
56+
#[allow(clippy::suspicious_arithmetic_impl)]
57+
impl Mul for M128i {
58+
type Output = Self;
59+
60+
/// Computes POLYVAL multiplication over GF(2^128).
61+
fn mul(self, rhs: Self) -> Self {
62+
let t1 = self.clmul(rhs, 0x00);
63+
let t2 = self.clmul(rhs, 0x01);
64+
let t3 = self.clmul(rhs, 0x10);
65+
let t4 = self.clmul(rhs, 0x11);
66+
let t5 = t2 + t3;
67+
(t4 + t5.shr64()) + (t1 + t5.shl64()).reduce()
68+
}
69+
}
70+
71+
impl M128i {
5572
/// Wrapper for PCLMULQDQ
5673
fn clmul(self, rhs: Self, imm: u8) -> Self {
5774
M128i(unsafe { pclmulqdq(self.0, rhs.0, imm) })
5875
}
5976

77+
/// Fast reduction modulo x^128 + x^127 + x^126 + x^121 + 1 (Gueron 2012)
78+
/// Algorithm 4: "Montgomery reduction"
79+
///
80+
/// See: <https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf>
81+
fn reduce(self) -> Self {
82+
// Mask value used when performing Montgomery fast reduction.
83+
// This corresponds to POLYVAL's polynomial with the highest bit unset.
84+
let mask = Self::from(1 << 127 | 1 << 126 | 1 << 121 | 1);
85+
let a = mask.clmul(self, 0x01);
86+
let b = self.shuffle() + a;
87+
let c = mask.clmul(b, 0x01);
88+
b.shuffle() + c
89+
}
90+
6091
fn shuffle(self) -> Self {
6192
M128i(unsafe { shufpd1(self.0) })
6293
}

‎polyval/src/field/backend/soft.rs

+129-40
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,45 @@
1-
//! Software emulation support for CLMUL hardware intrinsics.
2-
//!
3-
//! WARNING: Not constant time! Should be made constant-time or disabled by default.
4-
5-
// TODO(tarcieri): performance-oriented constant-time implementation
6-
// See: <https://bearssl.org/gitweb/?p=BearSSL;a=blob;f=src/hash/ghash_ctmul64.c>
1+
//! Constant-time software implementation of POLYVAL
2+
3+
// Adapted from BearSSL's `ghash_ctmul64.c`
4+
// <https://bearssl.org/gitweb/?p=BearSSL;a=blob;f=src/hash/ghash_ctmul64.c;hb=4b6046412bf927d6424f20fc7ee495bb96dbd227>
5+
//
6+
// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
7+
//
8+
// Permission is hereby granted, free of charge, to any person obtaining
9+
// a copy of this software and associated documentation files (the
10+
// "Software"), to deal in the Software without restriction, including
11+
// without limitation the rights to use, copy, modify, merge, publish,
12+
// distribute, sublicense, and/or sell copies of the Software, and to
13+
// permit persons to whom the Software is furnished to do so, subject to
14+
// the following conditions:
15+
//
16+
// The above copyright notice and this permission notice shall be
17+
// included in all copies or substantial portions of the Software.
18+
//
19+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20+
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21+
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22+
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
23+
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
24+
// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25+
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26+
// SOFTWARE.
27+
28+
#![allow(missing_docs)]
729

830
use super::Backend;
931
use crate::field::Block;
10-
use core::{convert::TryInto, ops::Add};
32+
use core::{
33+
convert::TryInto,
34+
ops::{Add, Mul},
35+
};
1136

12-
/// 2 x `u64` values emulating an XMM register
37+
/// 2 x `u64` values
1338
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
1439
pub struct U64x2(u64, u64);
1540

41+
impl Backend for U64x2 {}
42+
1643
impl From<Block> for U64x2 {
1744
fn from(bytes: Block) -> U64x2 {
1845
U64x2(
@@ -43,53 +70,115 @@ impl From<U64x2> for u128 {
4370
}
4471
}
4572

73+
#[allow(clippy::suspicious_arithmetic_impl)]
4674
impl Add for U64x2 {
4775
type Output = Self;
4876

4977
/// Adds two POLYVAL field elements.
50-
fn add(self, rhs: Self) -> Self {
78+
fn add(self, rhs: Self) -> Self::Output {
5179
U64x2(self.0 ^ rhs.0, self.1 ^ rhs.1)
5280
}
5381
}
5482

55-
impl Backend for U64x2 {
56-
fn clmul(self, other: Self, imm: u8) -> Self {
57-
let (a, b) = match imm.into() {
58-
0x00 => (self.0, other.0),
59-
0x01 => (self.1, other.0),
60-
0x10 => (self.0, other.1),
61-
0x11 => (self.1, other.1),
62-
_ => unreachable!(),
63-
};
83+
#[allow(clippy::suspicious_arithmetic_impl)]
84+
impl Mul for U64x2 {
85+
type Output = Self;
6486

65-
let mut result = U64x2(0, 0);
87+
/// Computes POLYVAL multiplication over GF(2^128) in constant time.
88+
///
89+
/// Method described at:
90+
/// <https://www.bearssl.org/constanttime.html#ghash-for-gcm>
91+
///
92+
/// POLYVAL multiplication is effectively the little-endian equivalent of
93+
/// GHASH multiplication, aside from one small detail described here:
94+
///
95+
/// <https://crypto.stackexchange.com/questions/66448/how-does-bearssls-gcm-modular-reduction-work/66462#66462>
96+
///
97+
/// > If you look at the equation above, the product of two bit-reversed
98+
/// > 128-bit polynomials yields the bit-reversed result over 255 bits,
99+
/// > not 256. The BearSSL code ends up with a 256-bit result in zw[],
100+
/// > and that value is shifted by one bit, because of that reversed
101+
/// > convention issue. Thus, the code must include a shifting step to put
102+
/// > it back where it should
103+
///
104+
/// This shift is unnecessary for POLYVAL.
105+
fn mul(self, rhs: Self) -> Self {
106+
let h0 = self.0;
107+
let h1 = self.1;
108+
let h0r = rev64(h0);
109+
let h1r = rev64(h1);
110+
let h2 = h0 ^ h1;
111+
let h2r = h0r ^ h1r;
112+
113+
let y0 = rhs.0;
114+
let y1 = rhs.1;
115+
let y0r = rev64(y0);
116+
let y1r = rev64(y1);
117+
let y2 = y0 ^ y1;
118+
let y2r = y0r ^ y1r;
119+
let z0 = bmul64(y0, h0);
120+
let z1 = bmul64(y1, h1);
121+
122+
let mut z2 = bmul64(y2, h2);
123+
let mut z0h = bmul64(y0r, h0r);
124+
let mut z1h = bmul64(y1r, h1r);
125+
let mut z2h = bmul64(y2r, h2r);
126+
127+
z2 ^= z0 ^ z1;
128+
z2h ^= z0h ^ z1h;
129+
z0h = rev64(z0h) >> 1;
130+
z1h = rev64(z1h) >> 1;
131+
z2h = rev64(z2h) >> 1;
132+
133+
let v0 = z0;
134+
let mut v1 = z0h ^ z2;
135+
let mut v2 = z1 ^ z2h;
136+
let mut v3 = z1h;
137+
138+
v2 ^= v0 ^ v0 >> 1 ^ v0 >> 2 ^ v0 >> 7;
139+
v1 ^= v0 << 63 ^ v0 << 62 ^ v0 << 57;
140+
v3 ^= v1 ^ v1 >> 1 ^ v1 >> 2 ^ v1 >> 7;
141+
v2 ^= v1 << 63 ^ v1 << 62 ^ v1 << 57;
142+
143+
U64x2(v2, v3)
144+
}
145+
}
66146

67-
for i in 0..64 {
68-
if b & (1 << i) != 0 {
69-
result.1 ^= a;
70-
}
147+
fn rev64(mut x: u64) -> u64 {
148+
x = ((x & 0x5555_5555_5555_5555) << 1) | ((x >> 1) & 0x5555_5555_5555_5555);
149+
x = ((x & 0x3333_3333_3333_3333) << 2) | ((x >> 2) & 0x3333_3333_3333_3333);
150+
x = ((x & 0x0f0f_0f0f_0f0f_0f0f) << 4) | ((x >> 4) & 0x0f0f_0f0f_0f0f_0f0f);
151+
x = ((x & 0x00ff_00ff_00ff_00ff) << 8) | ((x >> 8) & 0x00ff_00ff_00ff_00ff);
152+
x = ((x & 0xffff_0000_ffff) << 16) | ((x >> 16) & 0xffff_0000_ffff);
153+
(x << 32) | (x >> 32)
154+
}
71155

72-
result.0 >>= 1;
156+
fn bmul64(x: u64, y: u64) -> u64 {
157+
let x0 = x & 0x1111_1111_1111_1111;
158+
let x1 = x & 0x2222_2222_2222_2222;
159+
let x2 = x & 0x4444_4444_4444_4444;
160+
let x3 = x & 0x8888_8888_8888_8888;
161+
let y0 = y & 0x1111_1111_1111_1111;
162+
let y1 = y & 0x2222_2222_2222_2222;
163+
let y2 = y & 0x4444_4444_4444_4444;
164+
let y3 = y & 0x8888_8888_8888_8888;
73165

74-
if result.1 & 1 != 0 {
75-
result.0 ^= 1 << 63;
76-
}
166+
let mut z0 =
167+
x0.wrapping_mul(y0) ^ x1.wrapping_mul(y3) ^ x2.wrapping_mul(y2) ^ x3.wrapping_mul(y1);
77168

78-
result.1 >>= 1;
79-
}
169+
let mut z1 =
170+
x0.wrapping_mul(y1) ^ x1.wrapping_mul(y0) ^ x2.wrapping_mul(y3) ^ x3.wrapping_mul(y2);
80171

81-
result
82-
}
172+
let mut z2 =
173+
x0.wrapping_mul(y2) ^ x1.wrapping_mul(y1) ^ x2.wrapping_mul(y0) ^ x3.wrapping_mul(y3);
83174

84-
fn shuffle(self) -> Self {
85-
U64x2(self.1, self.0)
86-
}
175+
let mut z3 =
176+
x0.wrapping_mul(y3) ^ x1.wrapping_mul(y2) ^ x2.wrapping_mul(y1) ^ x3.wrapping_mul(y0);
87177

88-
fn shl64(self) -> Self {
89-
U64x2(0, self.0)
90-
}
178+
z0 &= 0x1111_1111_1111_1111;
179+
z1 &= 0x2222_2222_2222_2222;
180+
z2 &= 0x4444_4444_4444_4444;
181+
z3 &= 0x8888_8888_8888_8888;
91182

92-
fn shr64(self) -> Self {
93-
U64x2(self.1, 0)
94-
}
183+
z0 | z1 | z2 | z3
95184
}

‎polyval/src/lib.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@
1818
//! - x86(-64) CPU: `target-cpu=sandybridge` or newer
1919
//! - SSE2 + SSE4.1: `target-feature=+sse2,+sse4.1`
2020
//!
21-
//! An **INSECURE** (variable timing) portable implementation is gated behind
22-
//! the `insecure-soft` cargo feature. Use of this implementation is
23-
//! **NOT RECOMMENDED** and may potentially leak the POLYVAL key!
21+
//! If `RUSTFLAGS` are not provided, this crate will fall back to a much slower
22+
//! software-only implementation.
2423
//!
2524
//! ## Relationship to GHASH
2625
//!

‎test_polyval.sh

-12
This file was deleted.

0 commit comments

Comments
 (0)
Please sign in to comment.