Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 80791b0

Browse files
committed Dec 5, 2019
polyval: Constant-time 32-bit software implementation
Previously (in #7) BearSSL's `ghash_ctmul64.c` was adapted into a portable software backend. This commit additionally adapts BearSSL's `ghash_ctmul32.c` into a portable constant-time field arithmetic backend for POLYVAL designed for 32-bit architectures.
1 parent e010cf7 commit 80791b0

File tree

5 files changed

+282
-44
lines changed

5 files changed

+282
-44
lines changed
 

‎polyval/benches/polyval.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ fn bench(c: &mut Criterion<CyclesPerByte>) {
1616

1717
group.bench_function(BenchmarkId::new("update_padded", size), |b| {
1818
let mut m = Polyval::new(&Default::default());
19-
b.iter(|| m.update_padded(&buf) );
19+
b.iter(|| m.update_padded(&buf));
2020
});
2121
}
2222

‎polyval/src/field.rs

+26-8
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
any(target_arch = "x86", target_arch = "x86_64")
2222
))]
2323
mod pclmulqdq;
24-
mod soft;
24+
pub mod u32_soft;
25+
mod u64_soft;
2526

2627
use core::ops::{Add, Mul};
2728

@@ -34,14 +35,31 @@ use core::ops::{Add, Mul};
3435
use self::pclmulqdq::M128i;
3536

3637
#[allow(unused_imports)]
37-
use self::soft::U64x2;
38+
use self::u32_soft::U32x4;
3839

39-
#[cfg(not(all(
40-
target_feature = "pclmulqdq",
41-
target_feature = "sse2",
42-
target_feature = "sse4.1",
43-
any(target_arch = "x86", target_arch = "x86_64")
44-
)))]
40+
#[allow(unused_imports)]
41+
use self::u64_soft::U64x2;
42+
43+
#[cfg(all(
44+
not(target_pointer_width = "64"),
45+
not(all(
46+
target_feature = "pclmulqdq",
47+
target_feature = "sse2",
48+
target_feature = "sse4.1",
49+
any(target_arch = "x86", target_arch = "x86_64")
50+
))
51+
))]
52+
type M128i = U32x4;
53+
54+
#[cfg(all(
55+
target_pointer_width = "64",
56+
not(all(
57+
target_feature = "pclmulqdq",
58+
target_feature = "sse2",
59+
target_feature = "sse4.1",
60+
any(target_arch = "x86", target_arch = "x86_64")
61+
))
62+
))]
4563
type M128i = U64x2;
4664

4765
/// POLYVAL field element bytestrings (16-bytes)

‎polyval/src/field/u32_soft.rs

+224
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
//! Constant-time software implementation of POLYVAL for 32-bit architectures
2+
//!
3+
//! Adapted from BearSSL's `ghash_ctmul32.c`
4+
//! <https://bearssl.org/gitweb/?p=BearSSL;a=blob_plain;f=src/hash/ghash_ctmul32.c;hb=4b6046412bf927d6424f20fc7ee495bb96dbd227>
5+
//!
6+
//! Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
7+
//!
8+
//! This implementation uses 32-bit multiplications, and only the low
9+
//! 32 bits for each multiplication result. This is meant primarily for
10+
//! the ARM Cortex M0 and M0+, whose multiplication opcode does not yield
11+
//! the upper 32 bits; but it might also be useful on architectures where
12+
//! access to the upper 32 bits requires use of specific registers that
13+
//! create contention (e.g. on i386, "mul" necessarily outputs the result
14+
//! in edx:eax, while "imul" can use any registers but is limited to the
15+
//! low 32 bits).
16+
//!
17+
//! The implementation trick that is used here is bit-reversing (bit 0
18+
//! is swapped with bit 31, bit 1 with bit 30, and so on). In GF(2)[X],
19+
//! for all values x and y, we have:
20+
//!
21+
//! ```text
22+
//! rev32(x) * rev32(y) = rev64(x * y)
23+
//! ```
24+
//!
25+
//! In other words, if we bit-reverse (over 32 bits) the operands, then we
26+
//! bit-reverse (over 64 bits) the result.
27+
28+
use crate::field::Block;
29+
use core::{
30+
convert::TryInto,
31+
num::Wrapping,
32+
ops::{Add, Mul},
33+
};
34+
35+
/// A 128-bit POLYVAL field element stored as four `u32` words,
/// least-significant word first.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct U32x4(u32, u32, u32, u32);
38+
39+
impl From<Block> for U32x4 {
40+
fn from(bytes: Block) -> U32x4 {
41+
U32x4(
42+
u32::from_le_bytes(bytes[..4].try_into().unwrap()),
43+
u32::from_le_bytes(bytes[4..8].try_into().unwrap()),
44+
u32::from_le_bytes(bytes[8..12].try_into().unwrap()),
45+
u32::from_le_bytes(bytes[12..].try_into().unwrap()),
46+
)
47+
}
48+
}
49+
50+
impl From<U32x4> for Block {
    /// Serializes the field element to its 16-byte little-endian encoding
    /// by packing the words into a `u128` and emitting its LE bytes.
    fn from(u32x4: U32x4) -> Block {
        u128::from(u32x4).to_le_bytes()
    }
}
56+
57+
impl From<U32x4> for u128 {
    /// Packs the four 32-bit words into a single `u128`, with word 0 in
    /// the least-significant position.
    fn from(u32x4: U32x4) -> u128 {
        let words = [u32x4.0, u32x4.1, u32x4.2, u32x4.3];

        // Fold from the most-significant word down so each step shifts the
        // accumulator left by one 32-bit lane.
        words
            .iter()
            .rev()
            .fold(0u128, |acc, &w| (acc << 32) | u128::from(w))
    }
}
65+
66+
#[allow(clippy::suspicious_arithmetic_impl)]
67+
impl Add for U32x4 {
68+
type Output = Self;
69+
70+
/// Adds two POLYVAL field elements.
71+
fn add(self, rhs: Self) -> Self::Output {
72+
U32x4(
73+
self.0 ^ rhs.0,
74+
self.1 ^ rhs.1,
75+
self.2 ^ rhs.2,
76+
self.3 ^ rhs.3,
77+
)
78+
}
79+
}
80+
81+
#[allow(clippy::suspicious_arithmetic_impl)]
82+
impl Mul for U32x4 {
83+
type Output = Self;
84+
85+
/// Computes carryless POLYVAL multiplication over GF(2^128) in constant time.
86+
///
87+
/// Method described at:
88+
/// <https://www.bearssl.org/constanttime.html#ghash-for-gcm>
89+
///
90+
/// POLYVAL multiplication is effectively the little endian equivalent of
91+
/// GHASH multiplication, aside from one small detail described here:
92+
///
93+
/// <https://crypto.stackexchange.com/questions/66448/how-does-bearssls-gcm-modular-reduction-work/66462#66462>
94+
///
95+
/// > The product of two bit-reversed 128-bit polynomials yields the
96+
/// > bit-reversed result over 255 bits, not 256. The BearSSL code ends up
97+
/// > with a 256-bit result in zw[], and that value is shifted by one bit,
98+
/// > because of that reversed convention issue. Thus, the code must
99+
/// > include a shifting step to put it back where it should
100+
///
101+
/// This shift is unnecessary for POLYVAL and has been removed.
102+
fn mul(self, rhs: Self) -> Self {
103+
let hw = [self.0, self.1, self.2, self.3];
104+
let yw = [rhs.0, rhs.1, rhs.2, rhs.3];
105+
let hwr = [rev32(hw[0]), rev32(hw[1]), rev32(hw[2]), rev32(hw[3])];
106+
107+
// We are using Karatsuba: the 128x128 multiplication is
108+
// reduced to three 64x64 multiplications, hence nine
109+
// 32x32 multiplications. With the bit-reversal trick,
110+
// we have to perform 18 32x32 multiplications.
111+
112+
let mut a = [0u32; 18];
113+
114+
a[0] = yw[0];
115+
a[1] = yw[1];
116+
a[2] = yw[2];
117+
a[3] = yw[3];
118+
a[4] = a[0] ^ a[1];
119+
a[5] = a[2] ^ a[3];
120+
a[6] = a[0] ^ a[2];
121+
a[7] = a[1] ^ a[3];
122+
a[8] = a[6] ^ a[7];
123+
a[9] = rev32(yw[0]);
124+
a[10] = rev32(yw[1]);
125+
a[11] = rev32(yw[2]);
126+
a[12] = rev32(yw[3]);
127+
a[13] = a[9] ^ a[10];
128+
a[14] = a[11] ^ a[12];
129+
a[15] = a[9] ^ a[11];
130+
a[16] = a[10] ^ a[12];
131+
a[17] = a[15] ^ a[16];
132+
133+
let mut b = [0u32; 18];
134+
135+
b[0] = hw[0];
136+
b[1] = hw[1];
137+
b[2] = hw[2];
138+
b[3] = hw[3];
139+
b[4] = b[0] ^ b[1];
140+
b[5] = b[2] ^ b[3];
141+
b[6] = b[0] ^ b[2];
142+
b[7] = b[1] ^ b[3];
143+
b[8] = b[6] ^ b[7];
144+
b[9] = hwr[0];
145+
b[10] = hwr[1];
146+
b[11] = hwr[2];
147+
b[12] = hwr[3];
148+
b[13] = b[9] ^ b[10];
149+
b[14] = b[11] ^ b[12];
150+
b[15] = b[9] ^ b[11];
151+
b[16] = b[10] ^ b[12];
152+
b[17] = b[15] ^ b[16];
153+
154+
let mut c = [0u32; 18];
155+
156+
for i in 0..18 {
157+
c[i] = bmul32(a[i], b[i]);
158+
}
159+
160+
c[4] ^= c[0] ^ c[1];
161+
c[5] ^= c[2] ^ c[3];
162+
c[8] ^= c[6] ^ c[7];
163+
164+
c[13] ^= c[9] ^ c[10];
165+
c[14] ^= c[11] ^ c[12];
166+
c[17] ^= c[15] ^ c[16];
167+
168+
let mut zw = [0u32; 8];
169+
170+
zw[0] = c[0];
171+
zw[1] = c[4] ^ rev32(c[9]) >> 1;
172+
zw[2] = c[1] ^ c[0] ^ c[2] ^ c[6] ^ rev32(c[13]) >> 1;
173+
zw[3] = c[4] ^ c[5] ^ c[8] ^ rev32(c[10] ^ c[9] ^ c[11] ^ c[15]) >> 1;
174+
zw[4] = c[2] ^ c[1] ^ c[3] ^ c[7] ^ rev32(c[13] ^ c[14] ^ c[17]) >> 1;
175+
zw[5] = c[5] ^ rev32(c[11] ^ c[10] ^ c[12] ^ c[16]) >> 1;
176+
zw[6] = c[3] ^ rev32(c[14]) >> 1;
177+
zw[7] = rev32(c[12]) >> 1;
178+
179+
for i in 0..4 {
180+
let lw = zw[i];
181+
zw[i + 4] ^= lw ^ (lw >> 1) ^ (lw >> 2) ^ (lw >> 7);
182+
zw[i + 3] ^= (lw << 31) ^ (lw << 30) ^ (lw << 25);
183+
}
184+
185+
U32x4(zw[4], zw[5], zw[6], zw[7])
186+
}
187+
}
188+
189+
/// Multiplication in GF(2)[X], truncated to the low 32 bits, with “holes”
/// (sequences of zeroes) to avoid carry spilling.
///
/// Each operand is split into four parts keeping every fourth bit, so an
/// integer multiply of two parts can never propagate a carry into a bit we
/// keep: any carry lands in a zeroed "hole" and is masked away afterwards.
fn bmul32(x: u32, y: u32) -> u32 {
    const MASKS: [u32; 4] = [0x1111_1111, 0x2222_2222, 0x4444_4444, 0x8888_8888];

    let xs = [x & MASKS[0], x & MASKS[1], x & MASKS[2], x & MASKS[3]];
    let ys = [y & MASKS[0], y & MASKS[1], y & MASKS[2], y & MASKS[3]];

    let mut z = 0u32;
    for i in 0..4 {
        // Product bits of weight ≡ i (mod 4) come from part pairs whose
        // indices sum to i (mod 4).
        let mut zi = 0u32;
        for j in 0..4 {
            zi ^= xs[j].wrapping_mul(ys[(i + 4 - j) % 4]);
        }
        z |= zi & MASKS[i];
    }
    z
}
216+
217+
/// Bit-reverse a 32-bit word in constant time.
///
/// A branch-free butterfly network: swap adjacent bits, then 2-bit pairs,
/// nibbles, bytes, and finally rotate the two half-words into place.
fn rev32(mut x: u32) -> u32 {
    const STEPS: [(u32, u32); 4] = [
        (1, 0x5555_5555),
        (2, 0x3333_3333),
        (4, 0x0f0f_0f0f),
        (8, 0x00ff_00ff),
    ];
    for &(shift, mask) in STEPS.iter() {
        x = ((x & mask) << shift) | ((x >> shift) & mask);
    }
    x.rotate_left(16)
}

‎polyval/src/field/soft.rs ‎polyval/src/field/u64_soft.rs

+30-34
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//! Constant-time software implementation of POLYVAL
1+
//! Constant-time software implementation of POLYVAL for 64-bit architectures
22
//!
33
//! Adapted from BearSSL's `ghash_ctmul64.c`
44
//! <https://bearssl.org/gitweb/?p=BearSSL;a=blob;f=src/hash/ghash_ctmul64.c;hb=4b6046412bf927d6424f20fc7ee495bb96dbd227>
@@ -8,6 +8,7 @@
88
use crate::field::Block;
99
use core::{
1010
convert::TryInto,
11+
num::Wrapping,
1112
ops::{Add, Mul},
1213
};
1314

@@ -110,40 +111,25 @@ impl Mul for U64x2 {
110111
}
111112
}
112113

113-
/// Reverse a `u64` in constant time
114-
fn rev64(mut x: u64) -> u64 {
115-
x = ((x & 0x5555_5555_5555_5555) << 1) | ((x >> 1) & 0x5555_5555_5555_5555);
116-
x = ((x & 0x3333_3333_3333_3333) << 2) | ((x >> 2) & 0x3333_3333_3333_3333);
117-
x = ((x & 0x0f0f_0f0f_0f0f_0f0f) << 4) | ((x >> 4) & 0x0f0f_0f0f_0f0f_0f0f);
118-
x = ((x & 0x00ff_00ff_00ff_00ff) << 8) | ((x >> 8) & 0x00ff_00ff_00ff_00ff);
119-
x = ((x & 0xffff_0000_ffff) << 16) | ((x >> 16) & 0xffff_0000_ffff);
120-
(x << 32) | (x >> 32)
121-
}
122-
123-
/// Carryless integer multiplication with with “holes” (sequences of zeroes) to
124-
/// avoid carry spilling. When carries do occur, they wind up in a "hole" and
125-
/// are subsequently masked out of the result.
114+
/// Multiplication in GF(2)[X], truncated to the low 64 bits, with “holes”
/// (sequences of zeroes) to avoid carry spilling.
///
/// Each operand is split into four parts keeping every fourth bit, so an
/// integer multiply of two parts can never propagate a carry into a bit we
/// keep: any carry lands in a zeroed "hole" and is masked away afterwards.
fn bmul64(x: u64, y: u64) -> u64 {
    const MASKS: [u64; 4] = [
        0x1111_1111_1111_1111,
        0x2222_2222_2222_2222,
        0x4444_4444_4444_4444,
        0x8888_8888_8888_8888,
    ];

    let xs = [x & MASKS[0], x & MASKS[1], x & MASKS[2], x & MASKS[3]];
    let ys = [y & MASKS[0], y & MASKS[1], y & MASKS[2], y & MASKS[3]];

    let mut z = 0u64;
    for i in 0..4 {
        // Product bits of weight ≡ i (mod 4) come from part pairs whose
        // indices sum to i (mod 4).
        let mut zi = 0u64;
        for j in 0..4 {
            zi ^= xs[j].wrapping_mul(ys[(i + 4 - j) % 4]);
        }
        z |= zi & MASKS[i];
    }
    z
}
141+
142+
/// Bit-reverse a `u64` in constant time.
///
/// A branch-free butterfly network: swap adjacent bits, then 2-bit pairs,
/// nibbles, bytes, 16-bit half-words, and finally rotate the two 32-bit
/// halves into place.
fn rev64(mut x: u64) -> u64 {
    const STEPS: [(u32, u64); 5] = [
        (1, 0x5555_5555_5555_5555),
        (2, 0x3333_3333_3333_3333),
        (4, 0x0f0f_0f0f_0f0f_0f0f),
        (8, 0x00ff_00ff_00ff_00ff),
        (16, 0x0000_ffff_0000_ffff),
    ];
    for &(shift, mask) in STEPS.iter() {
        x = ((x & mask) << shift) | ((x >> shift) & mask);
    }
    x.rotate_left(32)
}

‎polyval/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
#![doc(html_logo_url = "https://raw.githubusercontent.com/RustCrypto/meta/master/logo_small.png")]
4747
#![warn(missing_docs, rust_2018_idioms)]
4848

49-
mod field;
49+
pub mod field;
5050

5151
pub use universal_hash;
5252

0 commit comments

Comments
 (0)
Please sign in to comment.