@@ -1,18 +1,23 @@
- //! Software emulation support for CLMUL hardware intrinsics.
+ //! Constant-time software implementation of POLYVAL
//!
- //! WARNING: Not constant time! Should be made constant-time or disabled by default.
-
- // TODO(tarcieri): performance-oriented constant-time implementation
- // See: <https://bearssl.org/gitweb/?p=BearSSL;a=blob;f=src/hash/ghash_ctmul64.c>
+ //! Adapted from BearSSL's `ghash_ctmul64.c`
+ //! <https://bearssl.org/gitweb/?p=BearSSL;a=blob;f=src/hash/ghash_ctmul64.c;hb=4b6046412bf927d6424f20fc7ee495bb96dbd227>
+ //!
+ //! Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>

use super::Backend;
use crate::field::Block;
- use core::{convert::TryInto, ops::Add};
+ use core::{
+     convert::TryInto,
+     ops::{Add, Mul},
+ };

- /// 2 x `u64` values emulating an XMM register
+ /// 2 x `u64` values
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct U64x2(u64, u64);

+ impl Backend for U64x2 {}
+
impl From<Block> for U64x2 {
    fn from(bytes: Block) -> U64x2 {
        U64x2(
@@ -29,67 +34,124 @@ impl From<U64x2> for Block {
    }
}

- impl From<u128> for U64x2 {
-     fn from(x: u128) -> U64x2 {
-         let lo = (x & 0xFFFF_FFFFF) as u64;
-         let hi = (x >> 64) as u64;
-         U64x2(lo, hi)
-     }
- }
-
impl From<U64x2> for u128 {
    fn from(u64x2: U64x2) -> u128 {
        u128::from(u64x2.0) | (u128::from(u64x2.1) << 64)
    }
}

+ #[allow(clippy::suspicious_arithmetic_impl)]
impl Add for U64x2 {
    type Output = Self;

    /// Adds two POLYVAL field elements.
-     fn add(self, rhs: Self) -> Self {
+     fn add(self, rhs: Self) -> Self::Output {
        U64x2(self.0 ^ rhs.0, self.1 ^ rhs.1)
    }
}

- impl Backend for U64x2 {
-     fn clmul(self, other: Self, imm: u8) -> Self {
-         let (a, b) = match imm.into() {
-             0x00 => (self.0, other.0),
-             0x01 => (self.1, other.0),
-             0x10 => (self.0, other.1),
-             0x11 => (self.1, other.1),
-             _ => unreachable!(),
-         };
-
-         let mut result = U64x2(0, 0);
-
-         for i in 0..64 {
-             if b & (1 << i) != 0 {
-                 result.1 ^= a;
-             }
-
-             result.0 >>= 1;
-
-             if result.1 & 1 != 0 {
-                 result.0 ^= 1 << 63;
-             }
-
-             result.1 >>= 1;
-         }
-
-         result
-     }
+ #[allow(clippy::suspicious_arithmetic_impl)]
+ impl Mul for U64x2 {
+     type Output = Self;

-     fn shuffle(self) -> Self {
-         U64x2(self.1, self.0)
+     /// Computes carryless POLYVAL multiplication over GF(2^128) in constant time.
+     ///
+     /// Method described at:
+     /// <https://www.bearssl.org/constanttime.html#ghash-for-gcm>
+     ///
+     /// POLYVAL multiplication is effectively the little endian equivalent of
+     /// GHASH multiplication, aside from one small detail described here:
+     ///
+     /// <https://crypto.stackexchange.com/questions/66448/how-does-bearssls-gcm-modular-reduction-work/66462#66462>
+     ///
+     /// > The product of two bit-reversed 128-bit polynomials yields the
+     /// > bit-reversed result over 255 bits, not 256. The BearSSL code ends up
+     /// > with a 256-bit result in zw[], and that value is shifted by one bit,
+     /// > because of that reversed convention issue. Thus, the code must
+     /// > include a shifting step to put it back where it should
+     ///
+     /// This shift is unnecessary for POLYVAL and has been removed.
+     fn mul(self, rhs: Self) -> Self {
+         let h0 = self.0;
+         let h1 = self.1;
+         let h0r = rev64(h0);
+         let h1r = rev64(h1);
+         let h2 = h0 ^ h1;
+         let h2r = h0r ^ h1r;
+
+         let y0 = rhs.0;
+         let y1 = rhs.1;
+         let y0r = rev64(y0);
+         let y1r = rev64(y1);
+         let y2 = y0 ^ y1;
+         let y2r = y0r ^ y1r;
+         let z0 = bmul64(y0, h0);
+         let z1 = bmul64(y1, h1);
+
+         let mut z2 = bmul64(y2, h2);
+         let mut z0h = bmul64(y0r, h0r);
+         let mut z1h = bmul64(y1r, h1r);
+         let mut z2h = bmul64(y2r, h2r);
+
+         z2 ^= z0 ^ z1;
+         z2h ^= z0h ^ z1h;
+         z0h = rev64(z0h) >> 1;
+         z1h = rev64(z1h) >> 1;
+         z2h = rev64(z2h) >> 1;
+
+         let v0 = z0;
+         let mut v1 = z0h ^ z2;
+         let mut v2 = z1 ^ z2h;
+         let mut v3 = z1h;
+
+         v2 ^= v0 ^ v0 >> 1 ^ v0 >> 2 ^ v0 >> 7;
+         v1 ^= v0 << 63 ^ v0 << 62 ^ v0 << 57;
+         v3 ^= v1 ^ v1 >> 1 ^ v1 >> 2 ^ v1 >> 7;
+         v2 ^= v1 << 63 ^ v1 << 62 ^ v1 << 57;
+
+         U64x2(v2, v3)
    }
+ }

-     fn shl64(self) -> Self {
-         U64x2(0, self.0)
-     }
+ /// Reverse a `u64` in constant time
+ fn rev64(mut x: u64) -> u64 {
+     x = ((x & 0x5555_5555_5555_5555) << 1) | ((x >> 1) & 0x5555_5555_5555_5555);
+     x = ((x & 0x3333_3333_3333_3333) << 2) | ((x >> 2) & 0x3333_3333_3333_3333);
+     x = ((x & 0x0f0f_0f0f_0f0f_0f0f) << 4) | ((x >> 4) & 0x0f0f_0f0f_0f0f_0f0f);
+     x = ((x & 0x00ff_00ff_00ff_00ff) << 8) | ((x >> 8) & 0x00ff_00ff_00ff_00ff);
+     x = ((x & 0xffff_0000_ffff) << 16) | ((x >> 16) & 0xffff_0000_ffff);
+     (x << 32) | (x >> 32)
+ }

-     fn shr64(self) -> Self {
-         U64x2(self.1, 0)
-     }
+ /// Carryless integer multiplication with "holes" (sequences of zeroes) to
+ /// avoid carry spilling. When carries do occur, they wind up in a "hole" and
+ /// are subsequently masked out of the result.
+ fn bmul64(x: u64, y: u64) -> u64 {
+     let x0 = x & 0x1111_1111_1111_1111;
+     let x1 = x & 0x2222_2222_2222_2222;
+     let x2 = x & 0x4444_4444_4444_4444;
+     let x3 = x & 0x8888_8888_8888_8888;
+     let y0 = y & 0x1111_1111_1111_1111;
+     let y1 = y & 0x2222_2222_2222_2222;
+     let y2 = y & 0x4444_4444_4444_4444;
+     let y3 = y & 0x8888_8888_8888_8888;
+
+     let mut z0 =
+         x0.wrapping_mul(y0) ^ x1.wrapping_mul(y3) ^ x2.wrapping_mul(y2) ^ x3.wrapping_mul(y1);
+
+     let mut z1 =
+         x0.wrapping_mul(y1) ^ x1.wrapping_mul(y0) ^ x2.wrapping_mul(y3) ^ x3.wrapping_mul(y2);
+
+     let mut z2 =
+         x0.wrapping_mul(y2) ^ x1.wrapping_mul(y1) ^ x2.wrapping_mul(y0) ^ x3.wrapping_mul(y3);
+
+     let mut z3 =
+         x0.wrapping_mul(y3) ^ x1.wrapping_mul(y2) ^ x2.wrapping_mul(y1) ^ x3.wrapping_mul(y0);
+
+     z0 &= 0x1111_1111_1111_1111;
+     z1 &= 0x2222_2222_2222_2222;
+     z2 &= 0x4444_4444_4444_4444;
+     z3 &= 0x8888_8888_8888_8888;
+
+     z0 | z1 | z2 | z3
}
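
For context on how this arithmetic is used: POLYVAL consumes each 16-byte block X_i via the recurrence S_i = (S_{i-1} + X_i) * H (RFC 8452), which is exactly the Add and Mul impls added in this diff. Below is a minimal, hypothetical sketch of that update step; the `update` name and wiring are illustrative only, not this crate's actual API, which routes through the Backend trait.

// Hypothetical sketch of POLYVAL's per-block update, S = (S + X) * H,
// expressed with the U64x2 Add and Mul impls from this diff.
// `h` is the field element derived from the key; `s` is the running state.
fn update(s: U64x2, h: U64x2, block: Block) -> U64x2 {
    (s + U64x2::from(block)) * h
}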
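
And a sanity-check sketch for the two new helpers (hypothetical test code, not part of this diff; it assumes it sits in the same module so it can see `rev64` and `bmul64`): since bmul64 masks carries into the "holes", it should agree with a naive shift-and-xor carryless multiply truncated to 64 bits, and rev64 should be an involutive bit reversal.

#[cfg(test)]
mod sanity {
    use super::{bmul64, rev64};

    /// Naive 64-bit carryless multiply (low 64 bits of the product),
    /// used purely as a reference implementation; not constant time.
    fn clmul_lo_naive(x: u64, y: u64) -> u64 {
        let mut z = 0;
        for i in 0..64 {
            if (y >> i) & 1 == 1 {
                z ^= x << i;
            }
        }
        z
    }

    #[test]
    fn bmul64_matches_naive_clmul() {
        // Carries land in the masked-out "holes", so the results agree.
        let (x, y) = (0x0123_4567_89ab_cdef, 0xfedc_ba98_7654_3210);
        assert_eq!(bmul64(x, y), clmul_lo_naive(x, y));
    }

    #[test]
    fn rev64_reverses_bits() {
        assert_eq!(rev64(1), 0x8000_0000_0000_0000);
        assert_eq!(rev64(rev64(0xdead_beef_cafe_f00d)), 0xdead_beef_cafe_f00d);
    }
}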