1
- //! Software emulation support for CLMUL hardware intrinsics.
2
- //!
3
- //! WARNING: Not constant time! Should be made constant-time or disabled by default.
4
-
5
- // TODO(tarcieri): performance-oriented constant-time implementation
6
- // See: <https://bearssl.org/gitweb/?p=BearSSL;a=blob;f=src/hash/ghash_ctmul64.c>
1
+ //! Constant-time software implementation of POLYVAL
2
+
3
+ // Adapted from BearSSL's `ghash_ctmul64.c`
4
+ // <https://bearssl.org/gitweb/?p=BearSSL;a=blob;f=src/hash/ghash_ctmul64.c;hb=4b6046412bf927d6424f20fc7ee495bb96dbd227>
5
+ //
6
+ // Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
7
+ //
8
+ // Permission is hereby granted, free of charge, to any person obtaining
9
+ // a copy of this software and associated documentation files (the
10
+ // "Software"), to deal in the Software without restriction, including
11
+ // without limitation the rights to use, copy, modify, merge, publish,
12
+ // distribute, sublicense, and/or sell copies of the Software, and to
13
+ // permit persons to whom the Software is furnished to do so, subject to
14
+ // the following conditions:
15
+ //
16
+ // The above copyright notice and this permission notice shall be
17
+ // included in all copies or substantial portions of the Software.
18
+ //
19
+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
+ // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
+ // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22
+ // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
23
+ // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
24
+ // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25
+ // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ // SOFTWARE.
27
+
28
+ #![ allow( missing_docs) ]
7
29
8
30
use super :: Backend ;
9
- use crate :: field:: {
10
- clmul:: { self , Clmul } ,
11
- Block ,
31
+ use crate :: field:: Block ;
32
+ use core:: {
33
+ convert:: TryInto ,
34
+ ops:: { Add , Mul } ,
12
35
} ;
13
- use core:: { convert:: TryInto , ops:: BitXor } ;
14
36
15
37
/// 2 x `u64` values emulating an XMM register
16
38
#[ derive( Copy , Clone , Debug , Eq , PartialEq ) ]
17
- pub struct U64x2 ( [ u64 ; 2 ] ) ;
39
+ pub struct U64x2 ( u64 , u64 ) ;
40
+
41
+ impl Backend for U64x2 { }
18
42
19
43
impl From < Block > for U64x2 {
20
44
fn from ( bytes : Block ) -> U64x2 {
21
- U64x2 ( [
45
+ U64x2 (
22
46
u64:: from_le_bytes ( bytes[ ..8 ] . try_into ( ) . unwrap ( ) ) ,
23
47
u64:: from_le_bytes ( bytes[ 8 ..] . try_into ( ) . unwrap ( ) ) ,
24
- ] )
48
+ )
25
49
}
26
50
}
27
51
@@ -36,66 +60,114 @@ impl From<u128> for U64x2 {
36
60
fn from ( x : u128 ) -> U64x2 {
37
61
let lo = ( x & 0xFFFF_FFFFF ) as u64 ;
38
62
let hi = ( x >> 64 ) as u64 ;
39
- U64x2 ( [ lo, hi] )
63
+ U64x2 ( lo, hi)
40
64
}
41
65
}
42
66
43
67
impl From<U64x2> for u128 {
    /// Reassembles the two limbs into one `u128`.
    ///
    /// Field `0` supplies bits 0..64 and field `1` supplies bits 64..128.
    fn from(u64x2: U64x2) -> u128 {
        let lo = u128::from(u64x2.0);
        let hi = u128::from(u64x2.1);
        (hi << 64) | lo
    }
}
48
72
49
- impl BitXor for U64x2 {
73
+ #[ allow( clippy:: suspicious_arithmetic_impl) ]
74
+ impl Add for U64x2 {
50
75
type Output = Self ;
51
76
52
- fn bitxor ( self , rhs : Self ) -> Self :: Output {
53
- U64x2 ( [ self . 0 [ 0 ] ^ rhs. 0 [ 0 ] , self . 0 [ 1 ] ^ rhs. 0 [ 1 ] ] )
77
+ /// Adds two POLYVAL field elements.
78
+ fn add ( self , rhs : Self ) -> Self :: Output {
79
+ U64x2 ( self . 0 ^ rhs. 0 , self . 1 ^ rhs. 1 )
54
80
}
55
81
}
56
82
57
- impl Clmul for U64x2 {
58
- fn clmul < I > ( self , other : Self , imm : I ) -> Self
59
- where
60
- I : Into < clmul:: PseudoOp > ,
61
- {
62
- let ( a, b) = match imm. into ( ) {
63
- clmul:: PseudoOp :: PCLMULLQLQDQ => ( self . 0 [ 0 ] , other. 0 [ 0 ] ) ,
64
- clmul:: PseudoOp :: PCLMULHQLQDQ => ( self . 0 [ 1 ] , other. 0 [ 0 ] ) ,
65
- clmul:: PseudoOp :: PCLMULLQHQDQ => ( self . 0 [ 0 ] , other. 0 [ 1 ] ) ,
66
- clmul:: PseudoOp :: PCLMULHQHQDQ => ( self . 0 [ 1 ] , other. 0 [ 1 ] ) ,
67
- } ;
83
+ #[ allow( clippy:: suspicious_arithmetic_impl) ]
84
+ impl Mul for U64x2 {
85
+ type Output = Self ;
68
86
69
- let mut result = [ 0u64 ; 2 ] ;
87
+ /// Computes POLYVAL multiplication over GF(2^128).
88
+ // TODO(tarcieri): actually adapt the arithmetic below from GHASH
89
+ fn mul ( self , rhs : Self ) -> Self {
90
+ let h0 = self . 0 ;
91
+ let h1 = self . 1 ;
92
+ let h0r = rev64 ( h0) ;
93
+ let h1r = rev64 ( h1) ;
94
+ let h2 = h0 ^ h1;
95
+ let h2r = h0r ^ h1r;
96
+
97
+ let y0 = rhs. 0 ;
98
+ let y1 = rhs. 1 ;
99
+ let y0r = rev64 ( y0) ;
100
+ let y1r = rev64 ( y1) ;
101
+ let y2 = y0 ^ y1;
102
+ let y2r = y0r ^ y1r;
103
+ let z0 = bmul64 ( y0, h0) ;
104
+ let z1 = bmul64 ( y1, h1) ;
105
+
106
+ let mut z2 = bmul64 ( y2, h2) ;
107
+ let mut z0h = bmul64 ( y0r, h0r) ;
108
+ let mut z1h = bmul64 ( y1r, h1r) ;
109
+ let mut z2h = bmul64 ( y2r, h2r) ;
110
+
111
+ z2 ^= z0 ^ z1;
112
+ z2h ^= z0h ^ z1h;
113
+ z0h = rev64 ( z0h) >> 1 ;
114
+ z1h = rev64 ( z1h) >> 1 ;
115
+ z2h = rev64 ( z2h) >> 1 ;
116
+
117
+ let mut v0 = z0;
118
+ let mut v1 = z0h ^ z2;
119
+ let mut v2 = z1 ^ z2h;
120
+ let mut v3 = z1h;
121
+
122
+ v3 = v3 << 1 | v2 >> 63 ;
123
+ v2 = v2 << 1 | v1 >> 63 ;
124
+ v1 = v1 << 1 | v0 >> 63 ;
125
+ v0 <<= 1 ;
126
+
127
+ v2 ^= v0 ^ v0 >> 1 ^ v0 >> 2 ^ v0 >> 7 ;
128
+ v1 ^= v0 << 63 ^ v0 << 62 ^ v0 << 57 ;
129
+ v3 ^= v1 ^ v1 >> 1 ^ v1 >> 2 ^ v1 >> 7 ;
130
+ v2 ^= v1 << 63 ^ v1 << 62 ^ v1 << 57 ;
131
+
132
+ U64x2 ( v2, v3)
133
+ }
134
+ }
70
135
71
- for i in 0 ..64 {
72
- if b & ( 1 << i) != 0 {
73
- result[ 1 ] ^= a;
74
- }
136
/// Reverses the bit order of a `u64` (bit 0 swaps with bit 63, and so on).
///
/// Adjacent groups of 1, 2, 4, 8, 16 and finally 32 bits are swapped in turn.
/// Only masks, shifts and ORs are used: there are no branches or table
/// lookups that depend on `x`.
fn rev64(mut x: u64) -> u64 {
    x = ((x & 0x5555_5555_5555_5555) << 1) | ((x >> 1) & 0x5555_5555_5555_5555);
    x = ((x & 0x3333_3333_3333_3333) << 2) | ((x >> 2) & 0x3333_3333_3333_3333);
    x = ((x & 0x0f0f_0f0f_0f0f_0f0f) << 4) | ((x >> 4) & 0x0f0f_0f0f_0f0f_0f0f);
    x = ((x & 0x00ff_00ff_00ff_00ff) << 8) | ((x >> 8) & 0x00ff_00ff_00ff_00ff);
    x = ((x & 0x0000_ffff_0000_ffff) << 16) | ((x >> 16) & 0x0000_ffff_0000_ffff);
    (x << 32) | (x >> 32)
}
75
144
76
- result[ 0 ] >>= 1 ;
145
/// Carry-less multiplication of two `u64` values, returning one 64-bit word.
///
/// Each operand is split into four interleaved slices (every fourth bit,
/// starting at bit 0, 1, 2 or 3). The slices are combined with wrapping
/// integer multiplications, and each partial result is masked back down to
/// its own slice before the four are merged.
///
/// Only masks, wrapping multiplications, XORs and ORs are used: there are no
/// branches or table lookups that depend on `x` or `y`.
fn bmul64(x: u64, y: u64) -> u64 {
    let x0 = x & 0x1111_1111_1111_1111;
    let x1 = x & 0x2222_2222_2222_2222;
    let x2 = x & 0x4444_4444_4444_4444;
    let x3 = x & 0x8888_8888_8888_8888;
    let y0 = y & 0x1111_1111_1111_1111;
    let y1 = y & 0x2222_2222_2222_2222;
    let y2 = y & 0x4444_4444_4444_4444;
    let y3 = y & 0x8888_8888_8888_8888;

    // For output slice k, pair x-slice i with y-slice j where i + j = k (mod 4).
    let z0 = (x0.wrapping_mul(y0)
        ^ x1.wrapping_mul(y3)
        ^ x2.wrapping_mul(y2)
        ^ x3.wrapping_mul(y1))
        & 0x1111_1111_1111_1111;

    let z1 = (x0.wrapping_mul(y1)
        ^ x1.wrapping_mul(y0)
        ^ x2.wrapping_mul(y3)
        ^ x3.wrapping_mul(y2))
        & 0x2222_2222_2222_2222;

    let z2 = (x0.wrapping_mul(y2)
        ^ x1.wrapping_mul(y1)
        ^ x2.wrapping_mul(y0)
        ^ x3.wrapping_mul(y3))
        & 0x4444_4444_4444_4444;

    let z3 = (x0.wrapping_mul(y3)
        ^ x1.wrapping_mul(y2)
        ^ x2.wrapping_mul(y1)
        ^ x3.wrapping_mul(y0))
        & 0x8888_8888_8888_8888;

    z0 | z1 | z2 | z3
}
0 commit comments