@@ -46,7 +46,7 @@ impl UniversalHash for Polyval {
     #[inline]
     fn update(&mut self, x: &Block) {
         unsafe {
-            mul(self, x);
+            self.mul(x);
         }
     }

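(The second hunk below is the bulk of the change: the free `mul` function moves into an inherent `impl Polyval` block, so `polyval: &mut Polyval` becomes `&mut self` and the `polyval.h`/`polyval.y` field accesses become `self.h`/`self.y`; the body of the carry-less multiplication is otherwise untouched.)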
@@ -63,66 +63,68 @@ impl UniversalHash for Polyval {
     }
 }

-#[inline]
-#[target_feature(enable = "pclmulqdq")]
-#[target_feature(enable = "sse4.1")]
-unsafe fn mul(polyval: &mut Polyval, x: &Block) {
-    let h = polyval.h;
-
-    // `_mm_loadu_si128` performs an unaligned load
-    #[allow(clippy::cast_ptr_alignment)]
-    let x = _mm_loadu_si128(x.as_ptr() as *const __m128i);
-    let y = _mm_xor_si128(polyval.y, x);
-
-    let h0 = h;
-    let h1 = _mm_shuffle_epi32(h, 0x0E);
-    let h2 = _mm_xor_si128(h0, h1);
-    let y0 = y;
-
-    // Multiply values partitioned to 64-bit parts
-    let y1 = _mm_shuffle_epi32(y, 0x0E);
-    let y2 = _mm_xor_si128(y0, y1);
-    let t0 = _mm_clmulepi64_si128(y0, h0, 0x00);
-    let t1 = _mm_clmulepi64_si128(y, h, 0x11);
-    let t2 = _mm_clmulepi64_si128(y2, h2, 0x00);
-    let t2 = _mm_xor_si128(t2, _mm_xor_si128(t0, t1));
-    let v0 = t0;
-    let v1 = _mm_xor_si128(_mm_shuffle_epi32(t0, 0x0E), t2);
-    let v2 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
-    let v3 = _mm_shuffle_epi32(t1, 0x0E);
-
-    // Polynomial reduction
-    let v2 = xor5(
-        v2,
-        v0,
-        _mm_srli_epi64(v0, 1),
-        _mm_srli_epi64(v0, 2),
-        _mm_srli_epi64(v0, 7),
-    );
-
-    let v1 = xor4(
-        v1,
-        _mm_slli_epi64(v0, 63),
-        _mm_slli_epi64(v0, 62),
-        _mm_slli_epi64(v0, 57),
-    );
-
-    let v3 = xor5(
-        v3,
-        v1,
-        _mm_srli_epi64(v1, 1),
-        _mm_srli_epi64(v1, 2),
-        _mm_srli_epi64(v1, 7),
-    );
-
-    let v2 = xor4(
-        v2,
-        _mm_slli_epi64(v1, 63),
-        _mm_slli_epi64(v1, 62),
-        _mm_slli_epi64(v1, 57),
-    );
-
-    polyval.y = _mm_unpacklo_epi64(v2, v3);
+impl Polyval {
+    #[inline]
+    #[target_feature(enable = "pclmulqdq")]
+    #[target_feature(enable = "sse4.1")]
+    unsafe fn mul(&mut self, x: &Block) {
+        let h = self.h;
+
+        // `_mm_loadu_si128` performs an unaligned load
+        #[allow(clippy::cast_ptr_alignment)]
+        let x = _mm_loadu_si128(x.as_ptr() as *const __m128i);
+        let y = _mm_xor_si128(self.y, x);
+
+        let h0 = h;
+        let h1 = _mm_shuffle_epi32(h, 0x0E);
+        let h2 = _mm_xor_si128(h0, h1);
+        let y0 = y;
+
+        // Multiply values partitioned to 64-bit parts
+        let y1 = _mm_shuffle_epi32(y, 0x0E);
+        let y2 = _mm_xor_si128(y0, y1);
+        let t0 = _mm_clmulepi64_si128(y0, h0, 0x00);
+        let t1 = _mm_clmulepi64_si128(y, h, 0x11);
+        let t2 = _mm_clmulepi64_si128(y2, h2, 0x00);
+        let t2 = _mm_xor_si128(t2, _mm_xor_si128(t0, t1));
+        let v0 = t0;
+        let v1 = _mm_xor_si128(_mm_shuffle_epi32(t0, 0x0E), t2);
+        let v2 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
+        let v3 = _mm_shuffle_epi32(t1, 0x0E);
+
+        // Polynomial reduction
+        let v2 = xor5(
+            v2,
+            v0,
+            _mm_srli_epi64(v0, 1),
+            _mm_srli_epi64(v0, 2),
+            _mm_srli_epi64(v0, 7),
+        );
+
+        let v1 = xor4(
+            v1,
+            _mm_slli_epi64(v0, 63),
+            _mm_slli_epi64(v0, 62),
+            _mm_slli_epi64(v0, 57),
+        );
+
+        let v3 = xor5(
+            v3,
+            v1,
+            _mm_srli_epi64(v1, 1),
+            _mm_srli_epi64(v1, 2),
+            _mm_srli_epi64(v1, 7),
+        );
+
+        let v2 = xor4(
+            v2,
+            _mm_slli_epi64(v1, 63),
+            _mm_slli_epi64(v1, 62),
+            _mm_slli_epi64(v1, 57),
+        );
+
+        self.y = _mm_unpacklo_epi64(v2, v3);
+    }
 }

 #[inline(always)]
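The hunk ends on the `#[inline(always)]` attribute introducing the XOR helpers that the reduction step calls. Their definitions fall outside this diff; the following is only a sketch of what `xor4`/`xor5` presumably look like, assuming they do nothing more than chain `_mm_xor_si128`:

// Sketch, not from this diff: assumed shape of the helpers called above.
#[inline(always)]
unsafe fn xor4(e1: __m128i, e2: __m128i, e3: __m128i, e4: __m128i) -> __m128i {
    _mm_xor_si128(_mm_xor_si128(e1, e2), _mm_xor_si128(e3, e4))
}

#[inline(always)]
unsafe fn xor5(e1: __m128i, e2: __m128i, e3: __m128i, e4: __m128i, e5: __m128i) -> __m128i {
    _mm_xor_si128(e1, xor4(e2, e3, e4, e5))
}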
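Note that moving `mul` into an `impl` block does not change its safety contract: it is still an `unsafe fn` gated on `#[target_feature]`, so every caller must first guarantee the CPU supports PCLMULQDQ and SSE4.1. A hypothetical runtime-checked wrapper (the name `checked_mul` and the visibility of `mul` to such a caller are assumptions, not part of this PR):

// Hypothetical wrapper, not part of this PR: check CPU features at runtime
// before calling the `#[target_feature]`-gated method.
fn checked_mul(polyval: &mut Polyval, block: &Block) {
    if is_x86_feature_detected!("pclmulqdq") && is_x86_feature_detected!("sse4.1") {
        // Safety: both required CPU features were verified above.
        unsafe { polyval.mul(block) }
    } else {
        panic!("PCLMULQDQ/SSE4.1 not available on this CPU");
    }
}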