Skip to content

Commit c160d1c

Browse files
committed
poly1305: AVX2 detection
Automatically detects the availability of CLMUL based on CPUID, and falls back to the "soft" implementation if unavailable.
1 parent 0c84ace commit c160d1c

File tree

9 files changed

+132
-33
lines changed

9 files changed

+132
-33
lines changed

.github/workflows/poly1305.yml

+7
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ jobs:
3939
target: ${{ matrix.target }}
4040
override: true
4141
- run: cargo build --target ${{ matrix.target }} --release
42+
- run: cargo build --target ${{ matrix.target }} --release --features force-soft
4243

4344
# Tests for the portable software backend
4445
soft:
@@ -70,6 +71,8 @@ jobs:
7071
- run: ${{ matrix.deps }}
7172
- run: cargo check --target ${{ matrix.target }} --all-features
7273
- run: cargo test --target ${{ matrix.target }} --release
74+
- run: cargo test --target ${{ matrix.target }} --release --features force-soft
75+
- run: cargo test --target ${{ matrix.target }} --release --features std
7376
- run: cargo test --target ${{ matrix.target }} --release --all-features
7477

7578
# Tests for the AVX2 backend
@@ -104,6 +107,8 @@ jobs:
104107
- run: ${{ matrix.deps }}
105108
- run: cargo check --target ${{ matrix.target }} --all-features
106109
- run: cargo test --target ${{ matrix.target }} --release
110+
- run: cargo test --target ${{ matrix.target }} --release --features force-soft
111+
- run: cargo test --target ${{ matrix.target }} --release --features std
107112
- run: cargo test --target ${{ matrix.target }} --release --all-features
108113

109114
# Cross-compiled tests
@@ -135,4 +140,6 @@ jobs:
135140
override: true
136141
- run: cargo install cross
137142
- run: cross test --target ${{ matrix.target }} --release
143+
- run: cross test --target ${{ matrix.target }} --release --features force-soft
144+
- run: cross test --target ${{ matrix.target }} --release --features std
138145
- run: cross test --target ${{ matrix.target }} --release --all-features

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

poly1305/Cargo.toml

+4
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,12 @@ edition = "2018"
1515
universal-hash = { version = "0.4", default-features = false }
1616
zeroize = { version = "1", optional = true, default-features = false }
1717

18+
[target.'cfg(any(target_arch = "x86_64", target_arch = "x86"))'.dependencies]
19+
cpuid-bool = "0.2"
20+
1821
[dev-dependencies]
1922
hex-literal = "0.2"
2023

2124
[features]
25+
force-soft = []
2226
std = ["universal-hash/std"]

poly1305/src/autodetect.rs

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
//! Autodetection support for AVX2 CPU intrinsics on x86 CPUs, with fallback
2+
//! to the "soft" backend when it's unavailable.
3+
4+
use crate::{backend, Block, Key, Tag};
5+
6+
cpuid_bool::new!(avx2_cpuid, "avx2");
7+
8+
pub struct State {
9+
inner: Inner,
10+
token: avx2_cpuid::InitToken,
11+
}
12+
13+
union Inner {
14+
avx2: backend::avx2::State,
15+
soft: backend::soft::State,
16+
}
17+
18+
impl State {
19+
/// Initialize Poly1305 [`State`] with the given key
20+
#[inline]
21+
pub(crate) fn new(key: &Key) -> State {
22+
let (token, avx2_present) = avx2_cpuid::init_get();
23+
24+
let inner = if avx2_present {
25+
Inner {
26+
avx2: backend::avx2::State::new(key),
27+
}
28+
} else {
29+
Inner {
30+
soft: backend::soft::State::new(key),
31+
}
32+
};
33+
34+
Self { inner, token }
35+
}
36+
37+
/// Reset internal state
38+
#[inline]
39+
pub(crate) fn reset(&mut self) {
40+
if self.token.get() {
41+
unsafe { self.inner.avx2.reset() }
42+
} else {
43+
unsafe { self.inner.soft.reset() }
44+
}
45+
}
46+
47+
/// Compute a Poly1305 block
48+
#[inline]
49+
pub(crate) fn compute_block(&mut self, block: &Block, partial: bool) {
50+
if self.token.get() {
51+
unsafe { self.inner.avx2.compute_block(block, partial) }
52+
} else {
53+
unsafe { self.inner.soft.compute_block(block, partial) }
54+
}
55+
}
56+
57+
/// Finalize output producing a [`Tag`]
58+
#[inline]
59+
pub(crate) fn finalize(&mut self) -> Tag {
60+
if self.token.get() {
61+
unsafe { self.inner.avx2.finalize() }
62+
} else {
63+
unsafe { self.inner.soft.finalize() }
64+
}
65+
}
66+
}
67+
68+
impl Clone for State {
69+
fn clone(&self) -> Self {
70+
let inner = if self.token.get() {
71+
Inner {
72+
avx2: unsafe { self.inner.avx2 },
73+
}
74+
} else {
75+
Inner {
76+
soft: unsafe { self.inner.soft },
77+
}
78+
};
79+
80+
Self {
81+
inner,
82+
token: self.token,
83+
}
84+
}
85+
}

poly1305/src/backend.rs

+3-21
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,9 @@
1+
//! Poly1305 backends
2+
13
#[cfg(all(
24
any(target_arch = "x86", target_arch = "x86_64"),
3-
target_feature = "avx2"
5+
not(feature = "force-soft")
46
))]
57
pub(crate) mod avx2;
68

7-
#[cfg(any(
8-
not(all(
9-
any(target_arch = "x86", target_arch = "x86_64"),
10-
target_feature = "avx2"
11-
)),
12-
fuzzing,
13-
test,
14-
))]
159
pub(crate) mod soft;
16-
17-
#[cfg(all(
18-
any(target_arch = "x86", target_arch = "x86_64"),
19-
target_feature = "avx2",
20-
))]
21-
pub(crate) use avx2::State;
22-
23-
#[cfg(not(all(
24-
any(target_arch = "x86", target_arch = "x86_64"),
25-
target_feature = "avx2",
26-
)))]
27-
pub(crate) use soft::State;

poly1305/src/backend/avx2.rs

+5-3
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,14 @@ use crate::{Block, Key, Tag};
2323
mod helpers;
2424
use self::helpers::*;
2525

26-
#[derive(Clone)]
26+
#[derive(Copy, Clone)]
2727
struct Initialized {
2828
p: Aligned4x130,
2929
m: SpacedMultiplier4x130,
3030
r4: PrecomputedMultiplier,
3131
}
3232

33-
#[derive(Clone)]
33+
#[derive(Copy, Clone)]
3434
pub(crate) struct State {
3535
k: AdditionKey,
3636
r1: PrecomputedMultiplier,
@@ -42,7 +42,7 @@ pub(crate) struct State {
4242
}
4343

4444
impl State {
45-
/// Initialize Poly1305 state with the given key
45+
/// Initialize Poly1305 [`State`] with the given key
4646
pub(crate) fn new(key: &Key) -> Self {
4747
// Prepare addition key and polynomial key.
4848
let (k, r1) = prepare_keys(key);
@@ -67,6 +67,7 @@ impl State {
6767
self.num_cached_blocks = 0;
6868
}
6969

70+
/// Compute a Poly1305 block
7071
pub(crate) fn compute_block(&mut self, block: &Block, partial: bool) {
7172
// We can cache a single partial block.
7273
if partial {
@@ -99,6 +100,7 @@ impl State {
99100
}
100101
}
101102

103+
/// Finalize output producing a [`Tag`]
102104
pub(crate) fn finalize(&mut self) -> Tag {
103105
assert!(self.num_cached_blocks < 4);
104106
let mut data = &self.cached_blocks[..];

poly1305/src/backend/avx2/helpers.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -890,7 +890,7 @@ impl SpacedMultiplier4x130 {
890890
///
891891
/// Unlike `Aligned2x130` which wraps two `Aligned130`s, this struct represents the four
892892
/// integers as 20 limbs spread across three 256-bit vectors.
893-
#[derive(Clone, Debug)]
893+
#[derive(Copy, Clone, Debug)]
894894
pub(super) struct Aligned4x130 {
895895
v0: __m256i,
896896
v1: __m256i,
@@ -1116,7 +1116,7 @@ impl Mul<PrecomputedMultiplier> for &Aligned4x130 {
11161116
// x.v0 = [ x32, x30, x22, x20, x12, x10, x02, x00]
11171117
// y = [5·r_4, 5·r_3, 5·r_2, r_4, r_3, r_2, r_1, r_0]
11181118
// z = [5·r_1, 5·r_1, 5·r_1, 5·r_1, 5·r_1, 5·r_1, 5·r_1, 5·r_1]
1119-
let mut x = self.clone();
1119+
let mut x = *self;
11201120
let y = other.a;
11211121
let z = other.a_5;
11221122

poly1305/src/backend/soft.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@ use zeroize::Zeroize;
1919

2020
use crate::{Block, Key, Tag};
2121

22-
#[derive(Clone, Default)]
22+
#[derive(Copy, Clone, Default)]
2323
pub(crate) struct State {
2424
r: [u32; 5],
2525
h: [u32; 5],
2626
pad: [u32; 4],
2727
}
2828

2929
impl State {
30-
/// Initialize Poly1305State with the given key
30+
/// Initialize Poly1305 [`State`] with the given key
3131
pub(crate) fn new(key: &Key) -> State {
3232
let mut poly = State::default();
3333

@@ -47,7 +47,6 @@ impl State {
4747
}
4848

4949
/// Reset internal state
50-
#[allow(dead_code)]
5150
pub(crate) fn reset(&mut self) {
5251
self.h = Default::default();
5352
}
@@ -144,6 +143,7 @@ impl State {
144143
self.h[4] = h4;
145144
}
146145

146+
/// Finalize output producing a [`Tag`]
147147
pub(crate) fn finalize(&mut self) -> Tag {
148148
// fully carry h
149149
let mut h0 = self.h[0];

poly1305/src/lib.rs

+22-4
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,33 @@ use universal_hash::{
7070
NewUniversalHash, UniversalHash,
7171
};
7272

73+
#[cfg(all(
74+
any(target_arch = "x86", target_arch = "x86_64"),
75+
not(feature = "force-soft")
76+
))]
77+
mod autodetect;
78+
7379
mod backend;
7480

7581
#[cfg(all(
7682
any(target_arch = "x86", target_arch = "x86_64"),
77-
target_feature = "avx2",
83+
not(feature = "force-soft"),
7884
any(fuzzing, test)
7985
))]
8086
mod fuzz;
8187

88+
#[cfg(all(
89+
any(target_arch = "x86", target_arch = "x86_64"),
90+
not(feature = "force-soft")
91+
))]
92+
use crate::autodetect::State;
93+
94+
#[cfg(not(all(
95+
any(target_arch = "x86", target_arch = "x86_64"),
96+
not(feature = "force-soft")
97+
)))]
98+
use crate::backend::soft::State;
99+
82100
/// Size of a Poly1305 key
83101
pub const KEY_SIZE: usize = 32;
84102

@@ -102,7 +120,7 @@ pub type Tag = universal_hash::Output<Poly1305>;
102120
/// For this reason it doesn't impl the `crypto_mac::Mac` trait.
103121
#[derive(Clone)]
104122
pub struct Poly1305 {
105-
state: backend::State,
123+
state: State,
106124
}
107125

108126
impl NewUniversalHash for Poly1305 {
@@ -111,7 +129,7 @@ impl NewUniversalHash for Poly1305 {
111129
/// Initialize Poly1305 with the given key
112130
fn new(key: &Key) -> Poly1305 {
113131
Poly1305 {
114-
state: backend::State::new(key),
132+
state: State::new(key),
115133
}
116134
}
117135
}
@@ -158,7 +176,7 @@ impl Poly1305 {
158176

159177
#[cfg(all(
160178
any(target_arch = "x86", target_arch = "x86_64"),
161-
target_feature = "avx2",
179+
not(feature = "force-soft"),
162180
any(fuzzing, test)
163181
))]
164182
pub use crate::fuzz::fuzz_avx2;

0 commit comments

Comments
 (0)