Skip to content

Commit cd5508c

Browse files
authored
poly1305: AVX2 detection (#97)
* poly1305: AVX2 detection Automatically detects the availability of AVX2 based on CPUID, and falls back to the "soft" implementation if unavailable. * poly1305: apply #[target_feature(enable="avx2")] * Extract problematic lambdas into named functions * poly1305: impl Zeroize for autodetect::State
1 parent 0c84ace commit cd5508c

File tree

10 files changed

+385
-266
lines changed

10 files changed

+385
-266
lines changed

.github/workflows/poly1305.yml

+7
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ jobs:
3939
target: ${{ matrix.target }}
4040
override: true
4141
- run: cargo build --target ${{ matrix.target }} --release
42+
- run: cargo build --target ${{ matrix.target }} --release --features force-soft
4243

4344
# Tests for the portable software backend
4445
soft:
@@ -70,6 +71,8 @@ jobs:
7071
- run: ${{ matrix.deps }}
7172
- run: cargo check --target ${{ matrix.target }} --all-features
7273
- run: cargo test --target ${{ matrix.target }} --release
74+
- run: cargo test --target ${{ matrix.target }} --release --features force-soft
75+
- run: cargo test --target ${{ matrix.target }} --release --features std
7376
- run: cargo test --target ${{ matrix.target }} --release --all-features
7477

7578
# Tests for the AVX2 backend
@@ -104,6 +107,8 @@ jobs:
104107
- run: ${{ matrix.deps }}
105108
- run: cargo check --target ${{ matrix.target }} --all-features
106109
- run: cargo test --target ${{ matrix.target }} --release
110+
- run: cargo test --target ${{ matrix.target }} --release --features force-soft
111+
- run: cargo test --target ${{ matrix.target }} --release --features std
107112
- run: cargo test --target ${{ matrix.target }} --release --all-features
108113

109114
# Cross-compiled tests
@@ -135,4 +140,6 @@ jobs:
135140
override: true
136141
- run: cargo install cross
137142
- run: cross test --target ${{ matrix.target }} --release
143+
- run: cross test --target ${{ matrix.target }} --release --features force-soft
144+
- run: cross test --target ${{ matrix.target }} --release --features std
138145
- run: cross test --target ${{ matrix.target }} --release --all-features

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

poly1305/Cargo.toml

+4
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,12 @@ edition = "2018"
1515
universal-hash = { version = "0.4", default-features = false }
1616
zeroize = { version = "1", optional = true, default-features = false }
1717

18+
[target.'cfg(any(target_arch = "x86_64", target_arch = "x86"))'.dependencies]
19+
cpuid-bool = "0.2"
20+
1821
[dev-dependencies]
1922
hex-literal = "0.2"
2023

2124
[features]
25+
force-soft = []
2226
std = ["universal-hash/std"]

poly1305/src/autodetect.rs

+97
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
//! Autodetection support for AVX2 CPU intrinsics on x86 CPUs, with fallback
2+
//! to the "soft" backend when it's unavailable.
3+
4+
use crate::{backend, Block, Key, Tag};
5+
6+
cpuid_bool::new!(avx2_cpuid, "avx2");
7+
8+
pub struct State {
9+
inner: Inner,
10+
token: avx2_cpuid::InitToken,
11+
}
12+
13+
union Inner {
14+
avx2: backend::avx2::State,
15+
soft: backend::soft::State,
16+
}
17+
18+
impl State {
19+
/// Initialize Poly1305 [`State`] with the given key
20+
#[inline]
21+
pub(crate) fn new(key: &Key) -> State {
22+
let (token, avx2_present) = avx2_cpuid::init_get();
23+
24+
let inner = if avx2_present {
25+
Inner {
26+
avx2: backend::avx2::State::new(key),
27+
}
28+
} else {
29+
Inner {
30+
soft: backend::soft::State::new(key),
31+
}
32+
};
33+
34+
Self { inner, token }
35+
}
36+
37+
/// Reset internal state
38+
#[inline]
39+
pub(crate) fn reset(&mut self) {
40+
if self.token.get() {
41+
unsafe { self.inner.avx2.reset() }
42+
} else {
43+
unsafe { self.inner.soft.reset() }
44+
}
45+
}
46+
47+
/// Compute a Poly1305 block
48+
#[inline]
49+
pub(crate) fn compute_block(&mut self, block: &Block, partial: bool) {
50+
if self.token.get() {
51+
unsafe { self.inner.avx2.compute_block(block, partial) }
52+
} else {
53+
unsafe { self.inner.soft.compute_block(block, partial) }
54+
}
55+
}
56+
57+
/// Finalize output producing a [`Tag`]
58+
#[inline]
59+
pub(crate) fn finalize(&mut self) -> Tag {
60+
if self.token.get() {
61+
unsafe { self.inner.avx2.finalize() }
62+
} else {
63+
unsafe { self.inner.soft.finalize() }
64+
}
65+
}
66+
}
67+
68+
impl Clone for State {
69+
fn clone(&self) -> Self {
70+
let inner = if self.token.get() {
71+
Inner {
72+
avx2: unsafe { self.inner.avx2 },
73+
}
74+
} else {
75+
Inner {
76+
soft: unsafe { self.inner.soft },
77+
}
78+
};
79+
80+
Self {
81+
inner,
82+
token: self.token,
83+
}
84+
}
85+
}
86+
87+
#[cfg(feature = "zeroize")]
88+
impl Drop for State {
89+
fn drop(&mut self) {
90+
use zeroize::Zeroize;
91+
const SIZE: usize = core::mem::size_of::<State>();
92+
93+
let inner_array = unsafe { &mut *(self as *mut State as *mut [u8; SIZE]) };
94+
95+
inner_array.zeroize();
96+
}
97+
}

poly1305/src/backend.rs

+3-21
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,9 @@
1+
//! Poly1305 backends
2+
13
#[cfg(all(
24
any(target_arch = "x86", target_arch = "x86_64"),
3-
target_feature = "avx2"
5+
not(feature = "force-soft")
46
))]
57
pub(crate) mod avx2;
68

7-
#[cfg(any(
8-
not(all(
9-
any(target_arch = "x86", target_arch = "x86_64"),
10-
target_feature = "avx2"
11-
)),
12-
fuzzing,
13-
test,
14-
))]
159
pub(crate) mod soft;
16-
17-
#[cfg(all(
18-
any(target_arch = "x86", target_arch = "x86_64"),
19-
target_feature = "avx2",
20-
))]
21-
pub(crate) use avx2::State;
22-
23-
#[cfg(not(all(
24-
any(target_arch = "x86", target_arch = "x86_64"),
25-
target_feature = "avx2",
26-
)))]
27-
pub(crate) use soft::State;

poly1305/src/backend/avx2.rs

+10-6
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,14 @@ use crate::{Block, Key, Tag};
2323
mod helpers;
2424
use self::helpers::*;
2525

26-
#[derive(Clone)]
26+
#[derive(Copy, Clone)]
2727
struct Initialized {
2828
p: Aligned4x130,
2929
m: SpacedMultiplier4x130,
3030
r4: PrecomputedMultiplier,
3131
}
3232

33-
#[derive(Clone)]
33+
#[derive(Copy, Clone)]
3434
pub(crate) struct State {
3535
k: AdditionKey,
3636
r1: PrecomputedMultiplier,
@@ -42,10 +42,10 @@ pub(crate) struct State {
4242
}
4343

4444
impl State {
45-
/// Initialize Poly1305 state with the given key
45+
/// Initialize Poly1305 [`State`] with the given key
4646
pub(crate) fn new(key: &Key) -> Self {
4747
// Prepare addition key and polynomial key.
48-
let (k, r1) = prepare_keys(key);
48+
let (k, r1) = unsafe { prepare_keys(key) };
4949

5050
// Precompute R^2.
5151
let r2 = (r1 * r1).reduce();
@@ -67,7 +67,9 @@ impl State {
6767
self.num_cached_blocks = 0;
6868
}
6969

70-
pub(crate) fn compute_block(&mut self, block: &Block, partial: bool) {
70+
/// Compute a Poly1305 block
71+
#[target_feature(enable = "avx2")]
72+
pub(crate) unsafe fn compute_block(&mut self, block: &Block, partial: bool) {
7173
// We can cache a single partial block.
7274
if partial {
7375
assert!(self.partial_block.is_none());
@@ -99,7 +101,9 @@ impl State {
99101
}
100102
}
101103

102-
pub(crate) fn finalize(&mut self) -> Tag {
104+
/// Finalize output producing a [`Tag`]
105+
#[target_feature(enable = "avx2")]
106+
pub(crate) unsafe fn finalize(&mut self) -> Tag {
103107
assert!(self.num_cached_blocks < 4);
104108
let mut data = &self.cached_blocks[..];
105109

0 commit comments

Comments
 (0)