You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
938 lines
32 KiB
938 lines
32 KiB
5 years ago
|
From d24f26b1650156b1da4fced389559cee17805910 Mon Sep 17 00:00:00 2001
|
||
|
From: Artyom Pavlov <newpavlov@gmail.com>
|
||
|
Date: Wed, 10 Jun 2020 20:11:36 +0300
|
||
|
Subject: [PATCH] Update block-buffer to v0.9 (#164)
|
||
|
|
||
|
---
|
||
|
src/consts.rs | 119 ++++++++++++++++-----------------
|
||
|
src/lib.rs | 2 -
|
||
|
src/sha256.rs | 25 +++----
|
||
|
src/sha256_utils.rs | 155 ++++++++++++++++++++++++++-----------------
|
||
|
src/sha512.rs | 48 +++++++-------
|
||
|
src/sha512_utils.rs | 157 ++++++++++++++++++++++++--------------------
|
||
|
6 files changed, 274 insertions(+), 232 deletions(-)
|
||
|
|
||
|
diff --git a/src/consts.rs b/src/consts.rs
|
||
|
index ce87088..f126dc6 100644
|
||
|
--- a/src/consts.rs
|
||
|
+++ b/src/consts.rs
|
||
|
@@ -1,8 +1,5 @@
|
||
|
#![allow(dead_code, clippy::unreadable_literal)]
|
||
|
|
||
|
-use crate::simd::u32x4;
|
||
|
-use crate::simd::u64x2;
|
||
|
-
|
||
|
pub const STATE_LEN: usize = 8;
|
||
|
pub const BLOCK_LEN: usize = 16;
|
||
|
|
||
|
@@ -19,23 +16,23 @@ pub const K32: [u32; 64] = [
|
||
|
];
|
||
|
|
||
|
/// Constants necessary for SHA-256 family of digests.
|
||
|
-pub const K32X4: [u32x4; 16] = [
|
||
|
- u32x4(K32[3], K32[2], K32[1], K32[0]),
|
||
|
- u32x4(K32[7], K32[6], K32[5], K32[4]),
|
||
|
- u32x4(K32[11], K32[10], K32[9], K32[8]),
|
||
|
- u32x4(K32[15], K32[14], K32[13], K32[12]),
|
||
|
- u32x4(K32[19], K32[18], K32[17], K32[16]),
|
||
|
- u32x4(K32[23], K32[22], K32[21], K32[20]),
|
||
|
- u32x4(K32[27], K32[26], K32[25], K32[24]),
|
||
|
- u32x4(K32[31], K32[30], K32[29], K32[28]),
|
||
|
- u32x4(K32[35], K32[34], K32[33], K32[32]),
|
||
|
- u32x4(K32[39], K32[38], K32[37], K32[36]),
|
||
|
- u32x4(K32[43], K32[42], K32[41], K32[40]),
|
||
|
- u32x4(K32[47], K32[46], K32[45], K32[44]),
|
||
|
- u32x4(K32[51], K32[50], K32[49], K32[48]),
|
||
|
- u32x4(K32[55], K32[54], K32[53], K32[52]),
|
||
|
- u32x4(K32[59], K32[58], K32[57], K32[56]),
|
||
|
- u32x4(K32[63], K32[62], K32[61], K32[60]),
|
||
|
+pub const K32X4: [[u32; 4]; 16] = [
|
||
|
+ [K32[3], K32[2], K32[1], K32[0]],
|
||
|
+ [K32[7], K32[6], K32[5], K32[4]],
|
||
|
+ [K32[11], K32[10], K32[9], K32[8]],
|
||
|
+ [K32[15], K32[14], K32[13], K32[12]],
|
||
|
+ [K32[19], K32[18], K32[17], K32[16]],
|
||
|
+ [K32[23], K32[22], K32[21], K32[20]],
|
||
|
+ [K32[27], K32[26], K32[25], K32[24]],
|
||
|
+ [K32[31], K32[30], K32[29], K32[28]],
|
||
|
+ [K32[35], K32[34], K32[33], K32[32]],
|
||
|
+ [K32[39], K32[38], K32[37], K32[36]],
|
||
|
+ [K32[43], K32[42], K32[41], K32[40]],
|
||
|
+ [K32[47], K32[46], K32[45], K32[44]],
|
||
|
+ [K32[51], K32[50], K32[49], K32[48]],
|
||
|
+ [K32[55], K32[54], K32[53], K32[52]],
|
||
|
+ [K32[59], K32[58], K32[57], K32[56]],
|
||
|
+ [K32[63], K32[62], K32[61], K32[60]],
|
||
|
];
|
||
|
|
||
|
/// Constants necessary for SHA-512 family of digests.
|
||
|
@@ -123,47 +120,47 @@ pub const K64: [u64; 80] = [
|
||
|
];
|
||
|
|
||
|
/// Constants necessary for SHA-512 family of digests.
|
||
|
-pub const K64X2: [u64x2; 40] = [
|
||
|
- u64x2(K64[1], K64[0]),
|
||
|
- u64x2(K64[3], K64[2]),
|
||
|
- u64x2(K64[5], K64[4]),
|
||
|
- u64x2(K64[7], K64[6]),
|
||
|
- u64x2(K64[9], K64[8]),
|
||
|
- u64x2(K64[11], K64[10]),
|
||
|
- u64x2(K64[13], K64[12]),
|
||
|
- u64x2(K64[15], K64[14]),
|
||
|
- u64x2(K64[17], K64[16]),
|
||
|
- u64x2(K64[19], K64[18]),
|
||
|
- u64x2(K64[21], K64[20]),
|
||
|
- u64x2(K64[23], K64[22]),
|
||
|
- u64x2(K64[25], K64[24]),
|
||
|
- u64x2(K64[27], K64[26]),
|
||
|
- u64x2(K64[29], K64[28]),
|
||
|
- u64x2(K64[31], K64[30]),
|
||
|
- u64x2(K64[33], K64[32]),
|
||
|
- u64x2(K64[35], K64[34]),
|
||
|
- u64x2(K64[37], K64[36]),
|
||
|
- u64x2(K64[39], K64[38]),
|
||
|
- u64x2(K64[41], K64[40]),
|
||
|
- u64x2(K64[43], K64[42]),
|
||
|
- u64x2(K64[45], K64[44]),
|
||
|
- u64x2(K64[47], K64[46]),
|
||
|
- u64x2(K64[49], K64[48]),
|
||
|
- u64x2(K64[51], K64[50]),
|
||
|
- u64x2(K64[53], K64[52]),
|
||
|
- u64x2(K64[55], K64[54]),
|
||
|
- u64x2(K64[57], K64[56]),
|
||
|
- u64x2(K64[59], K64[58]),
|
||
|
- u64x2(K64[61], K64[60]),
|
||
|
- u64x2(K64[63], K64[62]),
|
||
|
- u64x2(K64[65], K64[64]),
|
||
|
- u64x2(K64[67], K64[66]),
|
||
|
- u64x2(K64[69], K64[68]),
|
||
|
- u64x2(K64[71], K64[70]),
|
||
|
- u64x2(K64[73], K64[72]),
|
||
|
- u64x2(K64[75], K64[74]),
|
||
|
- u64x2(K64[77], K64[76]),
|
||
|
- u64x2(K64[79], K64[78]),
|
||
|
+pub const K64X2: [[u64; 2]; 40] = [
|
||
|
+ [K64[1], K64[0]],
|
||
|
+ [K64[3], K64[2]],
|
||
|
+ [K64[5], K64[4]],
|
||
|
+ [K64[7], K64[6]],
|
||
|
+ [K64[9], K64[8]],
|
||
|
+ [K64[11], K64[10]],
|
||
|
+ [K64[13], K64[12]],
|
||
|
+ [K64[15], K64[14]],
|
||
|
+ [K64[17], K64[16]],
|
||
|
+ [K64[19], K64[18]],
|
||
|
+ [K64[21], K64[20]],
|
||
|
+ [K64[23], K64[22]],
|
||
|
+ [K64[25], K64[24]],
|
||
|
+ [K64[27], K64[26]],
|
||
|
+ [K64[29], K64[28]],
|
||
|
+ [K64[31], K64[30]],
|
||
|
+ [K64[33], K64[32]],
|
||
|
+ [K64[35], K64[34]],
|
||
|
+ [K64[37], K64[36]],
|
||
|
+ [K64[39], K64[38]],
|
||
|
+ [K64[41], K64[40]],
|
||
|
+ [K64[43], K64[42]],
|
||
|
+ [K64[45], K64[44]],
|
||
|
+ [K64[47], K64[46]],
|
||
|
+ [K64[49], K64[48]],
|
||
|
+ [K64[51], K64[50]],
|
||
|
+ [K64[53], K64[52]],
|
||
|
+ [K64[55], K64[54]],
|
||
|
+ [K64[57], K64[56]],
|
||
|
+ [K64[59], K64[58]],
|
||
|
+ [K64[61], K64[60]],
|
||
|
+ [K64[63], K64[62]],
|
||
|
+ [K64[65], K64[64]],
|
||
|
+ [K64[67], K64[66]],
|
||
|
+ [K64[69], K64[68]],
|
||
|
+ [K64[71], K64[70]],
|
||
|
+ [K64[73], K64[72]],
|
||
|
+ [K64[75], K64[74]],
|
||
|
+ [K64[77], K64[76]],
|
||
|
+ [K64[79], K64[78]],
|
||
|
];
|
||
|
|
||
|
pub static H224: [u32; STATE_LEN] = [
|
||
|
diff --git a/src/lib.rs b/src/lib.rs
|
||
|
index 814ba60..c87c064 100644
|
||
|
--- a/src/lib.rs
|
||
|
+++ b/src/lib.rs
|
||
|
@@ -103,5 +103,3 @@ pub use digest::{self, Digest};
|
||
|
pub use sha256_utils::compress256;
|
||
|
#[cfg(feature = "compress")]
|
||
|
pub use sha512_utils::compress512;
|
||
|
-
|
||
|
-use fake_simd as simd;
|
||
|
diff --git a/src/sha256.rs b/src/sha256.rs
|
||
|
index ddc7433..c30671b 100644
|
||
|
--- a/src/sha256.rs
|
||
|
+++ b/src/sha256.rs
|
||
|
@@ -1,10 +1,7 @@
|
||
|
//! SHA-256
|
||
|
|
||
|
use crate::consts::{H224, H256, STATE_LEN};
|
||
|
-use block_buffer::{
|
||
|
- byteorder::{ByteOrder, BE},
|
||
|
- BlockBuffer,
|
||
|
-};
|
||
|
+use block_buffer::BlockBuffer;
|
||
|
use digest::impl_write;
|
||
|
use digest::{
|
||
|
consts::{U28, U32, U64},
|
||
|
@@ -74,16 +71,14 @@ impl Engine256 {
|
||
|
fn update(&mut self, input: &[u8]) {
|
||
|
// Assumes that input.len() can be converted to u64 without overflow
|
||
|
self.len += (input.len() as u64) << 3;
|
||
|
- let self_state = &mut self.state;
|
||
|
- self.buffer
|
||
|
- .input(input, |input| self_state.process_block(input));
|
||
|
+ let s = &mut self.state;
|
||
|
+ self.buffer.input_block(input, |b| s.process_block(b));
|
||
|
}
|
||
|
|
||
|
fn finish(&mut self) {
|
||
|
- let self_state = &mut self.state;
|
||
|
+ let s = &mut self.state;
|
||
|
let l = self.len;
|
||
|
- self.buffer
|
||
|
- .len64_padding::<BE, _>(l, |b| self_state.process_block(b));
|
||
|
+ self.buffer.len64_padding_be(l, |b| s.process_block(b));
|
||
|
}
|
||
|
|
||
|
fn reset(&mut self, h: &[u32; STATE_LEN]) {
|
||
|
@@ -122,7 +117,10 @@ impl FixedOutputDirty for Sha256 {
|
||
|
|
||
|
fn finalize_into_dirty(&mut self, out: &mut digest::Output<Self>) {
|
||
|
self.engine.finish();
|
||
|
- BE::write_u32_into(&self.engine.state.h, out.as_mut_slice());
|
||
|
+ let h = self.engine.state.h;
|
||
|
+ for (chunk, v) in out.chunks_exact_mut(4).zip(h.iter()) {
|
||
|
+ chunk.copy_from_slice(&v.to_be_bytes());
|
||
|
+ }
|
||
|
}
|
||
|
}
|
||
|
|
||
|
@@ -162,7 +160,10 @@ impl FixedOutputDirty for Sha224 {
|
||
|
|
||
|
fn finalize_into_dirty(&mut self, out: &mut digest::Output<Self>) {
|
||
|
self.engine.finish();
|
||
|
- BE::write_u32_into(&self.engine.state.h[..7], out.as_mut_slice());
|
||
|
+ let h = &self.engine.state.h[..7];
|
||
|
+ for (chunk, v) in out.chunks_exact_mut(4).zip(h.iter()) {
|
||
|
+ chunk.copy_from_slice(&v.to_be_bytes());
|
||
|
+ }
|
||
|
}
|
||
|
}
|
||
|
|
||
|
diff --git a/src/sha256_utils.rs b/src/sha256_utils.rs
|
||
|
index ec8109c..7d2ec9f 100644
|
||
|
--- a/src/sha256_utils.rs
|
||
|
+++ b/src/sha256_utils.rs
|
||
|
@@ -1,64 +1,93 @@
|
||
|
#![allow(clippy::many_single_char_names)]
|
||
|
-
|
||
|
use crate::consts::{BLOCK_LEN, K32X4};
|
||
|
-use crate::simd::u32x4;
|
||
|
-use block_buffer::byteorder::{ByteOrder, BE};
|
||
|
+use core::convert::TryInto;
|
||
|
+
|
||
|
+#[inline(always)]
|
||
|
+fn shl(v: [u32; 4], o: u32) -> [u32; 4] {
|
||
|
+ [v[0] >> o, v[1] >> o, v[2] >> o, v[3] >> o]
|
||
|
+}
|
||
|
+
|
||
|
+#[inline(always)]
|
||
|
+fn shr(v: [u32; 4], o: u32) -> [u32; 4] {
|
||
|
+ [v[0] << o, v[1] << o, v[2] << o, v[3] << o]
|
||
|
+}
|
||
|
+
|
||
|
+#[inline(always)]
|
||
|
+fn or(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
|
||
|
+ [a[0] | b[0], a[1] | b[1], a[2] | b[2], a[3] | b[3]]
|
||
|
+}
|
||
|
+
|
||
|
+#[inline(always)]
|
||
|
+fn xor(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
|
||
|
+ [a[0] ^ b[0], a[1] ^ b[1], a[2] ^ b[2], a[3] ^ b[3]]
|
||
|
+}
|
||
|
+
|
||
|
+#[inline(always)]
|
||
|
+fn add(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
|
||
|
+ [
|
||
|
+ a[0].wrapping_add(b[0]),
|
||
|
+ a[1].wrapping_add(b[1]),
|
||
|
+ a[2].wrapping_add(b[2]),
|
||
|
+ a[3].wrapping_add(b[3]),
|
||
|
+ ]
|
||
|
+}
|
||
|
|
||
|
/// Not an intrinsic, but works like an unaligned load.
|
||
|
#[inline]
|
||
|
-fn sha256load(v2: u32x4, v3: u32x4) -> u32x4 {
|
||
|
- u32x4(v3.3, v2.0, v2.1, v2.2)
|
||
|
+fn sha256load(v2: [u32; 4], v3: [u32; 4]) -> [u32; 4] {
|
||
|
+ [v3[3], v2[0], v2[1], v2[2]]
|
||
|
}
|
||
|
|
||
|
/// Not an intrinsic, but useful for swapping vectors.
|
||
|
#[inline]
|
||
|
-fn sha256swap(v0: u32x4) -> u32x4 {
|
||
|
- u32x4(v0.2, v0.3, v0.0, v0.1)
|
||
|
+fn sha256swap(v0: [u32; 4]) -> [u32; 4] {
|
||
|
+ [v0[2], v0[3], v0[0], v0[1]]
|
||
|
}
|
||
|
|
||
|
/// Emulates `llvm.x86.sha256msg1` intrinsic.
|
||
|
// #[inline]
|
||
|
-fn sha256msg1(v0: u32x4, v1: u32x4) -> u32x4 {
|
||
|
+fn sha256msg1(v0: [u32; 4], v1: [u32; 4]) -> [u32; 4] {
|
||
|
// sigma 0 on vectors
|
||
|
#[inline]
|
||
|
- fn sigma0x4(x: u32x4) -> u32x4 {
|
||
|
- ((x >> u32x4(7, 7, 7, 7)) | (x << u32x4(25, 25, 25, 25)))
|
||
|
- ^ ((x >> u32x4(18, 18, 18, 18)) | (x << u32x4(14, 14, 14, 14)))
|
||
|
- ^ (x >> u32x4(3, 3, 3, 3))
|
||
|
+ fn sigma0x4(x: [u32; 4]) -> [u32; 4] {
|
||
|
+ let t1 = or(shl(x, 7), shr(x, 25));
|
||
|
+ let t2 = or(shl(x, 18), shr(x, 14));
|
||
|
+ let t3 = shl(x, 3);
|
||
|
+ xor(xor(t1, t2), t3)
|
||
|
}
|
||
|
|
||
|
- v0 + sigma0x4(sha256load(v0, v1))
|
||
|
+ add(v0, sigma0x4(sha256load(v0, v1)))
|
||
|
}
|
||
|
|
||
|
/// Emulates `llvm.x86.sha256msg2` intrinsic.
|
||
|
// #[inline]
|
||
|
-fn sha256msg2(v4: u32x4, v3: u32x4) -> u32x4 {
|
||
|
+fn sha256msg2(v4: [u32; 4], v3: [u32; 4]) -> [u32; 4] {
|
||
|
macro_rules! sigma1 {
|
||
|
($a:expr) => {
|
||
|
$a.rotate_right(17) ^ $a.rotate_right(19) ^ ($a >> 10)
|
||
|
};
|
||
|
}
|
||
|
|
||
|
- let u32x4(x3, x2, x1, x0) = v4;
|
||
|
- let u32x4(w15, w14, _, _) = v3;
|
||
|
+ let [x3, x2, x1, x0] = v4;
|
||
|
+ let [w15, w14, _, _] = v3;
|
||
|
|
||
|
let w16 = x0.wrapping_add(sigma1!(w14));
|
||
|
let w17 = x1.wrapping_add(sigma1!(w15));
|
||
|
let w18 = x2.wrapping_add(sigma1!(w16));
|
||
|
let w19 = x3.wrapping_add(sigma1!(w17));
|
||
|
|
||
|
- u32x4(w19, w18, w17, w16)
|
||
|
+ [w19, w18, w17, w16]
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
/// Performs 4 rounds of the SHA-256 message schedule update.
|
||
|
-fn sha256_schedule_x4(v0: u32x4, v1: u32x4, v2: u32x4, v3: u32x4) -> u32x4 {
|
||
|
+fn sha256_schedule_x4(v0: [u32; 4], v1: [u32; 4], v2: [u32; 4], v3: [u32; 4]) -> [u32; 4] {
|
||
|
sha256msg2(sha256msg1(v0, v1) + sha256load(v2, v3), v3)
|
||
|
}*/
|
||
|
|
||
|
/// Emulates `llvm.x86.sha256rnds2` intrinsic.
|
||
|
// #[inline]
|
||
|
-fn sha256_digest_round_x2(cdgh: u32x4, abef: u32x4, wk: u32x4) -> u32x4 {
|
||
|
+fn sha256_digest_round_x2(cdgh: [u32; 4], abef: [u32; 4], wk: [u32; 4]) -> [u32; 4] {
|
||
|
macro_rules! big_sigma0 {
|
||
|
($a:expr) => {
|
||
|
($a.rotate_right(2) ^ $a.rotate_right(13) ^ $a.rotate_right(22))
|
||
|
@@ -80,9 +109,9 @@ fn sha256_digest_round_x2(cdgh: u32x4, abef: u32x4, wk: u32x4) -> u32x4 {
|
||
|
};
|
||
|
} // Majority, SHA1M
|
||
|
|
||
|
- let u32x4(_, _, wk1, wk0) = wk;
|
||
|
- let u32x4(a0, b0, e0, f0) = abef;
|
||
|
- let u32x4(c0, d0, g0, h0) = cdgh;
|
||
|
+ let [_, _, wk1, wk0] = wk;
|
||
|
+ let [a0, b0, e0, f0] = abef;
|
||
|
+ let [c0, d0, g0, h0] = cdgh;
|
||
|
|
||
|
// a round
|
||
|
let x0 = big_sigma1!(e0)
|
||
|
@@ -118,7 +147,7 @@ fn sha256_digest_round_x2(cdgh: u32x4, abef: u32x4, wk: u32x4) -> u32x4 {
|
||
|
g1,
|
||
|
);
|
||
|
|
||
|
- u32x4(a2, b2, e2, f2)
|
||
|
+ [a2, b2, e2, f2]
|
||
|
}
|
||
|
|
||
|
/// Process a block with the SHA-256 algorithm.
|
||
|
@@ -127,7 +156,7 @@ fn sha256_digest_block_u32(state: &mut [u32; 8], block: &[u32; 16]) {
|
||
|
|
||
|
macro_rules! schedule {
|
||
|
($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {
|
||
|
- sha256msg2(sha256msg1($v0, $v1) + sha256load($v2, $v3), $v3)
|
||
|
+ sha256msg2(add(sha256msg1($v0, $v1), sha256load($v2, $v3)), $v3)
|
||
|
};
|
||
|
}
|
||
|
|
||
|
@@ -138,45 +167,45 @@ fn sha256_digest_block_u32(state: &mut [u32; 8], block: &[u32; 16]) {
|
||
|
}};
|
||
|
}
|
||
|
|
||
|
- let mut abef = u32x4(state[0], state[1], state[4], state[5]);
|
||
|
- let mut cdgh = u32x4(state[2], state[3], state[6], state[7]);
|
||
|
+ let mut abef = [state[0], state[1], state[4], state[5]];
|
||
|
+ let mut cdgh = [state[2], state[3], state[6], state[7]];
|
||
|
|
||
|
// Rounds 0..64
|
||
|
- let mut w0 = u32x4(block[3], block[2], block[1], block[0]);
|
||
|
- rounds4!(abef, cdgh, k[0] + w0);
|
||
|
- let mut w1 = u32x4(block[7], block[6], block[5], block[4]);
|
||
|
- rounds4!(abef, cdgh, k[1] + w1);
|
||
|
- let mut w2 = u32x4(block[11], block[10], block[9], block[8]);
|
||
|
- rounds4!(abef, cdgh, k[2] + w2);
|
||
|
- let mut w3 = u32x4(block[15], block[14], block[13], block[12]);
|
||
|
- rounds4!(abef, cdgh, k[3] + w3);
|
||
|
+ let mut w0 = [block[3], block[2], block[1], block[0]];
|
||
|
+ rounds4!(abef, cdgh, add(k[0], w0));
|
||
|
+ let mut w1 = [block[7], block[6], block[5], block[4]];
|
||
|
+ rounds4!(abef, cdgh, add(k[1], w1));
|
||
|
+ let mut w2 = [block[11], block[10], block[9], block[8]];
|
||
|
+ rounds4!(abef, cdgh, add(k[2], w2));
|
||
|
+ let mut w3 = [block[15], block[14], block[13], block[12]];
|
||
|
+ rounds4!(abef, cdgh, add(k[3], w3));
|
||
|
let mut w4 = schedule!(w0, w1, w2, w3);
|
||
|
- rounds4!(abef, cdgh, k[4] + w4);
|
||
|
+ rounds4!(abef, cdgh, add(k[4], w4));
|
||
|
w0 = schedule!(w1, w2, w3, w4);
|
||
|
- rounds4!(abef, cdgh, k[5] + w0);
|
||
|
+ rounds4!(abef, cdgh, add(k[5], w0));
|
||
|
w1 = schedule!(w2, w3, w4, w0);
|
||
|
- rounds4!(abef, cdgh, k[6] + w1);
|
||
|
+ rounds4!(abef, cdgh, add(k[6], w1));
|
||
|
w2 = schedule!(w3, w4, w0, w1);
|
||
|
- rounds4!(abef, cdgh, k[7] + w2);
|
||
|
+ rounds4!(abef, cdgh, add(k[7], w2));
|
||
|
w3 = schedule!(w4, w0, w1, w2);
|
||
|
- rounds4!(abef, cdgh, k[8] + w3);
|
||
|
+ rounds4!(abef, cdgh, add(k[8], w3));
|
||
|
w4 = schedule!(w0, w1, w2, w3);
|
||
|
- rounds4!(abef, cdgh, k[9] + w4);
|
||
|
+ rounds4!(abef, cdgh, add(k[9], w4));
|
||
|
w0 = schedule!(w1, w2, w3, w4);
|
||
|
- rounds4!(abef, cdgh, k[10] + w0);
|
||
|
+ rounds4!(abef, cdgh, add(k[10], w0));
|
||
|
w1 = schedule!(w2, w3, w4, w0);
|
||
|
- rounds4!(abef, cdgh, k[11] + w1);
|
||
|
+ rounds4!(abef, cdgh, add(k[11], w1));
|
||
|
w2 = schedule!(w3, w4, w0, w1);
|
||
|
- rounds4!(abef, cdgh, k[12] + w2);
|
||
|
+ rounds4!(abef, cdgh, add(k[12], w2));
|
||
|
w3 = schedule!(w4, w0, w1, w2);
|
||
|
- rounds4!(abef, cdgh, k[13] + w3);
|
||
|
+ rounds4!(abef, cdgh, add(k[13], w3));
|
||
|
w4 = schedule!(w0, w1, w2, w3);
|
||
|
- rounds4!(abef, cdgh, k[14] + w4);
|
||
|
+ rounds4!(abef, cdgh, add(k[14], w4));
|
||
|
w0 = schedule!(w1, w2, w3, w4);
|
||
|
- rounds4!(abef, cdgh, k[15] + w0);
|
||
|
+ rounds4!(abef, cdgh, add(k[15], w0));
|
||
|
|
||
|
- let u32x4(a, b, e, f) = abef;
|
||
|
- let u32x4(c, d, g, h) = cdgh;
|
||
|
+ let [a, b, e, f] = abef;
|
||
|
+ let [c, d, g, h] = cdgh;
|
||
|
|
||
|
state[0] = state[0].wrapping_add(a);
|
||
|
state[1] = state[1].wrapping_add(b);
|
||
|
@@ -204,23 +233,23 @@ fn sha256_digest_block_u32(state: &mut [u32; 8], block: &[u32; 16]) {
|
||
|
/// calculated as:
|
||
|
///
|
||
|
/// ```ignore
|
||
|
-/// use std::simd::u32x4;
|
||
|
+/// use std::simd::[u32; 4];
|
||
|
/// use self::crypto::sha2::{
|
||
|
/// sha256msg1,
|
||
|
/// sha256msg2,
|
||
|
/// sha256load
|
||
|
/// };
|
||
|
///
|
||
|
-/// fn schedule4_data(work: &mut [u32x4], w: &[u32]) {
|
||
|
+/// fn schedule4_data(work: &mut [[u32; 4]], w: &[u32]) {
|
||
|
///
|
||
|
/// // this is to illustrate the data order
|
||
|
-/// work[0] = u32x4(w[3], w[2], w[1], w[0]);
|
||
|
-/// work[1] = u32x4(w[7], w[6], w[5], w[4]);
|
||
|
-/// work[2] = u32x4(w[11], w[10], w[9], w[8]);
|
||
|
-/// work[3] = u32x4(w[15], w[14], w[13], w[12]);
|
||
|
+/// work[0] = [w[3], w[2], w[1], w[0]];
|
||
|
+/// work[1] = [w[7], w[6], w[5], w[4]];
|
||
|
+/// work[2] = [w[11], w[10], w[9], w[8]];
|
||
|
+/// work[3] = [w[15], w[14], w[13], w[12]];
|
||
|
/// }
|
||
|
///
|
||
|
-/// fn schedule4_work(work: &mut [u32x4], t: usize) {
|
||
|
+/// fn schedule4_work(work: &mut [[u32; 4]], t: usize) {
|
||
|
///
|
||
|
/// // this is the core expression
|
||
|
/// work[t] = sha256msg2(sha256msg1(work[t - 4], work[t - 3]) +
|
||
|
@@ -240,26 +269,26 @@ fn sha256_digest_block_u32(state: &mut [u32; 8], block: &[u32; 16]) {
|
||
|
/// and the digest-related instructions allow 4 rounds to be calculated as:
|
||
|
///
|
||
|
/// ```ignore
|
||
|
-/// use std::simd::u32x4;
|
||
|
+/// use std::simd::[u32; 4];
|
||
|
/// use self::crypto::sha2::{K32X4,
|
||
|
/// sha256rnds2,
|
||
|
/// sha256swap
|
||
|
/// };
|
||
|
///
|
||
|
-/// fn rounds4(state: &mut [u32; 8], work: &mut [u32x4], t: usize) {
|
||
|
+/// fn rounds4(state: &mut [u32; 8], work: &mut [[u32; 4]], t: usize) {
|
||
|
/// let [a, b, c, d, e, f, g, h]: [u32; 8] = *state;
|
||
|
///
|
||
|
/// // this is to illustrate the data order
|
||
|
-/// let mut abef = u32x4(a, b, e, f);
|
||
|
-/// let mut cdgh = u32x4(c, d, g, h);
|
||
|
+/// let mut abef = [a, b, e, f];
|
||
|
+/// let mut cdgh = [c, d, g, h];
|
||
|
/// let temp = K32X4[t] + work[t];
|
||
|
///
|
||
|
/// // this is the core expression
|
||
|
/// cdgh = sha256rnds2(cdgh, abef, temp);
|
||
|
/// abef = sha256rnds2(abef, cdgh, sha256swap(temp));
|
||
|
///
|
||
|
-/// *state = [abef.0, abef.1, cdgh.0, cdgh.1,
|
||
|
-/// abef.2, abef.3, cdgh.2, cdgh.3];
|
||
|
+/// *state = [abef[0], abef[1], cdgh[0], cdgh[1],
|
||
|
+/// abef[2], abef[3], cdgh[2], cdgh[3]];
|
||
|
/// }
|
||
|
/// ```
|
||
|
///
|
||
|
@@ -282,6 +311,8 @@ fn sha256_digest_block_u32(state: &mut [u32; 8], block: &[u32; 16]) {
|
||
|
/// support in LLVM (and GCC, etc.).
|
||
|
pub fn compress256(state: &mut [u32; 8], block: &[u8; 64]) {
|
||
|
let mut block_u32 = [0u32; BLOCK_LEN];
|
||
|
- BE::read_u32_into(block, &mut block_u32[..]);
|
||
|
+ for (o, chunk) in block_u32.iter_mut().zip(block.chunks_exact(4)) {
|
||
|
+ *o = u32::from_be_bytes(chunk.try_into().unwrap());
|
||
|
+ }
|
||
|
sha256_digest_block_u32(state, &block_u32);
|
||
|
}
|
||
|
diff --git a/src/sha512.rs b/src/sha512.rs
|
||
|
index 0a4a760..ed3a1cc 100644
|
||
|
--- a/src/sha512.rs
|
||
|
+++ b/src/sha512.rs
|
||
|
@@ -1,10 +1,7 @@
|
||
|
//! SHA-512
|
||
|
|
||
|
use crate::consts::{H384, H512, H512_TRUNC_224, H512_TRUNC_256, STATE_LEN};
|
||
|
-use block_buffer::{
|
||
|
- byteorder::{ByteOrder, BE},
|
||
|
- BlockBuffer,
|
||
|
-};
|
||
|
+use block_buffer::BlockBuffer;
|
||
|
use digest::impl_write;
|
||
|
use digest::{
|
||
|
consts::{U128, U28, U32, U48, U64},
|
||
|
@@ -43,7 +40,7 @@ impl Engine512State {
|
||
|
/// contains the logic necessary to perform the final calculations.
|
||
|
#[derive(Clone)]
|
||
|
struct Engine512 {
|
||
|
- len: (u64, u64), // TODO: replace with u128 on MSRV bump
|
||
|
+ len: u128,
|
||
|
buffer: BlockBuffer<BlockSize>,
|
||
|
state: Engine512State,
|
||
|
}
|
||
|
@@ -51,31 +48,26 @@ struct Engine512 {
|
||
|
impl Engine512 {
|
||
|
fn new(h: &[u64; STATE_LEN]) -> Engine512 {
|
||
|
Engine512 {
|
||
|
- len: (0, 0),
|
||
|
+ len: 0,
|
||
|
buffer: Default::default(),
|
||
|
state: Engine512State::new(h),
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fn update(&mut self, input: &[u8]) {
|
||
|
- let (res, over) = self.len.1.overflowing_add((input.len() as u64) << 3);
|
||
|
- self.len.1 = res;
|
||
|
- if over {
|
||
|
- self.len.0 += 1;
|
||
|
- }
|
||
|
- let self_state = &mut self.state;
|
||
|
- self.buffer.input(input, |d| self_state.process_block(d));
|
||
|
+ self.len += (input.len() as u128) << 3;
|
||
|
+ let s = &mut self.state;
|
||
|
+ self.buffer.input_block(input, |d| s.process_block(d));
|
||
|
}
|
||
|
|
||
|
fn finish(&mut self) {
|
||
|
- let self_state = &mut self.state;
|
||
|
- let (hi, lo) = self.len;
|
||
|
+ let s = &mut self.state;
|
||
|
self.buffer
|
||
|
- .len128_padding_be(hi, lo, |d| self_state.process_block(d));
|
||
|
+ .len128_padding_be(self.len, |d| s.process_block(d));
|
||
|
}
|
||
|
|
||
|
fn reset(&mut self, h: &[u64; STATE_LEN]) {
|
||
|
- self.len = (0, 0);
|
||
|
+ self.len = 0;
|
||
|
self.buffer.reset();
|
||
|
self.state = Engine512State::new(h);
|
||
|
}
|
||
|
@@ -110,7 +102,10 @@ impl FixedOutputDirty for Sha512 {
|
||
|
|
||
|
fn finalize_into_dirty(&mut self, out: &mut digest::Output<Self>) {
|
||
|
self.engine.finish();
|
||
|
- BE::write_u64_into(&self.engine.state.h[..], out.as_mut_slice());
|
||
|
+ let h = self.engine.state.h;
|
||
|
+ for (chunk, v) in out.chunks_exact_mut(8).zip(h.iter()) {
|
||
|
+ chunk.copy_from_slice(&v.to_be_bytes());
|
||
|
+ }
|
||
|
}
|
||
|
}
|
||
|
|
||
|
@@ -150,7 +145,10 @@ impl FixedOutputDirty for Sha384 {
|
||
|
|
||
|
fn finalize_into_dirty(&mut self, out: &mut digest::Output<Self>) {
|
||
|
self.engine.finish();
|
||
|
- BE::write_u64_into(&self.engine.state.h[..6], out.as_mut_slice());
|
||
|
+ let h = &self.engine.state.h[..6];
|
||
|
+ for (chunk, v) in out.chunks_exact_mut(8).zip(h.iter()) {
|
||
|
+ chunk.copy_from_slice(&v.to_be_bytes());
|
||
|
+ }
|
||
|
}
|
||
|
}
|
||
|
|
||
|
@@ -190,7 +188,10 @@ impl FixedOutputDirty for Sha512Trunc256 {
|
||
|
|
||
|
fn finalize_into_dirty(&mut self, out: &mut digest::Output<Self>) {
|
||
|
self.engine.finish();
|
||
|
- BE::write_u64_into(&self.engine.state.h[..4], out.as_mut_slice());
|
||
|
+ let h = &self.engine.state.h[..4];
|
||
|
+ for (chunk, v) in out.chunks_exact_mut(8).zip(h.iter()) {
|
||
|
+ chunk.copy_from_slice(&v.to_be_bytes());
|
||
|
+ }
|
||
|
}
|
||
|
}
|
||
|
|
||
|
@@ -230,8 +231,11 @@ impl FixedOutputDirty for Sha512Trunc224 {
|
||
|
|
||
|
fn finalize_into_dirty(&mut self, out: &mut digest::Output<Self>) {
|
||
|
self.engine.finish();
|
||
|
- BE::write_u64_into(&self.engine.state.h[..3], &mut out[..24]);
|
||
|
- BE::write_u32(&mut out[24..28], (self.engine.state.h[3] >> 32) as u32);
|
||
|
+ let h = &self.engine.state.h;
|
||
|
+ for (chunk, v) in out.chunks_exact_mut(8).zip(h[..3].iter()) {
|
||
|
+ chunk.copy_from_slice(&v.to_be_bytes());
|
||
|
+ }
|
||
|
+ out[24..28].copy_from_slice(&h[3].to_be_bytes()[..4]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
diff --git a/src/sha512_utils.rs b/src/sha512_utils.rs
|
||
|
index 9fb822f..eaa9d51 100644
|
||
|
--- a/src/sha512_utils.rs
|
||
|
+++ b/src/sha512_utils.rs
|
||
|
@@ -1,17 +1,20 @@
|
||
|
#![allow(clippy::many_single_char_names)]
|
||
|
-
|
||
|
use crate::consts::{BLOCK_LEN, K64X2};
|
||
|
-use crate::simd::u64x2;
|
||
|
-use block_buffer::byteorder::{ByteOrder, BE};
|
||
|
+use core::convert::TryInto;
|
||
|
+
|
||
|
+#[inline(always)]
|
||
|
+fn add(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
|
||
|
+ [a[0].wrapping_add(b[0]), a[1].wrapping_add(b[1])]
|
||
|
+}
|
||
|
|
||
|
/// Not an intrinsic, but works like an unaligned load.
|
||
|
#[inline]
|
||
|
-fn sha512load(v0: u64x2, v1: u64x2) -> u64x2 {
|
||
|
- u64x2(v1.1, v0.0)
|
||
|
+fn sha512load(v0: [u64; 2], v1: [u64; 2]) -> [u64; 2] {
|
||
|
+ [v1[1], v0[0]]
|
||
|
}
|
||
|
|
||
|
/// Performs 2 rounds of the SHA-512 message schedule update.
|
||
|
-pub fn sha512_schedule_x2(v0: u64x2, v1: u64x2, v4to5: u64x2, v7: u64x2) -> u64x2 {
|
||
|
+pub fn sha512_schedule_x2(v0: [u64; 2], v1: [u64; 2], v4to5: [u64; 2], v7: [u64; 2]) -> [u64; 2] {
|
||
|
// sigma 0
|
||
|
fn sigma0(x: u64) -> u64 {
|
||
|
((x << 63) | (x >> 1)) ^ ((x << 56) | (x >> 8)) ^ (x >> 7)
|
||
|
@@ -22,10 +25,10 @@ pub fn sha512_schedule_x2(v0: u64x2, v1: u64x2, v4to5: u64x2, v7: u64x2) -> u64x
|
||
|
((x << 45) | (x >> 19)) ^ ((x << 3) | (x >> 61)) ^ (x >> 6)
|
||
|
}
|
||
|
|
||
|
- let u64x2(w1, w0) = v0;
|
||
|
- let u64x2(_, w2) = v1;
|
||
|
- let u64x2(w10, w9) = v4to5;
|
||
|
- let u64x2(w15, w14) = v7;
|
||
|
+ let [w1, w0] = v0;
|
||
|
+ let [_, w2] = v1;
|
||
|
+ let [w10, w9] = v4to5;
|
||
|
+ let [w15, w14] = v7;
|
||
|
|
||
|
let w16 = sigma1(w14)
|
||
|
.wrapping_add(w9)
|
||
|
@@ -36,11 +39,17 @@ pub fn sha512_schedule_x2(v0: u64x2, v1: u64x2, v4to5: u64x2, v7: u64x2) -> u64x
|
||
|
.wrapping_add(sigma0(w2))
|
||
|
.wrapping_add(w1);
|
||
|
|
||
|
- u64x2(w17, w16)
|
||
|
+ [w17, w16]
|
||
|
}
|
||
|
|
||
|
/// Performs one round of the SHA-512 message block digest.
|
||
|
-pub fn sha512_digest_round(ae: u64x2, bf: u64x2, cg: u64x2, dh: u64x2, wk0: u64) -> u64x2 {
|
||
|
+pub fn sha512_digest_round(
|
||
|
+ ae: [u64; 2],
|
||
|
+ bf: [u64; 2],
|
||
|
+ cg: [u64; 2],
|
||
|
+ dh: [u64; 2],
|
||
|
+ wk0: u64,
|
||
|
+) -> [u64; 2] {
|
||
|
macro_rules! big_sigma0 {
|
||
|
($a:expr) => {
|
||
|
($a.rotate_right(28) ^ $a.rotate_right(34) ^ $a.rotate_right(39))
|
||
|
@@ -62,10 +71,10 @@ pub fn sha512_digest_round(ae: u64x2, bf: u64x2, cg: u64x2, dh: u64x2, wk0: u64)
|
||
|
};
|
||
|
} // Majority, SHA1M
|
||
|
|
||
|
- let u64x2(a0, e0) = ae;
|
||
|
- let u64x2(b0, f0) = bf;
|
||
|
- let u64x2(c0, g0) = cg;
|
||
|
- let u64x2(d0, h0) = dh;
|
||
|
+ let [a0, e0] = ae;
|
||
|
+ let [b0, f0] = bf;
|
||
|
+ let [c0, g0] = cg;
|
||
|
+ let [d0, h0] = dh;
|
||
|
|
||
|
// a round
|
||
|
let x0 = big_sigma1!(e0)
|
||
|
@@ -84,7 +93,7 @@ pub fn sha512_digest_round(ae: u64x2, bf: u64x2, cg: u64x2, dh: u64x2, wk0: u64)
|
||
|
g0,
|
||
|
);
|
||
|
|
||
|
- u64x2(a1, e1)
|
||
|
+ [a1, e1]
|
||
|
}
|
||
|
|
||
|
/// Process a block with the SHA-512 algorithm.
|
||
|
@@ -99,8 +108,8 @@ pub fn sha512_digest_block_u64(state: &mut [u64; 8], block: &[u64; 16]) {
|
||
|
|
||
|
macro_rules! rounds4 {
|
||
|
($ae:ident, $bf:ident, $cg:ident, $dh:ident, $wk0:expr, $wk1:expr) => {{
|
||
|
- let u64x2(u, t) = $wk0;
|
||
|
- let u64x2(w, v) = $wk1;
|
||
|
+ let [u, t] = $wk0;
|
||
|
+ let [w, v] = $wk1;
|
||
|
|
||
|
$dh = sha512_digest_round($ae, $bf, $cg, $dh, t);
|
||
|
$cg = sha512_digest_round($dh, $ae, $bf, $cg, u);
|
||
|
@@ -109,79 +118,79 @@ pub fn sha512_digest_block_u64(state: &mut [u64; 8], block: &[u64; 16]) {
|
||
|
}};
|
||
|
}
|
||
|
|
||
|
- let mut ae = u64x2(state[0], state[4]);
|
||
|
- let mut bf = u64x2(state[1], state[5]);
|
||
|
- let mut cg = u64x2(state[2], state[6]);
|
||
|
- let mut dh = u64x2(state[3], state[7]);
|
||
|
+ let mut ae = [state[0], state[4]];
|
||
|
+ let mut bf = [state[1], state[5]];
|
||
|
+ let mut cg = [state[2], state[6]];
|
||
|
+ let mut dh = [state[3], state[7]];
|
||
|
|
||
|
// Rounds 0..20
|
||
|
- let (mut w1, mut w0) = (u64x2(block[3], block[2]), u64x2(block[1], block[0]));
|
||
|
- rounds4!(ae, bf, cg, dh, k[0] + w0, k[1] + w1);
|
||
|
- let (mut w3, mut w2) = (u64x2(block[7], block[6]), u64x2(block[5], block[4]));
|
||
|
- rounds4!(ae, bf, cg, dh, k[2] + w2, k[3] + w3);
|
||
|
- let (mut w5, mut w4) = (u64x2(block[11], block[10]), u64x2(block[9], block[8]));
|
||
|
- rounds4!(ae, bf, cg, dh, k[4] + w4, k[5] + w5);
|
||
|
- let (mut w7, mut w6) = (u64x2(block[15], block[14]), u64x2(block[13], block[12]));
|
||
|
- rounds4!(ae, bf, cg, dh, k[6] + w6, k[7] + w7);
|
||
|
+ let (mut w1, mut w0) = ([block[3], block[2]], [block[1], block[0]]);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[0], w0), add(k[1], w1));
|
||
|
+ let (mut w3, mut w2) = ([block[7], block[6]], [block[5], block[4]]);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[2], w2), add(k[3], w3));
|
||
|
+ let (mut w5, mut w4) = ([block[11], block[10]], [block[9], block[8]]);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[4], w4), add(k[5], w5));
|
||
|
+ let (mut w7, mut w6) = ([block[15], block[14]], [block[13], block[12]]);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[6], w6), add(k[7], w7));
|
||
|
let mut w8 = schedule!(w0, w1, w4, w5, w7);
|
||
|
let mut w9 = schedule!(w1, w2, w5, w6, w8);
|
||
|
- rounds4!(ae, bf, cg, dh, k[8] + w8, k[9] + w9);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[8], w8), add(k[9], w9));
|
||
|
|
||
|
// Rounds 20..40
|
||
|
w0 = schedule!(w2, w3, w6, w7, w9);
|
||
|
w1 = schedule!(w3, w4, w7, w8, w0);
|
||
|
- rounds4!(ae, bf, cg, dh, k[10] + w0, k[11] + w1);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[10], w0), add(k[11], w1));
|
||
|
w2 = schedule!(w4, w5, w8, w9, w1);
|
||
|
w3 = schedule!(w5, w6, w9, w0, w2);
|
||
|
- rounds4!(ae, bf, cg, dh, k[12] + w2, k[13] + w3);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[12], w2), add(k[13], w3));
|
||
|
w4 = schedule!(w6, w7, w0, w1, w3);
|
||
|
w5 = schedule!(w7, w8, w1, w2, w4);
|
||
|
- rounds4!(ae, bf, cg, dh, k[14] + w4, k[15] + w5);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[14], w4), add(k[15], w5));
|
||
|
w6 = schedule!(w8, w9, w2, w3, w5);
|
||
|
w7 = schedule!(w9, w0, w3, w4, w6);
|
||
|
- rounds4!(ae, bf, cg, dh, k[16] + w6, k[17] + w7);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[16], w6), add(k[17], w7));
|
||
|
w8 = schedule!(w0, w1, w4, w5, w7);
|
||
|
w9 = schedule!(w1, w2, w5, w6, w8);
|
||
|
- rounds4!(ae, bf, cg, dh, k[18] + w8, k[19] + w9);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[18], w8), add(k[19], w9));
|
||
|
|
||
|
// Rounds 40..60
|
||
|
w0 = schedule!(w2, w3, w6, w7, w9);
|
||
|
w1 = schedule!(w3, w4, w7, w8, w0);
|
||
|
- rounds4!(ae, bf, cg, dh, k[20] + w0, k[21] + w1);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[20], w0), add(k[21], w1));
|
||
|
w2 = schedule!(w4, w5, w8, w9, w1);
|
||
|
w3 = schedule!(w5, w6, w9, w0, w2);
|
||
|
- rounds4!(ae, bf, cg, dh, k[22] + w2, k[23] + w3);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[22], w2), add(k[23], w3));
|
||
|
w4 = schedule!(w6, w7, w0, w1, w3);
|
||
|
w5 = schedule!(w7, w8, w1, w2, w4);
|
||
|
- rounds4!(ae, bf, cg, dh, k[24] + w4, k[25] + w5);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[24], w4), add(k[25], w5));
|
||
|
w6 = schedule!(w8, w9, w2, w3, w5);
|
||
|
w7 = schedule!(w9, w0, w3, w4, w6);
|
||
|
- rounds4!(ae, bf, cg, dh, k[26] + w6, k[27] + w7);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[26], w6), add(k[27], w7));
|
||
|
w8 = schedule!(w0, w1, w4, w5, w7);
|
||
|
w9 = schedule!(w1, w2, w5, w6, w8);
|
||
|
- rounds4!(ae, bf, cg, dh, k[28] + w8, k[29] + w9);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[28], w8), add(k[29], w9));
|
||
|
|
||
|
// Rounds 60..80
|
||
|
w0 = schedule!(w2, w3, w6, w7, w9);
|
||
|
w1 = schedule!(w3, w4, w7, w8, w0);
|
||
|
- rounds4!(ae, bf, cg, dh, k[30] + w0, k[31] + w1);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[30], w0), add(k[31], w1));
|
||
|
w2 = schedule!(w4, w5, w8, w9, w1);
|
||
|
w3 = schedule!(w5, w6, w9, w0, w2);
|
||
|
- rounds4!(ae, bf, cg, dh, k[32] + w2, k[33] + w3);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[32], w2), add(k[33], w3));
|
||
|
w4 = schedule!(w6, w7, w0, w1, w3);
|
||
|
w5 = schedule!(w7, w8, w1, w2, w4);
|
||
|
- rounds4!(ae, bf, cg, dh, k[34] + w4, k[35] + w5);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[34], w4), add(k[35], w5));
|
||
|
w6 = schedule!(w8, w9, w2, w3, w5);
|
||
|
w7 = schedule!(w9, w0, w3, w4, w6);
|
||
|
- rounds4!(ae, bf, cg, dh, k[36] + w6, k[37] + w7);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[36], w6), add(k[37], w7));
|
||
|
w8 = schedule!(w0, w1, w4, w5, w7);
|
||
|
w9 = schedule!(w1, w2, w5, w6, w8);
|
||
|
- rounds4!(ae, bf, cg, dh, k[38] + w8, k[39] + w9);
|
||
|
+ rounds4!(ae, bf, cg, dh, add(k[38], w8), add(k[39], w9));
|
||
|
|
||
|
- let u64x2(a, e) = ae;
|
||
|
- let u64x2(b, f) = bf;
|
||
|
- let u64x2(c, g) = cg;
|
||
|
- let u64x2(d, h) = dh;
|
||
|
+ let [a, e] = ae;
|
||
|
+ let [b, f] = bf;
|
||
|
+ let [c, g] = cg;
|
||
|
+ let [d, h] = dh;
|
||
|
|
||
|
state[0] = state[0].wrapping_add(a);
|
||
|
state[1] = state[1].wrapping_add(b);
|
||
|
@@ -210,26 +219,26 @@ pub fn sha512_digest_block_u64(state: &mut [u64; 8], block: &[u64; 16]) {
|
||
|
/// functions allow 4 rounds to be calculated as:
|
||
|
///
|
||
|
/// ```ignore
|
||
|
-/// use std::simd::u64x2;
|
||
|
+/// // no SIMD import is needed: each lane pair is a plain `[u64; 2]` array
|
||
|
/// use self::crypto::sha2::{
|
||
|
/// sha512msg,
|
||
|
/// sha512load
|
||
|
/// };
|
||
|
///
|
||
|
-/// fn schedule4_data(work: &mut [u64x2], w: &[u64]) {
|
||
|
+/// fn schedule4_data(work: &mut [[u64; 2]], w: &[u64]) {
|
||
|
///
|
||
|
/// // this is to illustrate the data order
|
||
|
-/// work[0] = u64x2(w[1], w[0]);
|
||
|
-/// work[1] = u64x2(w[3], w[2]);
|
||
|
-/// work[2] = u64x2(w[5], w[4]);
|
||
|
-/// work[3] = u64x2(w[7], w[6]);
|
||
|
-/// work[4] = u64x2(w[9], w[8]);
|
||
|
-/// work[5] = u64x2(w[11], w[10]);
|
||
|
-/// work[6] = u64x2(w[13], w[12]);
|
||
|
-/// work[7] = u64x2(w[15], w[14]);
|
||
|
+/// work[0] = [w[1], w[0]];
|
||
|
+/// work[1] = [w[3], w[2]];
|
||
|
+/// work[2] = [w[5], w[4]];
|
||
|
+/// work[3] = [w[7], w[6]];
|
||
|
+/// work[4] = [w[9], w[8]];
|
||
|
+/// work[5] = [w[11], w[10]];
|
||
|
+/// work[6] = [w[13], w[12]];
|
||
|
+/// work[7] = [w[15], w[14]];
|
||
|
/// }
|
||
|
///
|
||
|
-/// fn schedule4_work(work: &mut [u64x2], t: usize) {
|
||
|
+/// fn schedule4_work(work: &mut [[u64; 2]], t: usize) {
|
||
|
///
|
||
|
/// // this is the core expression
|
||
|
/// work[t] = sha512msg(work[t - 8],
|
||
|
@@ -250,19 +259,19 @@ pub fn sha512_digest_block_u64(state: &mut [u64; 8], block: &[u64; 16]) {
|
||
|
/// and the digest-related functions allow 4 rounds to be calculated as:
|
||
|
///
|
||
|
/// ```ignore
|
||
|
-/// use std::simd::u64x2;
|
||
|
+/// // no SIMD import is needed: each lane pair is a plain `[u64; 2]` array
|
||
|
/// use self::crypto::sha2::{K64X2, sha512rnd};
|
||
|
///
|
||
|
-/// fn rounds4(state: &mut [u64; 8], work: &mut [u64x2], t: usize) {
|
||
|
+/// fn rounds4(state: &mut [u64; 8], work: &mut [[u64; 2]], t: usize) {
|
||
|
/// let [a, b, c, d, e, f, g, h]: [u64; 8] = *state;
|
||
|
///
|
||
|
/// // this is to illustrate the data order
|
||
|
-/// let mut ae = u64x2(a, e);
|
||
|
-/// let mut bf = u64x2(b, f);
|
||
|
-/// let mut cg = u64x2(c, g);
|
||
|
-/// let mut dh = u64x2(d, h);
|
||
|
-/// let u64x2(w1, w0) = K64X2[2*t] + work[2*t];
|
||
|
-/// let u64x2(w3, w2) = K64X2[2*t + 1] + work[2*t + 1];
|
||
|
+/// let mut ae = [a, e];
|
||
|
+/// let mut bf = [b, f];
|
||
|
+/// let mut cg = [c, g];
|
||
|
+/// let mut dh = [d, h];
|
||
|
+/// let [w1, w0] = add(K64X2[2*t], work[2*t]);
|
||
|
+/// let [w3, w2] = add(K64X2[2*t + 1], work[2*t + 1]);
|
||
|
///
|
||
|
/// // this is the core expression
|
||
|
/// dh = sha512rnd(ae, bf, cg, dh, w0);
|
||
|
@@ -270,8 +279,8 @@ pub fn sha512_digest_block_u64(state: &mut [u64; 8], block: &[u64; 16]) {
|
||
|
/// bf = sha512rnd(cg, dh, ae, bf, w2);
|
||
|
/// ae = sha512rnd(bf, cg, dh, ae, w3);
|
||
|
///
|
||
|
-/// *state = [ae.0, bf.0, cg.0, dh.0,
|
||
|
-/// ae.1, bf.1, cg.1, dh.1];
|
||
|
+/// *state = [ae[0], bf[0], cg[0], dh[0],
|
||
|
+/// ae[1], bf[1], cg[1], dh[1]];
|
||
|
/// }
|
||
|
/// ```
|
||
|
///
|
||
|
@@ -290,6 +299,8 @@ pub fn sha512_digest_block_u64(state: &mut [u64; 8], block: &[u64; 16]) {
|
||
|
///
|
||
|
pub fn compress512(state: &mut [u64; 8], block: &[u8; 128]) {
|
||
|
let mut block_u64 = [0u64; BLOCK_LEN];
|
||
|
- BE::read_u64_into(block, &mut block_u64[..]);
|
||
|
+ for (o, chunk) in block_u64.iter_mut().zip(block.chunks_exact(8)) {
|
||
|
+ *o = u64::from_be_bytes(chunk.try_into().unwrap());
|
||
|
+ }
|
||
|
sha512_digest_block_u64(state, &block_u64);
|
||
|
}
|
||
|
--
|
||
|
2.27.0
|
||
|
|