Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/abi_self_test.cc
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/abi_self_test.cc
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/abi_self_test.cc
@@ -521,3 +521,289 @@ TEST(ABITest, AArch64) {
CHECK_ABI_NO_UNWIND(abi_test_clobber_v15_upper);
}
#endif // OPENSSL_AARCH64 && SUPPORTS_ABI_TEST
+
+#if defined(OPENSSL_PPC64LE) && defined(SUPPORTS_ABI_TEST)
+extern "C" {
+void abi_test_clobber_r0(void);
+// r1 is the stack pointer.
+void abi_test_clobber_r2(void);
+void abi_test_clobber_r3(void);
+void abi_test_clobber_r4(void);
+void abi_test_clobber_r5(void);
+void abi_test_clobber_r6(void);
+void abi_test_clobber_r7(void);
+void abi_test_clobber_r8(void);
+void abi_test_clobber_r9(void);
+void abi_test_clobber_r10(void);
+void abi_test_clobber_r11(void);
+void abi_test_clobber_r12(void);
+// r13 is the thread pointer.
+void abi_test_clobber_r14(void);
+void abi_test_clobber_r15(void);
+void abi_test_clobber_r16(void);
+void abi_test_clobber_r17(void);
+void abi_test_clobber_r18(void);
+void abi_test_clobber_r19(void);
+void abi_test_clobber_r20(void);
+void abi_test_clobber_r21(void);
+void abi_test_clobber_r22(void);
+void abi_test_clobber_r23(void);
+void abi_test_clobber_r24(void);
+void abi_test_clobber_r25(void);
+void abi_test_clobber_r26(void);
+void abi_test_clobber_r27(void);
+void abi_test_clobber_r28(void);
+void abi_test_clobber_r29(void);
+void abi_test_clobber_r30(void);
+void abi_test_clobber_r31(void);
+
+void abi_test_clobber_f0(void);
+void abi_test_clobber_f1(void);
+void abi_test_clobber_f2(void);
+void abi_test_clobber_f3(void);
+void abi_test_clobber_f4(void);
+void abi_test_clobber_f5(void);
+void abi_test_clobber_f6(void);
+void abi_test_clobber_f7(void);
+void abi_test_clobber_f8(void);
+void abi_test_clobber_f9(void);
+void abi_test_clobber_f10(void);
+void abi_test_clobber_f11(void);
+void abi_test_clobber_f12(void);
+void abi_test_clobber_f13(void);
+void abi_test_clobber_f14(void);
+void abi_test_clobber_f15(void);
+void abi_test_clobber_f16(void);
+void abi_test_clobber_f17(void);
+void abi_test_clobber_f18(void);
+void abi_test_clobber_f19(void);
+void abi_test_clobber_f20(void);
+void abi_test_clobber_f21(void);
+void abi_test_clobber_f22(void);
+void abi_test_clobber_f23(void);
+void abi_test_clobber_f24(void);
+void abi_test_clobber_f25(void);
+void abi_test_clobber_f26(void);
+void abi_test_clobber_f27(void);
+void abi_test_clobber_f28(void);
+void abi_test_clobber_f29(void);
+void abi_test_clobber_f30(void);
+void abi_test_clobber_f31(void);
+
+void abi_test_clobber_v0(void);
+void abi_test_clobber_v1(void);
+void abi_test_clobber_v2(void);
+void abi_test_clobber_v3(void);
+void abi_test_clobber_v4(void);
+void abi_test_clobber_v5(void);
+void abi_test_clobber_v6(void);
+void abi_test_clobber_v7(void);
+void abi_test_clobber_v8(void);
+void abi_test_clobber_v9(void);
+void abi_test_clobber_v10(void);
+void abi_test_clobber_v11(void);
+void abi_test_clobber_v12(void);
+void abi_test_clobber_v13(void);
+void abi_test_clobber_v14(void);
+void abi_test_clobber_v15(void);
+void abi_test_clobber_v16(void);
+void abi_test_clobber_v17(void);
+void abi_test_clobber_v18(void);
+void abi_test_clobber_v19(void);
+void abi_test_clobber_v20(void);
+void abi_test_clobber_v21(void);
+void abi_test_clobber_v22(void);
+void abi_test_clobber_v23(void);
+void abi_test_clobber_v24(void);
+void abi_test_clobber_v25(void);
+void abi_test_clobber_v26(void);
+void abi_test_clobber_v27(void);
+void abi_test_clobber_v28(void);
+void abi_test_clobber_v29(void);
+void abi_test_clobber_v30(void);
+void abi_test_clobber_v31(void);
+
+void abi_test_clobber_cr0(void);
+void abi_test_clobber_cr1(void);
+void abi_test_clobber_cr2(void);
+void abi_test_clobber_cr3(void);
+void abi_test_clobber_cr4(void);
+void abi_test_clobber_cr5(void);
+void abi_test_clobber_cr6(void);
+void abi_test_clobber_cr7(void);
+
+void abi_test_clobber_ctr(void);
+void abi_test_clobber_lr(void);
+
+} // extern "C"
+
+TEST(ABITest, PPC64LE) {
+ // abi_test_trampoline hides unsaved registers from the caller, so we can
+ // safely call the abi_test_clobber_* functions below.
+ abi_test::internal::CallerState state;
+ RAND_bytes(reinterpret_cast<uint8_t *>(&state), sizeof(state));
+ CHECK_ABI_NO_UNWIND(abi_test_trampoline,
+ reinterpret_cast<crypto_word_t>(abi_test_clobber_r14),
+ &state, nullptr, 0, 0 /* no breakpoint */);
+
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r0);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r2);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r3);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r4);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r5);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r6);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r7);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r8);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r9);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r10);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r11);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r12);
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r14),
+ "r14 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r15),
+ "r15 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r16),
+ "r16 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r17),
+ "r17 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r18),
+ "r18 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r19),
+ "r19 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r20),
+ "r20 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r21),
+ "r21 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r22),
+ "r22 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r23),
+ "r23 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r24),
+ "r24 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r25),
+ "r25 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r26),
+ "r26 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r27),
+ "r27 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r28),
+ "r28 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r29),
+ "r29 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r30),
+ "r30 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r31),
+ "r31 was not restored after return");
+
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f0);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f1);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f2);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f3);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f4);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f5);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f6);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f7);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f8);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f9);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f10);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f11);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f12);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f13);
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f14),
+ "f14 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f15),
+ "f15 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f16),
+ "f16 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f17),
+ "f17 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f18),
+ "f18 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f19),
+ "f19 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f20),
+ "f20 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f21),
+ "f21 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f22),
+ "f22 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f23),
+ "f23 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f24),
+ "f24 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f25),
+ "f25 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f26),
+ "f26 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f27),
+ "f27 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f28),
+ "f28 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f29),
+ "f29 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f30),
+ "f30 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f31),
+ "f31 was not restored after return");
+
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v0);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v1);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v2);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v3);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v4);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v5);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v6);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v7);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v8);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v9);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v10);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v11);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v12);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v13);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v14);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v15);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v16);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v17);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v18);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v19);
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v20),
+ "v20 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v21),
+ "v21 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v22),
+ "v22 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v23),
+ "v23 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v24),
+ "v24 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v25),
+ "v25 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v26),
+ "v26 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v27),
+ "v27 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v28),
+ "v28 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v29),
+ "v29 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v30),
+ "v30 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v31),
+ "v31 was not restored after return");
+
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_cr0);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_cr1);
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr2),
+ "cr was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr3),
+ "cr was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr4),
+ "cr was not restored after return");
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_cr5);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_cr6);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_cr7);
+
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_ctr);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_lr);
+}
+#endif // OPENSSL_PPC64LE && SUPPORTS_ABI_TEST
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/cpu_ppc64le.c
===================================================================
--- /dev/null
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/cpu_ppc64le.c
@@ -0,0 +1,38 @@
+/* Copyright (c) 2016, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/base.h>
+
+#if defined(OPENSSL_PPC64LE)
+
+#include <sys/auxv.h>
+
+#include "internal.h"
+
+
+#if !defined(PPC_FEATURE2_HAS_VCRYPTO)
+// PPC_FEATURE2_HAS_VCRYPTO was taken from section 4.1.2.3 of the “OpenPOWER
+// ABI for Linux Supplement”.
+#define PPC_FEATURE2_HAS_VCRYPTO 0x02000000
+#endif
+
+void OPENSSL_cpuid_setup(void) {
+ OPENSSL_ppc64le_hwcap2 = getauxval(AT_HWCAP2);
+}
+
+int CRYPTO_is_PPC64LE_vcrypto_capable(void) {
+ return (OPENSSL_ppc64le_hwcap2 & PPC_FEATURE2_HAS_VCRYPTO) != 0;
+}
+
+#endif // OPENSSL_PPC64LE
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/crypto.c
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/crypto.c
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/crypto.c
@@ -66,6 +66,10 @@ uint32_t OPENSSL_get_ia32cap(int idx) {
return OPENSSL_ia32cap_P[idx];
}

+#elif defined(OPENSSL_PPC64LE)
+
+HIDDEN unsigned long OPENSSL_ppc64le_hwcap2 = 0;
+
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)

#include <openssl/arm_arch.h>
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/aes/asm/aesp8-ppc.pl
===================================================================
--- /dev/null
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/aes/asm/aesp8-ppc.pl
@@ -0,0 +1,3809 @@
+#! /usr/bin/env perl
+# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements support for AES instructions as per PowerISA
+# specification version 2.07, first implemented by POWER8 processor.
+# The module is endian-agnostic in sense that it supports both big-
+# and little-endian cases. Data alignment in parallelizable modes is
+# handled with VSX loads and stores, which implies MSR.VSX flag being
+# set. It should also be noted that ISA specification doesn't prohibit
+# alignment exceptions for these instructions on page boundaries.
+# Initially alignment was handled in pure AltiVec/VMX way [when data
+# is aligned programmatically, which in turn guarantees exception-
+# free execution], but it turned to hamper performance when vcipher
+# instructions are interleaved. It's reckoned that eventual
+# misalignment penalties at page boundaries are in average lower
+# than additional overhead in pure AltiVec approach.
+#
+# May 2016
+#
+# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
+# systems were measured.
+#
+######################################################################
+# Current large-block performance in cycles per byte processed with
+# 128-bit key (less is better).
+#
+# CBC en-/decrypt CTR XTS
+# POWER8[le] 3.96/0.72 0.74 1.1
+# POWER8[be] 3.75/0.65 0.66 1.0
+# POWER9[le] 4.02/0.86 0.84 1.05
+# POWER9[be] 3.99/0.78 0.79 0.97
+
+$flavour = shift;
+$output = shift;
+
+if ($flavour =~ /64/) {
+ $SIZE_T =8;
+ $LRSAVE =2*$SIZE_T;
+ $STU ="stdu";
+ $POP ="ld";
+ $PUSH ="std";
+ $UCMP ="cmpld";
+ $SHL ="sldi";
+} elsif ($flavour =~ /32/) {
+ $SIZE_T =4;
+ $LRSAVE =$SIZE_T;
+ $STU ="stwu";
+ $POP ="lwz";
+ $PUSH ="stw";
+ $UCMP ="cmplw";
+ $SHL ="slwi";
+} else { die "nonsense $flavour"; }
+
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open OUT,"| $^X \"$xlate\" $flavour \"$output\"" || die "can't call $xlate: $!";
+*STDOUT=*OUT;
+
+$FRAME=8*$SIZE_T;
+$prefix="aes_hw";
+
+$sp="r1";
+$vrsave="r12";
+
+#########################################################################
+{{{ # Key setup procedures #
+my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
+my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
+my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
+
+$code.=<<___;
+.machine "any"
+
+.text
+
+.align 7
+Lrcon:
+.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
+.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
+.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
+.long 0,0,0,0 ?asis
+Lconsts:
+ mflr r0
+ bcl 20,31,\$+4
+ mflr $ptr #vvvvv "distance between . and rcon
+ addi $ptr,$ptr,-0x48
+ mtlr r0
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+
+.globl .${prefix}_set_encrypt_key
+.align 5
+.${prefix}_set_encrypt_key:
+Lset_encrypt_key:
+ mflr r11
+ $PUSH r11,$LRSAVE($sp)
+
+ li $ptr,-1
+ ${UCMP}i $inp,0
+ beq- Lenc_key_abort # if ($inp==0) return -1;
+ ${UCMP}i $out,0
+ beq- Lenc_key_abort # if ($out==0) return -1;
+ li $ptr,-2
+ cmpwi $bits,128
+ blt- Lenc_key_abort
+ cmpwi $bits,256
+ bgt- Lenc_key_abort
+ andi. r0,$bits,0x3f
+ bne- Lenc_key_abort
+
+ lis r0,0xfff0
+ mfspr $vrsave,256
+ mtspr 256,r0
+
+ bl Lconsts
+ mtlr r11
+
+ neg r9,$inp
+ lvx $in0,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ lvsr $key,0,r9 # borrow $key
+ li r8,0x20
+ cmpwi $bits,192
+ lvx $in1,0,$inp
+ le?vspltisb $mask,0x0f # borrow $mask
+ lvx $rcon,0,$ptr
+ le?vxor $key,$key,$mask # adjust for byte swap
+ lvx $mask,r8,$ptr
+ addi $ptr,$ptr,0x10
+ vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
+ li $cnt,8
+ vxor $zero,$zero,$zero
+ mtctr $cnt
+
+ ?lvsr $outperm,0,$out
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$zero,$outmask,$outperm
+
+ blt Loop128
+ addi $inp,$inp,8
+ beq L192
+ addi $inp,$inp,8
+ b L256
+
+.align 4
+Loop128:
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+ bdnz Loop128
+
+ lvx $rcon,0,$ptr # last two round keys
+
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vxor $in0,$in0,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,0x50
+
+ li $rounds,10
+ b Ldone
+
+.align 4
+L192:
+ lvx $tmp,0,$inp
+ li $cnt,4
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $out,$out,16
+ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
+ vspltisb $key,8 # borrow $key
+ mtctr $cnt
+ vsububm $mask,$mask,$key # adjust the mask
+
+Loop192:
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vcipherlast $key,$key,$rcon
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+
+ vsldoi $stage,$zero,$in1,8
+ vspltw $tmp,$in0,3
+ vxor $tmp,$tmp,$in1
+ vsldoi $in1,$zero,$in1,12 # >>32
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in1,$in1,$tmp
+ vxor $in0,$in0,$key
+ vxor $in1,$in1,$key
+ vsldoi $stage,$stage,$in0,8
+
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$stage,$stage,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vsldoi $stage,$in0,$in1,8
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vperm $outtail,$stage,$stage,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vspltw $tmp,$in0,3
+ vxor $tmp,$tmp,$in1
+ vsldoi $in1,$zero,$in1,12 # >>32
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in1,$in1,$tmp
+ vxor $in0,$in0,$key
+ vxor $in1,$in1,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,16
+ bdnz Loop192
+
+ li $rounds,12
+ addi $out,$out,0x20
+ b Ldone
+
+.align 4
+L256:
+ lvx $tmp,0,$inp
+ li $cnt,7
+ li $rounds,14
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $out,$out,16
+ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
+ mtctr $cnt
+
+Loop256:
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in1,$in1,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,16
+ bdz Ldone
+
+ vspltw $key,$in0,3 # just splat
+ vsldoi $tmp,$zero,$in1,12 # >>32
+ vsbox $key,$key
+
+ vxor $in1,$in1,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in1,$in1,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in1,$in1,$tmp
+
+ vxor $in1,$in1,$key
+ b Loop256
+
+.align 4
+Ldone:
+ lvx $in1,0,$inp # redundant in aligned case
+ vsel $in1,$outhead,$in1,$outmask
+ stvx $in1,0,$inp
+ li $ptr,0
+ mtspr 256,$vrsave
+ stw $rounds,0($out)
+
+Lenc_key_abort:
+ mr r3,$ptr
+ blr
+ .long 0
+ .byte 0,12,0x14,1,0,0,3,0
+ .long 0
+.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
+
+.globl .${prefix}_set_decrypt_key
+.align 5
+.${prefix}_set_decrypt_key:
+ $STU $sp,-$FRAME($sp)
+ mflr r10
+ $PUSH r10,`$FRAME+$LRSAVE`($sp)
+ bl Lset_encrypt_key
+ mtlr r10
+
+ cmpwi r3,0
+ bne- Ldec_key_abort
+
+ slwi $cnt,$rounds,4
+ subi $inp,$out,240 # first round key
+ srwi $rounds,$rounds,1
+ add $out,$inp,$cnt # last round key
+ mtctr $rounds
+
+Ldeckey:
+ lwz r0, 0($inp)
+ lwz r6, 4($inp)
+ lwz r7, 8($inp)
+ lwz r8, 12($inp)
+ addi $inp,$inp,16
+ lwz r9, 0($out)
+ lwz r10,4($out)
+ lwz r11,8($out)
+ lwz r12,12($out)
+ stw r0, 0($out)
+ stw r6, 4($out)
+ stw r7, 8($out)
+ stw r8, 12($out)
+ subi $out,$out,16
+ stw r9, -16($inp)
+ stw r10,-12($inp)
+ stw r11,-8($inp)
+ stw r12,-4($inp)
+ bdnz Ldeckey
+
+ xor r3,r3,r3 # return value
+Ldec_key_abort:
+ addi $sp,$sp,$FRAME
+ blr
+ .long 0
+ .byte 0,12,4,1,0x80,0,3,0
+ .long 0
+.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
+___
+}}}
+#########################################################################
+{{{ # Single block en- and decrypt procedures #
+sub gen_block () {
+my $dir = shift;
+my $n = $dir eq "de" ? "n" : "";
+my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
+
+$code.=<<___;
+.globl .${prefix}_${dir}crypt
+.align 5
+.${prefix}_${dir}crypt:
+ lwz $rounds,240($key)
+ lis r0,0xfc00
+ mfspr $vrsave,256
+ li $idx,15 # 15 is not typo
+ mtspr 256,r0
+
+ lvx v0,0,$inp
+ neg r11,$out
+ lvx v1,$idx,$inp
+ lvsl v2,0,$inp # inpperm
+ le?vspltisb v4,0x0f
+ ?lvsl v3,0,r11 # outperm
+ le?vxor v2,v2,v4
+ li $idx,16
+ vperm v0,v0,v1,v2 # align [and byte swap in LE]
+ lvx v1,0,$key
+ ?lvsl v5,0,$key # keyperm
+ srwi $rounds,$rounds,1
+ lvx v2,$idx,$key
+ addi $idx,$idx,16
+ subi $rounds,$rounds,1
+ ?vperm v1,v1,v2,v5 # align round key
+
+ vxor v0,v0,v1
+ lvx v1,$idx,$key
+ addi $idx,$idx,16
+ mtctr $rounds
+
+Loop_${dir}c:
+ ?vperm v2,v2,v1,v5
+ v${n}cipher v0,v0,v2
+ lvx v2,$idx,$key
+ addi $idx,$idx,16
+ ?vperm v1,v1,v2,v5
+ v${n}cipher v0,v0,v1
+ lvx v1,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_${dir}c
+
+ ?vperm v2,v2,v1,v5
+ v${n}cipher v0,v0,v2
+ lvx v2,$idx,$key
+ ?vperm v1,v1,v2,v5
+ v${n}cipherlast v0,v0,v1
+
+ vspltisb v2,-1
+ vxor v1,v1,v1
+ li $idx,15 # 15 is not typo
+ ?vperm v2,v1,v2,v3 # outmask
+ le?vxor v3,v3,v4
+ lvx v1,0,$out # outhead
+ vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
+ vsel v1,v1,v0,v2
+ lvx v4,$idx,$out
+ stvx v1,0,$out
+ vsel v0,v0,v4,v2
+ stvx v0,$idx,$out
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,3,0
+ .long 0
+.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
+___
+}
+&gen_block("en");
+&gen_block("de");
+}}}
+#########################################################################
+{{{ # CBC en- and decrypt procedures #
+my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
+ map("v$_",(4..10));
+$code.=<<___;
+.globl .${prefix}_cbc_encrypt
+.align 5
+.${prefix}_cbc_encrypt:
+ ${UCMP}i $len,16
+ bltlr-
+
+ cmpwi $enc,0 # test direction
+ lis r0,0xffe0
+ mfspr $vrsave,256
+ mtspr 256,r0
+
+ li $idx,15
+ vxor $rndkey0,$rndkey0,$rndkey0
+ le?vspltisb $tmp,0x0f
+
+ lvx $ivec,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $ivec,$ivec,$inptail,$inpperm
+
+ neg r11,$inp
+ ?lvsl $keyperm,0,$key # prepare for unaligned key
+ lwz $rounds,240($key)
+
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inptail,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ ?lvsr $outperm,0,$out # prepare for unaligned store
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
+ le?vxor $outperm,$outperm,$tmp
+
+ srwi $rounds,$rounds,1
+ li $idx,16
+ subi $rounds,$rounds,1
+ beq Lcbc_dec
+
+Lcbc_enc:
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ mtctr $rounds
+ subi $len,$len,16 # len-=16
+
+ lvx $rndkey0,0,$key
+ vperm $inout,$inout,$inptail,$inpperm
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ vxor $inout,$inout,$ivec
+
+Loop_cbc_enc:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_cbc_enc
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipherlast $ivec,$inout,$rndkey0
+ ${UCMP}i $len,16
+
+ vperm $tmp,$ivec,$ivec,$outperm
+ vsel $inout,$outhead,$tmp,$outmask
+ vmr $outhead,$tmp
+ stvx $inout,0,$out
+ addi $out,$out,16
+ bge Lcbc_enc
+
+ b Lcbc_done
+
+.align 4
+Lcbc_dec:
+ ${UCMP}i $len,128
+ bge _aesp8_cbc_decrypt8x
+ vmr $tmp,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ mtctr $rounds
+ subi $len,$len,16 # len-=16
+
+ lvx $rndkey0,0,$key
+ vperm $tmp,$tmp,$inptail,$inpperm
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$tmp,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+
+Loop_cbc_dec:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_cbc_dec
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipherlast $inout,$inout,$rndkey0
+ ${UCMP}i $len,16
+
+ vxor $inout,$inout,$ivec
+ vmr $ivec,$tmp
+ vperm $tmp,$inout,$inout,$outperm
+ vsel $inout,$outhead,$tmp,$outmask
+ vmr $outhead,$tmp
+ stvx $inout,0,$out
+ addi $out,$out,16
+ bge Lcbc_dec
+
+Lcbc_done:
+ addi $out,$out,-1
+ lvx $inout,0,$out # redundant in aligned case
+ vsel $inout,$outhead,$inout,$outmask
+ stvx $inout,0,$out
+
+ neg $enc,$ivp # write [unaligned] iv
+ li $idx,15 # 15 is not typo
+ vxor $rndkey0,$rndkey0,$rndkey0
+ vspltisb $outmask,-1
+ le?vspltisb $tmp,0x0f
+ ?lvsl $outperm,0,$enc
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
+ le?vxor $outperm,$outperm,$tmp
+ lvx $outhead,0,$ivp
+ vperm $ivec,$ivec,$ivec,$outperm
+ vsel $inout,$outhead,$ivec,$outmask
+ lvx $inptail,$idx,$ivp
+ stvx $inout,0,$ivp
+ vsel $inout,$ivec,$inptail,$outmask
+ stvx $inout,$idx,$ivp
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,6,0
+ .long 0
+___
+#########################################################################
+{{ # Optimized CBC decrypt procedure #
+my $key_="r11";
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
+ $x00=0 if ($flavour =~ /osx/);
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
+my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
+ # v26-v31 last 6 round keys
+my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
+
+$code.=<<___;
+.align 5
+_aesp8_cbc_decrypt8x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ li r10,`$FRAME+8*16+15`
+ li r11,`$FRAME+8*16+31`
+ stvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+ subi $len,$len,128 # bias
+
+ lvx $rndkey0,$x00,$key # load key schedule
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ lvx v31,$x00,$key
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,`$FRAME+15`
+ mtctr $rounds
+
+Load_cbc_dec_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_cbc_dec_key
+
+ lvx v26,$x10,$key
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key
+ ?vperm v29,v29,v30,$keyperm
+ lvx $out0,$x70,$key # borrow $out0
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$out0,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ #lvx $inptail,0,$inp # "caller" already did this
+ #addi $inp,$inp,15 # 15 is not typo
+ subi $inp,$inp,15 # undo "caller"
+
+ le?li $idx,8
+ lvx_u $in0,$x00,$inp # load first 8 "words"
+ le?lvsl $inpperm,0,$idx
+ le?vspltisb $tmp,0x0f
+ lvx_u $in1,$x10,$inp
+ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
+ lvx_u $in2,$x20,$inp
+ le?vperm $in0,$in0,$in0,$inpperm
+ lvx_u $in3,$x30,$inp
+ le?vperm $in1,$in1,$in1,$inpperm
+ lvx_u $in4,$x40,$inp
+ le?vperm $in2,$in2,$in2,$inpperm
+ vxor $out0,$in0,$rndkey0
+ lvx_u $in5,$x50,$inp
+ le?vperm $in3,$in3,$in3,$inpperm
+ vxor $out1,$in1,$rndkey0
+ lvx_u $in6,$x60,$inp
+ le?vperm $in4,$in4,$in4,$inpperm
+ vxor $out2,$in2,$rndkey0
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+ le?vperm $in5,$in5,$in5,$inpperm
+ vxor $out3,$in3,$rndkey0
+ le?vperm $in6,$in6,$in6,$inpperm
+ vxor $out4,$in4,$rndkey0
+ le?vperm $in7,$in7,$in7,$inpperm
+ vxor $out5,$in5,$rndkey0
+ vxor $out6,$in6,$rndkey0
+ vxor $out7,$in7,$rndkey0
+
+ mtctr $rounds
+ b Loop_cbc_dec8x
+.align 5
+Loop_cbc_dec8x:
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_cbc_dec8x
+
+ subic $len,$len,128 # $len-=128
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+
+ subfe. r0,r0,r0 # borrow?-1:0
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+
+ and r0,r0,$len
+ vncipher $out0,$out0,v26
+ vncipher $out1,$out1,v26
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vncipher $out4,$out4,v26
+ vncipher $out5,$out5,v26
+ vncipher $out6,$out6,v26
+ vncipher $out7,$out7,v26
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in7 are loaded
+ # with last "words"
+ vncipher $out0,$out0,v27
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vncipher $out4,$out4,v27
+ vncipher $out5,$out5,v27
+ vncipher $out6,$out6,v27
+ vncipher $out7,$out7,v27
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vncipher $out0,$out0,v28
+ vncipher $out1,$out1,v28
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vncipher $out4,$out4,v28
+ vncipher $out5,$out5,v28
+ vncipher $out6,$out6,v28
+ vncipher $out7,$out7,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vncipher $out0,$out0,v29
+ vncipher $out1,$out1,v29
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vncipher $out4,$out4,v29
+ vncipher $out5,$out5,v29
+ vncipher $out6,$out6,v29
+ vncipher $out7,$out7,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+
+ vncipher $out0,$out0,v30
+ vxor $ivec,$ivec,v31 # xor with last round key
+ vncipher $out1,$out1,v30
+ vxor $in0,$in0,v31
+ vncipher $out2,$out2,v30
+ vxor $in1,$in1,v31
+ vncipher $out3,$out3,v30
+ vxor $in2,$in2,v31
+ vncipher $out4,$out4,v30
+ vxor $in3,$in3,v31
+ vncipher $out5,$out5,v30
+ vxor $in4,$in4,v31
+ vncipher $out6,$out6,v30
+ vxor $in5,$in5,v31
+ vncipher $out7,$out7,v30
+ vxor $in6,$in6,v31
+
+ vncipherlast $out0,$out0,$ivec
+ vncipherlast $out1,$out1,$in0
+ lvx_u $in0,$x00,$inp # load next input block
+ vncipherlast $out2,$out2,$in1
+ lvx_u $in1,$x10,$inp
+ vncipherlast $out3,$out3,$in2
+ le?vperm $in0,$in0,$in0,$inpperm
+ lvx_u $in2,$x20,$inp
+ vncipherlast $out4,$out4,$in3
+ le?vperm $in1,$in1,$in1,$inpperm
+ lvx_u $in3,$x30,$inp
+ vncipherlast $out5,$out5,$in4
+ le?vperm $in2,$in2,$in2,$inpperm
+ lvx_u $in4,$x40,$inp
+ vncipherlast $out6,$out6,$in5
+ le?vperm $in3,$in3,$in3,$inpperm
+ lvx_u $in5,$x50,$inp
+ vncipherlast $out7,$out7,$in6
+ le?vperm $in4,$in4,$in4,$inpperm
+ lvx_u $in6,$x60,$inp
+ vmr $ivec,$in7
+ le?vperm $in5,$in5,$in5,$inpperm
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $in6,$in6,$in6,$inpperm
+ vxor $out0,$in0,$rndkey0
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $in7,$in7,$in7,$inpperm
+ vxor $out1,$in1,$rndkey0
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ vxor $out2,$in2,$rndkey0
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ vxor $out3,$in3,$rndkey0
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ vxor $out4,$in4,$rndkey0
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ vxor $out5,$in5,$rndkey0
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x60,$out
+ vxor $out6,$in6,$rndkey0
+ stvx_u $out7,$x70,$out
+ addi $out,$out,0x80
+ vxor $out7,$in7,$rndkey0
+
+ mtctr $rounds
+ beq Loop_cbc_dec8x # did $len-=128 borrow?
+
+ addic. $len,$len,128
+ beq Lcbc_dec8x_done
+ nop
+ nop
+
+Loop_cbc_dec8x_tail: # up to 7 "words" tail...
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_cbc_dec8x_tail
+
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+
+ vncipher $out1,$out1,v26
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vncipher $out4,$out4,v26
+ vncipher $out5,$out5,v26
+ vncipher $out6,$out6,v26
+ vncipher $out7,$out7,v26
+
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vncipher $out4,$out4,v27
+ vncipher $out5,$out5,v27
+ vncipher $out6,$out6,v27
+ vncipher $out7,$out7,v27
+
+ vncipher $out1,$out1,v28
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vncipher $out4,$out4,v28
+ vncipher $out5,$out5,v28
+ vncipher $out6,$out6,v28
+ vncipher $out7,$out7,v28
+
+ vncipher $out1,$out1,v29
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vncipher $out4,$out4,v29
+ vncipher $out5,$out5,v29
+ vncipher $out6,$out6,v29
+ vncipher $out7,$out7,v29
+
+ vncipher $out1,$out1,v30
+ vxor $ivec,$ivec,v31 # last round key
+ vncipher $out2,$out2,v30
+ vxor $in1,$in1,v31
+ vncipher $out3,$out3,v30
+ vxor $in2,$in2,v31
+ vncipher $out4,$out4,v30
+ vxor $in3,$in3,v31
+ vncipher $out5,$out5,v30
+ vxor $in4,$in4,v31
+ vncipher $out6,$out6,v30
+ vxor $in5,$in5,v31
+ vncipher $out7,$out7,v30
+ vxor $in6,$in6,v31
+
+ cmplwi $len,32 # switch($len)
+ blt Lcbc_dec8x_one
+ nop
+ beq Lcbc_dec8x_two
+ cmplwi $len,64
+ blt Lcbc_dec8x_three
+ nop
+ beq Lcbc_dec8x_four
+ cmplwi $len,96
+ blt Lcbc_dec8x_five
+ nop
+ beq Lcbc_dec8x_six
+
+Lcbc_dec8x_seven:
+ vncipherlast $out1,$out1,$ivec
+ vncipherlast $out2,$out2,$in1
+ vncipherlast $out3,$out3,$in2
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out1,$out1,$out1,$inpperm
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x00,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x10,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x20,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x30,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x40,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x50,$out
+ stvx_u $out7,$x60,$out
+ addi $out,$out,0x70
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_six:
+ vncipherlast $out2,$out2,$ivec
+ vncipherlast $out3,$out3,$in2
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out2,$out2,$out2,$inpperm
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x00,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x10,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x20,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x30,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x40,$out
+ stvx_u $out7,$x50,$out
+ addi $out,$out,0x60
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_five:
+ vncipherlast $out3,$out3,$ivec
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out3,$out3,$out3,$inpperm
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x00,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x10,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x20,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x30,$out
+ stvx_u $out7,$x40,$out
+ addi $out,$out,0x50
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_four:
+ vncipherlast $out4,$out4,$ivec
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out4,$out4,$out4,$inpperm
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x00,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x10,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x20,$out
+ stvx_u $out7,$x30,$out
+ addi $out,$out,0x40
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_three:
+ vncipherlast $out5,$out5,$ivec
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out5,$out5,$out5,$inpperm
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x00,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x10,$out
+ stvx_u $out7,$x20,$out
+ addi $out,$out,0x30
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_two:
+ vncipherlast $out6,$out6,$ivec
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out6,$out6,$out6,$inpperm
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x00,$out
+ stvx_u $out7,$x10,$out
+ addi $out,$out,0x20
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_one:
+ vncipherlast $out7,$out7,$ivec
+ vmr $ivec,$in7
+
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out7,0,$out
+ addi $out,$out,0x10
+
+Lcbc_dec8x_done:
+ le?vperm $ivec,$ivec,$ivec,$inpperm
+ stvx_u $ivec,0,$ivp # write [unaligned] iv
+
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $inpperm,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
+___
+}} }}}
|
|
+
|
|
+#########################################################################
|
|
+{{{ # CTR procedure[s] #
|
|
+my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
|
|
+my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
|
|
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
|
|
+ map("v$_",(4..11));
|
|
+my $dat=$tmp;
|
|
+
|
|
+$code.=<<___;
|
|
+.globl .${prefix}_ctr32_encrypt_blocks
|
|
+.align 5
|
|
+.${prefix}_ctr32_encrypt_blocks:
|
|
+ ${UCMP}i $len,1
|
|
+ bltlr-
|
|
+
|
|
+ lis r0,0xfff0
|
|
+ mfspr $vrsave,256
|
|
+ mtspr 256,r0
|
|
+
|
|
+ li $idx,15
|
|
+ vxor $rndkey0,$rndkey0,$rndkey0
|
|
+ le?vspltisb $tmp,0x0f
|
|
+
|
|
+ lvx $ivec,0,$ivp # load [unaligned] iv
|
|
+ lvsl $inpperm,0,$ivp
|
|
+ lvx $inptail,$idx,$ivp
|
|
+ vspltisb $one,1
|
|
+ le?vxor $inpperm,$inpperm,$tmp
|
|
+ vperm $ivec,$ivec,$inptail,$inpperm
|
|
+ vsldoi $one,$rndkey0,$one,1
|
|
+
|
|
+ neg r11,$inp
|
|
+ ?lvsl $keyperm,0,$key # prepare for unaligned key
|
|
+ lwz $rounds,240($key)
|
|
+
|
|
+ lvsr $inpperm,0,r11 # prepare for unaligned load
|
|
+ lvx $inptail,0,$inp
|
|
+ addi $inp,$inp,15 # 15 is not typo
|
|
+ le?vxor $inpperm,$inpperm,$tmp
|
|
+
|
|
+ srwi $rounds,$rounds,1
|
|
+ li $idx,16
|
|
+ subi $rounds,$rounds,1
|
|
+
|
|
+ ${UCMP}i $len,8
|
|
+ bge _aesp8_ctr32_encrypt8x
|
|
+
|
|
+ ?lvsr $outperm,0,$out # prepare for unaligned store
|
|
+ vspltisb $outmask,-1
|
|
+ lvx $outhead,0,$out
|
|
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
|
|
+ le?vxor $outperm,$outperm,$tmp
|
|
+
|
|
+ lvx $rndkey0,0,$key
|
|
+ mtctr $rounds
|
|
+ lvx $rndkey1,$idx,$key
|
|
+ addi $idx,$idx,16
|
|
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
+ vxor $inout,$ivec,$rndkey0
|
|
+ lvx $rndkey0,$idx,$key
|
|
+ addi $idx,$idx,16
|
|
+ b Loop_ctr32_enc
|
|
+
|
|
+.align 5
|
|
+Loop_ctr32_enc:
|
|
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
+ vcipher $inout,$inout,$rndkey1
|
|
+ lvx $rndkey1,$idx,$key
|
|
+ addi $idx,$idx,16
|
|
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
+ vcipher $inout,$inout,$rndkey0
|
|
+ lvx $rndkey0,$idx,$key
|
|
+ addi $idx,$idx,16
|
|
+ bdnz Loop_ctr32_enc
|
|
+
|
|
+ vadduwm $ivec,$ivec,$one
|
|
+ vmr $dat,$inptail
|
|
+ lvx $inptail,0,$inp
|
|
+ addi $inp,$inp,16
|
|
+ subic. $len,$len,1 # blocks--
|
|
+
|
|
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
+ vcipher $inout,$inout,$rndkey1
|
|
+ lvx $rndkey1,$idx,$key
|
|
+ vperm $dat,$dat,$inptail,$inpperm
|
|
+ li $idx,16
|
|
+ ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
|
|
+ lvx $rndkey0,0,$key
|
|
+ vxor $dat,$dat,$rndkey1 # last round key
|
|
+ vcipherlast $inout,$inout,$dat
|
|
+
|
|
+ lvx $rndkey1,$idx,$key
|
|
+ addi $idx,$idx,16
|
|
+ vperm $inout,$inout,$inout,$outperm
|
|
+ vsel $dat,$outhead,$inout,$outmask
|
|
+ mtctr $rounds
|
|
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
+ vmr $outhead,$inout
|
|
+ vxor $inout,$ivec,$rndkey0
|
|
+ lvx $rndkey0,$idx,$key
|
|
+ addi $idx,$idx,16
|
|
+ stvx $dat,0,$out
|
|
+ addi $out,$out,16
|
|
+ bne Loop_ctr32_enc
|
|
+
|
|
+ addi $out,$out,-1
|
|
+ lvx $inout,0,$out # redundant in aligned case
|
|
+ vsel $inout,$outhead,$inout,$outmask
|
|
+ stvx $inout,0,$out
|
|
+
|
|
+ mtspr 256,$vrsave
|
|
+ blr
|
|
+ .long 0
|
|
+ .byte 0,12,0x14,0,0,0,6,0
|
|
+ .long 0
|
|
+___
|
|
+#########################################################################
|
|
+{{ # Optimized CTR procedure #
|
|
+my $key_="r11";
|
|
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
|
|
+ $x00=0 if ($flavour =~ /osx/);
|
|
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
|
|
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
|
|
+my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
|
|
+ # v26-v31 last 6 round keys
|
|
+my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
|
|
+my ($two,$three,$four)=($outhead,$outperm,$outmask);
|
|
+
+$code.=<<___;
+.align 5
+_aesp8_ctr32_encrypt8x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ li r10,`$FRAME+8*16+15`
+ li r11,`$FRAME+8*16+31`
+ stvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+
+ lvx $rndkey0,$x00,$key # load key schedule
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ lvx v31,$x00,$key
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,`$FRAME+15`
+ mtctr $rounds
+
+Load_ctr32_enc_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_ctr32_enc_key
+
+ lvx v26,$x10,$key
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key
+ ?vperm v29,v29,v30,$keyperm
+ lvx $out0,$x70,$key # borrow $out0
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$out0,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ vadduwm $two,$one,$one
+ subi $inp,$inp,15 # undo "caller"
+ $SHL $len,$len,4
+
+ vadduwm $out1,$ivec,$one # counter values ...
+ vadduwm $out2,$ivec,$two
+ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
+ le?li $idx,8
+ vadduwm $out3,$out1,$two
+ vxor $out1,$out1,$rndkey0
+ le?lvsl $inpperm,0,$idx
+ vadduwm $out4,$out2,$two
+ vxor $out2,$out2,$rndkey0
+ le?vspltisb $tmp,0x0f
+ vadduwm $out5,$out3,$two
+ vxor $out3,$out3,$rndkey0
+ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
+ vadduwm $out6,$out4,$two
+ vxor $out4,$out4,$rndkey0
+ vadduwm $out7,$out5,$two
+ vxor $out5,$out5,$rndkey0
+ vadduwm $ivec,$out6,$two # next counter value
+ vxor $out6,$out6,$rndkey0
+ vxor $out7,$out7,$rndkey0
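+	# At this point $out0-$out7 hold the counters iv+0 .. iv+7, each
+	# already xored with round key zero, and $ivec has advanced to
+	# iv+8, the base value for the next batch of eight blocks.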
+
+ mtctr $rounds
+ b Loop_ctr32_enc8x
+.align 5
+Loop_ctr32_enc8x:
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+ vcipher $out6,$out6,v24
+ vcipher $out7,$out7,v24
+Loop_ctr32_enc8x_middle:
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+ vcipher $out6,$out6,v25
+ vcipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_ctr32_enc8x
+
+ subic r11,$len,256 # $len-256, borrow $key_
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+ vcipher $out6,$out6,v24
+ vcipher $out7,$out7,v24
+
+ subfe r0,r0,r0 # borrow?-1:0
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+ vcipher $out6,$out6,v25
+ vcipher $out7,$out7,v25
+
+ and r0,r0,r11
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vcipher $out0,$out0,v26
+ vcipher $out1,$out1,v26
+ vcipher $out2,$out2,v26
+ vcipher $out3,$out3,v26
+ vcipher $out4,$out4,v26
+ vcipher $out5,$out5,v26
+ vcipher $out6,$out6,v26
+ vcipher $out7,$out7,v26
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ subic $len,$len,129 # $len-=129
+ vcipher $out0,$out0,v27
+ addi $len,$len,1 # $len-=128 really
+ vcipher $out1,$out1,v27
+ vcipher $out2,$out2,v27
+ vcipher $out3,$out3,v27
+ vcipher $out4,$out4,v27
+ vcipher $out5,$out5,v27
+ vcipher $out6,$out6,v27
+ vcipher $out7,$out7,v27
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+
+ vcipher $out0,$out0,v28
+ lvx_u $in0,$x00,$inp # load input
+ vcipher $out1,$out1,v28
+ lvx_u $in1,$x10,$inp
+ vcipher $out2,$out2,v28
+ lvx_u $in2,$x20,$inp
+ vcipher $out3,$out3,v28
+ lvx_u $in3,$x30,$inp
+ vcipher $out4,$out4,v28
+ lvx_u $in4,$x40,$inp
+ vcipher $out5,$out5,v28
+ lvx_u $in5,$x50,$inp
+ vcipher $out6,$out6,v28
+ lvx_u $in6,$x60,$inp
+ vcipher $out7,$out7,v28
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+
+ vcipher $out0,$out0,v29
+ le?vperm $in0,$in0,$in0,$inpperm
+ vcipher $out1,$out1,v29
+ le?vperm $in1,$in1,$in1,$inpperm
+ vcipher $out2,$out2,v29
+ le?vperm $in2,$in2,$in2,$inpperm
+ vcipher $out3,$out3,v29
+ le?vperm $in3,$in3,$in3,$inpperm
+ vcipher $out4,$out4,v29
+ le?vperm $in4,$in4,$in4,$inpperm
+ vcipher $out5,$out5,v29
+ le?vperm $in5,$in5,$in5,$inpperm
+ vcipher $out6,$out6,v29
+ le?vperm $in6,$in6,$in6,$inpperm
+ vcipher $out7,$out7,v29
+ le?vperm $in7,$in7,$in7,$inpperm
+
+ add $inp,$inp,r0 # $inp is adjusted in such a
+ # way that at exit from the
+ # loop inX-in7 are loaded
+ # with last "words"
+ subfe. r0,r0,r0 # borrow?-1:0
+ vcipher $out0,$out0,v30
+ vxor $in0,$in0,v31 # xor with last round key
+ vcipher $out1,$out1,v30
+ vxor $in1,$in1,v31
+ vcipher $out2,$out2,v30
+ vxor $in2,$in2,v31
+ vcipher $out3,$out3,v30
+ vxor $in3,$in3,v31
+ vcipher $out4,$out4,v30
+ vxor $in4,$in4,v31
+ vcipher $out5,$out5,v30
+ vxor $in5,$in5,v31
+ vcipher $out6,$out6,v30
+ vxor $in6,$in6,v31
+ vcipher $out7,$out7,v30
+ vxor $in7,$in7,v31
+
+ bne Lctr32_enc8x_break # did $len-129 borrow?
+
+ vcipherlast $in0,$out0,$in0
+ vcipherlast $in1,$out1,$in1
+ vadduwm $out1,$ivec,$one # counter values ...
+ vcipherlast $in2,$out2,$in2
+ vadduwm $out2,$ivec,$two
+ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
+ vcipherlast $in3,$out3,$in3
+ vadduwm $out3,$out1,$two
+ vxor $out1,$out1,$rndkey0
+ vcipherlast $in4,$out4,$in4
+ vadduwm $out4,$out2,$two
+ vxor $out2,$out2,$rndkey0
+ vcipherlast $in5,$out5,$in5
+ vadduwm $out5,$out3,$two
+ vxor $out3,$out3,$rndkey0
+ vcipherlast $in6,$out6,$in6
+ vadduwm $out6,$out4,$two
+ vxor $out4,$out4,$rndkey0
+ vcipherlast $in7,$out7,$in7
+ vadduwm $out7,$out5,$two
+ vxor $out5,$out5,$rndkey0
+ le?vperm $in0,$in0,$in0,$inpperm
+ vadduwm $ivec,$out6,$two # next counter value
+ vxor $out6,$out6,$rndkey0
+ le?vperm $in1,$in1,$in1,$inpperm
+ vxor $out7,$out7,$rndkey0
+ mtctr $rounds
+
+ vcipher $out0,$out0,v24
+ stvx_u $in0,$x00,$out
+ le?vperm $in2,$in2,$in2,$inpperm
+ vcipher $out1,$out1,v24
+ stvx_u $in1,$x10,$out
+ le?vperm $in3,$in3,$in3,$inpperm
+ vcipher $out2,$out2,v24
+ stvx_u $in2,$x20,$out
+ le?vperm $in4,$in4,$in4,$inpperm
+ vcipher $out3,$out3,v24
+ stvx_u $in3,$x30,$out
+ le?vperm $in5,$in5,$in5,$inpperm
+ vcipher $out4,$out4,v24
+ stvx_u $in4,$x40,$out
+ le?vperm $in6,$in6,$in6,$inpperm
+ vcipher $out5,$out5,v24
+ stvx_u $in5,$x50,$out
+ le?vperm $in7,$in7,$in7,$inpperm
+ vcipher $out6,$out6,v24
+ stvx_u $in6,$x60,$out
+ vcipher $out7,$out7,v24
+ stvx_u $in7,$x70,$out
+ addi $out,$out,0x80
+
+ b Loop_ctr32_enc8x_middle
+
+.align 5
+Lctr32_enc8x_break:
+ cmpwi $len,-0x60
+ blt Lctr32_enc8x_one
+ nop
+ beq Lctr32_enc8x_two
+ cmpwi $len,-0x40
+ blt Lctr32_enc8x_three
+ nop
+ beq Lctr32_enc8x_four
+ cmpwi $len,-0x20
+ blt Lctr32_enc8x_five
+ nop
+ beq Lctr32_enc8x_six
+ cmpwi $len,0x00
+ blt Lctr32_enc8x_seven
+
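+	# Here $len is the remaining byte count minus 128, i.e. one of
+	# -0x70,-0x60,...,0x00; the compare ladder above dispatches to
+	# the matching 1- to 8-block tail.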
+Lctr32_enc8x_eight:
+ vcipherlast $out0,$out0,$in0
+ vcipherlast $out1,$out1,$in1
+ vcipherlast $out2,$out2,$in2
+ vcipherlast $out3,$out3,$in3
+ vcipherlast $out4,$out4,$in4
+ vcipherlast $out5,$out5,$in5
+ vcipherlast $out6,$out6,$in6
+ vcipherlast $out7,$out7,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x60,$out
+ stvx_u $out7,$x70,$out
+ addi $out,$out,0x80
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_seven:
+ vcipherlast $out0,$out0,$in1
+ vcipherlast $out1,$out1,$in2
+ vcipherlast $out2,$out2,$in3
+ vcipherlast $out3,$out3,$in4
+ vcipherlast $out4,$out4,$in5
+ vcipherlast $out5,$out5,$in6
+ vcipherlast $out6,$out6,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ stvx_u $out6,$x60,$out
+ addi $out,$out,0x70
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_six:
+ vcipherlast $out0,$out0,$in2
+ vcipherlast $out1,$out1,$in3
+ vcipherlast $out2,$out2,$in4
+ vcipherlast $out3,$out3,$in5
+ vcipherlast $out4,$out4,$in6
+ vcipherlast $out5,$out5,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ stvx_u $out5,$x50,$out
+ addi $out,$out,0x60
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_five:
+ vcipherlast $out0,$out0,$in3
+ vcipherlast $out1,$out1,$in4
+ vcipherlast $out2,$out2,$in5
+ vcipherlast $out3,$out3,$in6
+ vcipherlast $out4,$out4,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ stvx_u $out4,$x40,$out
+ addi $out,$out,0x50
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_four:
+ vcipherlast $out0,$out0,$in4
+ vcipherlast $out1,$out1,$in5
+ vcipherlast $out2,$out2,$in6
+ vcipherlast $out3,$out3,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ stvx_u $out3,$x30,$out
+ addi $out,$out,0x40
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_three:
+ vcipherlast $out0,$out0,$in5
+ vcipherlast $out1,$out1,$in6
+ vcipherlast $out2,$out2,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ stvx_u $out2,$x20,$out
+ addi $out,$out,0x30
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_two:
+ vcipherlast $out0,$out0,$in6
+ vcipherlast $out1,$out1,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ stvx_u $out1,$x10,$out
+ addi $out,$out,0x20
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_one:
+ vcipherlast $out0,$out0,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ stvx_u $out0,0,$out
+ addi $out,$out,0x10
+
+Lctr32_enc8x_done:
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $inpperm,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
+___
+}} }}}
+
+#########################################################################
+{{{ # XTS procedures #
+# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
+# const AES_KEY *key1, const AES_KEY *key2, #
+# [const] unsigned char iv[16]); #
+# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
+# the input tweak value is assumed to be encrypted already, and the #
+# last tweak value, suitable for a consecutive call on the same chunk #
+# of data, is written back to the original buffer. In addition, in #
+# "tweak chaining" mode only complete input blocks are processed. #
+
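+# A hedged usage sketch for the interface above (caller code, not part
+# of this file; aes_p8_set_encrypt_key is assumed to be the key-setup
+# routine generated earlier under the same ${prefix}):
+#
+#	AES_KEY k1, k2;
+#	aes_p8_set_encrypt_key(data_key, 256, &k1);	/* key1: data */
+#	aes_p8_set_encrypt_key(tweak_key, 256, &k2);	/* key2: tweak */
+#	aes_p8_xts_encrypt(in, out, len, &k1, &k2, iv);	/* iv: 16-byte tweak */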
+my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
+my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
+my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
+my $taillen = $key2;
+
+ ($inp,$idx) = ($idx,$inp); # reassign
+
+$code.=<<___;
+.globl .${prefix}_xts_encrypt
+.align 5
+.${prefix}_xts_encrypt:
+ mr $inp,r3 # reassign
+ li r3,-1
+ ${UCMP}i $len,16
+ bltlr-
+
+ lis r0,0xfff0
+ mfspr r12,256 # save vrsave
+ li r11,0
+ mtspr 256,r0
+
+ vspltisb $seven,0x07 # 0x070707..07
+ le?lvsl $leperm,r11,r11
+ le?vspltisb $tmp,0x0f
+ le?vxor $leperm,$leperm,$seven
+
+ li $idx,15
+ lvx $tweak,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $tweak,$tweak,$inptail,$inpperm
+
+ neg r11,$inp
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inout,0,$inp
+ addi $inp,$inp,15 # 15 is not a typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ ${UCMP}i $key2,0 # key2==NULL?
+ beq Lxts_enc_no_key2
+
+ ?lvsl $keyperm,0,$key2 # prepare for unaligned key
+ lwz $rounds,240($key2)
+ srwi $rounds,$rounds,1
+ subi $rounds,$rounds,1
+ li $idx,16
+
+ lvx $rndkey0,0,$key2
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $tweak,$tweak,$rndkey0
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ mtctr $rounds
+
+Ltweak_xts_enc:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $tweak,$tweak,$rndkey0
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ bdnz Ltweak_xts_enc
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipherlast $tweak,$tweak,$rndkey0
+
+ li $ivp,0 # don't chain the tweak
+ b Lxts_enc
+
+Lxts_enc_no_key2:
+ li $idx,-16
+ and $len,$len,$idx # in "tweak chaining"
+ # mode only complete
+ # blocks are processed
+Lxts_enc:
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+
+ ?lvsl $keyperm,0,$key1 # prepare for unaligned key
+ lwz $rounds,240($key1)
+ srwi $rounds,$rounds,1
+ subi $rounds,$rounds,1
+ li $idx,16
+
+ vslb $eighty7,$seven,$seven # 0x808080..80
+ vor $eighty7,$eighty7,$seven # 0x878787..87
+ vspltisb $tmp,1 # 0x010101..01
+ vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
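+	# $eighty7 drives the XTS tweak update: doubling the tweak in
+	# GF(2^128) modulo x^128 + x^7 + x^2 + x + 1 (hence 0x87).
+	# In the recurring five-instruction sequence below, vaddubm
+	# doubles every byte, vsrab/vsldoi build a rotated mask of the
+	# per-byte carries, and vand with 0x870101..01 in effect turns
+	# the carry out of the top byte into the 0x87 reduction while
+	# 0x01 propagates the remaining carries between bytes.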
+
+ ${UCMP}i $len,96
+ bge _aesp8_xts_encrypt6x
+
+ andi. $taillen,$len,15
+ subic r0,$len,32
+ subi $taillen,$taillen,16
+ subfe r0,r0,r0
+ and r0,r0,$taillen
+ add $inp,$inp,r0
+
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ mtctr $rounds
+ b Loop_xts_enc
+
+.align 5
+Loop_xts_enc:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ bdnz Loop_xts_enc
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $rndkey0,$rndkey0,$tweak
+ vcipherlast $output,$inout,$rndkey0
+
+ le?vperm $tmp,$output,$output,$leperm
+ be?nop
+ le?stvx_u $tmp,0,$out
+ be?stvx_u $output,0,$out
+ addi $out,$out,16
+
+ subic. $len,$len,16
+ beq Lxts_enc_done
+
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+
+ subic r0,$len,32
+ subfe r0,r0,r0
+ and r0,r0,$taillen
+ add $inp,$inp,r0
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak
+ vxor $output,$output,$rndkey0 # just in case $len<16
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+
+ mtctr $rounds
+ ${UCMP}i $len,16
+ bge Loop_xts_enc
+
+ vxor $output,$output,$tweak
+ lvsr $inpperm,0,$len # $inpperm is no longer needed
+ vxor $inptail,$inptail,$inptail # $inptail is no longer needed
+ vspltisb $tmp,-1
+ vperm $inptail,$inptail,$tmp,$inpperm
+ vsel $inout,$inout,$output,$inptail
+
+ subi r11,$out,17
+ subi $out,$out,16
+ mtctr $len
+ li $len,16
+Loop_xts_enc_steal:
+ lbzu r0,1(r11)
+ stb r0,16(r11)
+ bdnz Loop_xts_enc_steal
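+	# Ciphertext stealing: the byte loop above copies the leading
+	# bytes of the last full ciphertext block forward as the short
+	# final output block; control then re-enters Loop_xts_enc once
+	# more to encrypt the merged block built with vsel above.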
+
+ mtctr $rounds
+ b Loop_xts_enc # one more time...
+
+Lxts_enc_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_enc_ret
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_enc_ret:
+ mtspr 256,r12 # restore vrsave
+ li r3,0
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
+
+.globl .${prefix}_xts_decrypt
+.align 5
+.${prefix}_xts_decrypt:
+ mr $inp,r3 # reassign
+ li r3,-1
+ ${UCMP}i $len,16
+ bltlr-
+
+ lis r0,0xfff8
+ mfspr r12,256 # save vrsave
+ li r11,0
+ mtspr 256,r0
+
+ andi. r0,$len,15
+ neg r0,r0
+ andi. r0,r0,16
+ sub $len,$len,r0
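+	# Partial-tail setup for decryption: r0 ends up 16 when
+	# ($len % 16) != 0 and 0 otherwise, so the subtraction reserves
+	# the last complete ciphertext block for the stealing path.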
+
+ vspltisb $seven,0x07 # 0x070707..07
+ le?lvsl $leperm,r11,r11
+ le?vspltisb $tmp,0x0f
+ le?vxor $leperm,$leperm,$seven
+
+ li $idx,15
+ lvx $tweak,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $tweak,$tweak,$inptail,$inpperm
+
+ neg r11,$inp
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inout,0,$inp
+ addi $inp,$inp,15 # 15 is not a typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ ${UCMP}i $key2,0 # key2==NULL?
+ beq Lxts_dec_no_key2
+
+ ?lvsl $keyperm,0,$key2 # prepare for unaligned key
+ lwz $rounds,240($key2)
+ srwi $rounds,$rounds,1
+ subi $rounds,$rounds,1
+ li $idx,16
+
+ lvx $rndkey0,0,$key2
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $tweak,$tweak,$rndkey0
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ mtctr $rounds
+
+Ltweak_xts_dec:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $tweak,$tweak,$rndkey0
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ bdnz Ltweak_xts_dec
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipherlast $tweak,$tweak,$rndkey0
+
+ li $ivp,0 # don't chain the tweak
+ b Lxts_dec
+
+Lxts_dec_no_key2:
+ neg $idx,$len
+ andi. $idx,$idx,15
+ add $len,$len,$idx # in "tweak chaining"
+ # mode only complete
+ # blocks are processed
+Lxts_dec:
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+
+ ?lvsl $keyperm,0,$key1 # prepare for unaligned key
+ lwz $rounds,240($key1)
+ srwi $rounds,$rounds,1
+ subi $rounds,$rounds,1
+ li $idx,16
+
+ vslb $eighty7,$seven,$seven # 0x808080..80
+ vor $eighty7,$eighty7,$seven # 0x878787..87
+ vspltisb $tmp,1 # 0x010101..01
+ vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
+
+ ${UCMP}i $len,96
+ bge _aesp8_xts_decrypt6x
+
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ mtctr $rounds
+
+ ${UCMP}i $len,16
+ blt Ltail_xts_dec
+ be?b Loop_xts_dec
+
+.align 5
+Loop_xts_dec:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ bdnz Loop_xts_dec
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $rndkey0,$rndkey0,$tweak
+ vncipherlast $output,$inout,$rndkey0
+
+ le?vperm $tmp,$output,$output,$leperm
+ be?nop
+ le?stvx_u $tmp,0,$out
+ be?stvx_u $output,0,$out
+ addi $out,$out,16
+
+ subic. $len,$len,16
+ beq Lxts_dec_done
+
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+
+ mtctr $rounds
+ ${UCMP}i $len,16
+ bge Loop_xts_dec
+
+Ltail_xts_dec:
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak1,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak1,$tweak1,$tmp
+
+ subi $inp,$inp,16
+ add $inp,$inp,$len
+
+ vxor $inout,$inout,$tweak # :-(
+ vxor $inout,$inout,$tweak1 # :-)
+
+Loop_xts_dec_short:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ bdnz Loop_xts_dec_short
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $rndkey0,$rndkey0,$tweak1
+ vncipherlast $output,$inout,$rndkey0
+
+ le?vperm $tmp,$output,$output,$leperm
+ be?nop
+ le?stvx_u $tmp,0,$out
+ be?stvx_u $output,0,$out
+
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ #addi $inp,$inp,16
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+
+ lvsr $inpperm,0,$len # $inpperm is no longer needed
+ vxor $inptail,$inptail,$inptail # $inptail is no longer needed
+ vspltisb $tmp,-1
+ vperm $inptail,$inptail,$tmp,$inpperm
+ vsel $inout,$inout,$output,$inptail
+
+ vxor $rndkey0,$rndkey0,$tweak
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+
+ subi r11,$out,1
+ mtctr $len
+ li $len,16
+Loop_xts_dec_steal:
+ lbzu r0,1(r11)
+ stb r0,16(r11)
+ bdnz Loop_xts_dec_steal
+
+ mtctr $rounds
+ b Loop_xts_dec # one more time...
+
+Lxts_dec_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_dec_ret
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_dec_ret:
+ mtspr 256,r12 # restore vrsave
+ li r3,0
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
+___
+#########################################################################
+{{ # Optimized XTS procedures #
+my $key_=$key2;
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
+ $x00=0 if ($flavour =~ /osx/);
+my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
+my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
+my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
+my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
+ # v26-v31 last 6 round keys
+my ($keyperm)=($out0); # aliases with "caller", redundant assignment
+my $taillen=$x70;
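+# Same register discipline as the optimized CTR code: six input, six
+# output and six tweak vectors fill most of the file, v24/v25 rotate
+# through the middle round keys from the stack, and v26-v31 keep the
+# last six round keys resident.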
+
+$code.=<<___;
+.align 5
+_aesp8_xts_encrypt6x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ mflr r11
+ li r7,`$FRAME+8*16+15`
+ li r3,`$FRAME+8*16+31`
+ $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
+ stvx v20,r7,$sp # ABI says so
+ addi r7,r7,32
+ stvx v21,r3,$sp
+ addi r3,r3,32
+ stvx v22,r7,$sp
+ addi r7,r7,32
+ stvx v23,r3,$sp
+ addi r3,r3,32
+ stvx v24,r7,$sp
+ addi r7,r7,32
+ stvx v25,r3,$sp
+ addi r3,r3,32
+ stvx v26,r7,$sp
+ addi r7,r7,32
+ stvx v27,r3,$sp
+ addi r3,r3,32
+ stvx v28,r7,$sp
+ addi r7,r7,32
+ stvx v29,r3,$sp
+ addi r3,r3,32
+ stvx v30,r7,$sp
+ stvx v31,r3,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+
+ lvx $rndkey0,$x00,$key1 # load key schedule
+ lvx v30,$x10,$key1
+ addi $key1,$key1,0x20
+ lvx v31,$x00,$key1
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,`$FRAME+15`
+ mtctr $rounds
+
+Load_xts_enc_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key1
+ addi $key1,$key1,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key1
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_xts_enc_key
+
+ lvx v26,$x10,$key1
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key1
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key1
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key1
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key1
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key1
+ ?vperm v29,v29,v30,$keyperm
+ lvx $twk5,$x70,$key1 # borrow $twk5
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$twk5,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ vperm $in0,$inout,$inptail,$inpperm
+ subi $inp,$inp,31 # undo "caller"
+ vxor $twk0,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $out0,$in0,$twk0
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in1,$x10,$inp
+ vxor $twk1,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in1,$in1,$in1,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out1,$in1,$twk1
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in2,$x20,$inp
+ andi. $taillen,$len,15
+ vxor $twk2,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in2,$in2,$in2,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out2,$in2,$twk2
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in3,$x30,$inp
+ sub $len,$len,$taillen
+ vxor $twk3,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in3,$in3,$in3,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out3,$in3,$twk3
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in4,$x40,$inp
+ subi $len,$len,0x60
+ vxor $twk4,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in4,$in4,$in4,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out4,$in4,$twk4
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ vxor $twk5,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in5,$in5,$in5,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out5,$in5,$twk5
+ vxor $tweak,$tweak,$tmp
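+	# Six tweaks $twk0-$twk5 are now precomputed and the input blocks
+	# pre-xored into $out0-$out5.  Each tweak already carries
+	# rndkey[0], so folding $rndkey0 into v31 below keeps the
+	# final-round xor (tweak ^ last round key) consistent.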
+
+ vxor v31,v31,$rndkey0
+ mtctr $rounds
+ b Loop_xts_enc6x
+
+.align 5
+Loop_xts_enc6x:
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_enc6x
+
+ subic $len,$len,96 # $len-=96
+ vxor $in0,$twk0,v31 # xor with last round key
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk0,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+
+ subfe. r0,r0,r0 # borrow?-1:0
+ vand $tmp,$tmp,$eighty7
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vxor $tweak,$tweak,$tmp
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vxor $in1,$twk1,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk1,$tweak,$rndkey0
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+
+ and r0,r0,$len
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipher $out0,$out0,v26
+ vcipher $out1,$out1,v26
+ vand $tmp,$tmp,$eighty7
+ vcipher $out2,$out2,v26
+ vcipher $out3,$out3,v26
+ vxor $tweak,$tweak,$tmp
+ vcipher $out4,$out4,v26
+ vcipher $out5,$out5,v26
+
+ add $inp,$inp,r0 # $inp is adjusted in such a
+ # way that at exit from the
+ # loop inX-in5 are loaded
+ # with last "words"
+ vxor $in2,$twk2,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk2,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vcipher $out0,$out0,v27
+ vcipher $out1,$out1,v27
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipher $out2,$out2,v27
+ vcipher $out3,$out3,v27
+ vand $tmp,$tmp,$eighty7
+ vcipher $out4,$out4,v27
+ vcipher $out5,$out5,v27
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vxor $tweak,$tweak,$tmp
+ vcipher $out0,$out0,v28
+ vcipher $out1,$out1,v28
+ vxor $in3,$twk3,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk3,$tweak,$rndkey0
+ vcipher $out2,$out2,v28
+ vcipher $out3,$out3,v28
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipher $out4,$out4,v28
+ vcipher $out5,$out5,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vand $tmp,$tmp,$eighty7
+
+ vcipher $out0,$out0,v29
+ vcipher $out1,$out1,v29
+ vxor $tweak,$tweak,$tmp
+ vcipher $out2,$out2,v29
+ vcipher $out3,$out3,v29
+ vxor $in4,$twk4,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk4,$tweak,$rndkey0
+ vcipher $out4,$out4,v29
+ vcipher $out5,$out5,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+
+ vcipher $out0,$out0,v30
+ vcipher $out1,$out1,v30
+ vand $tmp,$tmp,$eighty7
+ vcipher $out2,$out2,v30
+ vcipher $out3,$out3,v30
+ vxor $tweak,$tweak,$tmp
+ vcipher $out4,$out4,v30
+ vcipher $out5,$out5,v30
+ vxor $in5,$twk5,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk5,$tweak,$rndkey0
+
+ vcipherlast $out0,$out0,$in0
+ lvx_u $in0,$x00,$inp # load next input block
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipherlast $out1,$out1,$in1
+ lvx_u $in1,$x10,$inp
+ vcipherlast $out2,$out2,$in2
+ le?vperm $in0,$in0,$in0,$leperm
+ lvx_u $in2,$x20,$inp
+ vand $tmp,$tmp,$eighty7
+ vcipherlast $out3,$out3,$in3
+ le?vperm $in1,$in1,$in1,$leperm
+ lvx_u $in3,$x30,$inp
+ vcipherlast $out4,$out4,$in4
+ le?vperm $in2,$in2,$in2,$leperm
+ lvx_u $in4,$x40,$inp
+ vxor $tweak,$tweak,$tmp
+ vcipherlast $tmp,$out5,$in5 # last block might be needed
+ # in stealing mode
+ le?vperm $in3,$in3,$in3,$leperm
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ le?vperm $in4,$in4,$in4,$leperm
+ le?vperm $in5,$in5,$in5,$leperm
+
+ le?vperm $out0,$out0,$out0,$leperm
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk0
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ vxor $out1,$in1,$twk1
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ vxor $out2,$in2,$twk2
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ vxor $out3,$in3,$twk3
+ le?vperm $out5,$tmp,$tmp,$leperm
+ stvx_u $out4,$x40,$out
+ vxor $out4,$in4,$twk4
+ le?stvx_u $out5,$x50,$out
+ be?stvx_u $tmp, $x50,$out
+ vxor $out5,$in5,$twk5
+ addi $out,$out,0x60
+
+ mtctr $rounds
+ beq Loop_xts_enc6x # did $len-=96 borrow?
+
+ addic. $len,$len,0x60
+ beq Lxts_enc6x_zero
+ cmpwi $len,0x20
+ blt Lxts_enc6x_one
+ nop
+ beq Lxts_enc6x_two
+ cmpwi $len,0x40
+ blt Lxts_enc6x_three
+ nop
+ beq Lxts_enc6x_four
+
+Lxts_enc6x_five:
+ vxor $out0,$in1,$twk0
+ vxor $out1,$in2,$twk1
+ vxor $out2,$in3,$twk2
+ vxor $out3,$in4,$twk3
+ vxor $out4,$in5,$twk4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk5 # unused tweak
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ vxor $tmp,$out4,$twk5 # last block prep for stealing
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ stvx_u $out4,$x40,$out
+ addi $out,$out,0x50
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_four:
+ vxor $out0,$in2,$twk0
+ vxor $out1,$in3,$twk1
+ vxor $out2,$in4,$twk2
+ vxor $out3,$in5,$twk3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk4 # unused tweak
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ vxor $tmp,$out3,$twk4 # last block prep for stealing
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ stvx_u $out3,$x30,$out
+ addi $out,$out,0x40
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_three:
+ vxor $out0,$in3,$twk0
+ vxor $out1,$in4,$twk1
+ vxor $out2,$in5,$twk2
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk3 # unused tweak
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $tmp,$out2,$twk3 # last block prep for stealing
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ stvx_u $out2,$x20,$out
+ addi $out,$out,0x30
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_two:
+ vxor $out0,$in4,$twk0
+ vxor $out1,$in5,$twk1
+ vxor $out2,$out2,$out2
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk2 # unused tweak
+ vxor $tmp,$out1,$twk2 # last block prep for stealing
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ stvx_u $out1,$x10,$out
+ addi $out,$out,0x20
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_one:
+ vxor $out0,$in5,$twk0
+ nop
+Loop_xts_enc1x:
+ vcipher $out0,$out0,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_enc1x
+
+ add $inp,$inp,$taillen
+ cmpwi $taillen,0
+ vcipher $out0,$out0,v24
+
+ subi $inp,$inp,16
+ vcipher $out0,$out0,v25
+
+ lvsr $inpperm,0,$taillen
+ vcipher $out0,$out0,v26
+
+ lvx_u $in0,0,$inp
+ vcipher $out0,$out0,v27
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vcipher $out0,$out0,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vcipher $out0,$out0,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $twk0,$twk0,v31
+
+ le?vperm $in0,$in0,$in0,$leperm
+ vcipher $out0,$out0,v30
+
+ vperm $in0,$in0,$in0,$inpperm
+ vcipherlast $out0,$out0,$twk0
+
+ vmr $twk0,$twk1 # unused tweak
+ vxor $tmp,$out0,$twk1 # last block prep for stealing
+ le?vperm $out0,$out0,$out0,$leperm
+ stvx_u $out0,$x00,$out # store output
+ addi $out,$out,0x10
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_zero:
+ cmpwi $taillen,0
+ beq Lxts_enc6x_done
+
+ add $inp,$inp,$taillen
+ subi $inp,$inp,16
+ lvx_u $in0,0,$inp
+ lvsr $inpperm,0,$taillen # $in5 is no more
+ le?vperm $in0,$in0,$in0,$leperm
+ vperm $in0,$in0,$in0,$inpperm
+ vxor $tmp,$tmp,$twk0
+Lxts_enc6x_steal:
+ vxor $in0,$in0,$twk0
+ vxor $out0,$out0,$out0
+ vspltisb $out1,-1
+ vperm $out0,$out0,$out1,$inpperm
+ vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
+
+ subi r30,$out,17
+ subi $out,$out,16
+ mtctr $taillen
+Loop_xts_enc6x_steal:
+ lbzu r0,1(r30)
+ stb r0,16(r30)
+ bdnz Loop_xts_enc6x_steal
+
+ li $taillen,0
+ mtctr $rounds
+ b Loop_xts_enc1x # one more time...
+
+.align 4
+Lxts_enc6x_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_enc6x_ret
+
+ vxor $tweak,$twk0,$rndkey0
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_enc6x_ret:
+ mtlr r11
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $seven,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x04,1,0x80,6,6,0
+ .long 0
+
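+	# Shared 1- to 5-block tail helper: callers pre-xor each input
+	# block with its tweak, this routine runs the common rounds, and
+	# vcipherlast finishes each block with its tweak ^ last-round-key
+	# value.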
+.align 5
|
|
+_aesp8_xts_enc5x:
|
|
+ vcipher $out0,$out0,v24
|
|
+ vcipher $out1,$out1,v24
|
|
+ vcipher $out2,$out2,v24
|
|
+ vcipher $out3,$out3,v24
|
|
+ vcipher $out4,$out4,v24
|
|
+ lvx v24,$x20,$key_ # round[3]
|
|
+ addi $key_,$key_,0x20
|
|
+
|
|
+ vcipher $out0,$out0,v25
|
|
+ vcipher $out1,$out1,v25
|
|
+ vcipher $out2,$out2,v25
|
|
+ vcipher $out3,$out3,v25
|
|
+ vcipher $out4,$out4,v25
|
|
+ lvx v25,$x10,$key_ # round[4]
|
|
+ bdnz _aesp8_xts_enc5x
|
|
+
|
|
+ add $inp,$inp,$taillen
|
|
+ cmpwi $taillen,0
|
|
+ vcipher $out0,$out0,v24
|
|
+ vcipher $out1,$out1,v24
|
|
+ vcipher $out2,$out2,v24
|
|
+ vcipher $out3,$out3,v24
|
|
+ vcipher $out4,$out4,v24
|
|
+
|
|
+ subi $inp,$inp,16
|
|
+ vcipher $out0,$out0,v25
|
|
+ vcipher $out1,$out1,v25
|
|
+ vcipher $out2,$out2,v25
|
|
+ vcipher $out3,$out3,v25
|
|
+ vcipher $out4,$out4,v25
|
|
+ vxor $twk0,$twk0,v31
|
|
+
|
|
+ vcipher $out0,$out0,v26
|
|
+ lvsr $inpperm,0,$taillen # $in5 is no more
|
|
+ vcipher $out1,$out1,v26
|
|
+ vcipher $out2,$out2,v26
|
|
+ vcipher $out3,$out3,v26
|
|
+ vcipher $out4,$out4,v26
|
|
+ vxor $in1,$twk1,v31
|
|
+
|
|
+ vcipher $out0,$out0,v27
|
|
+ lvx_u $in0,0,$inp
|
|
+ vcipher $out1,$out1,v27
|
|
+ vcipher $out2,$out2,v27
|
|
+ vcipher $out3,$out3,v27
|
|
+ vcipher $out4,$out4,v27
|
|
+ vxor $in2,$twk2,v31
|
|
+
|
|
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
+ vcipher $out0,$out0,v28
|
|
+ vcipher $out1,$out1,v28
|
|
+ vcipher $out2,$out2,v28
|
|
+ vcipher $out3,$out3,v28
|
|
+ vcipher $out4,$out4,v28
|
|
+ lvx v24,$x00,$key_ # re-pre-load round[1]
|
|
+ vxor $in3,$twk3,v31
|
|
+
|
|
+ vcipher $out0,$out0,v29
|
|
+ le?vperm $in0,$in0,$in0,$leperm
|
|
+ vcipher $out1,$out1,v29
|
|
+ vcipher $out2,$out2,v29
|
|
+ vcipher $out3,$out3,v29
|
|
+ vcipher $out4,$out4,v29
|
|
+ lvx v25,$x10,$key_ # re-pre-load round[2]
|
|
+ vxor $in4,$twk4,v31
|
|
+
|
|
+ vcipher $out0,$out0,v30
|
|
+ vperm $in0,$in0,$in0,$inpperm
|
|
+ vcipher $out1,$out1,v30
|
|
+ vcipher $out2,$out2,v30
|
|
+ vcipher $out3,$out3,v30
|
|
+ vcipher $out4,$out4,v30
|
|
+
|
|
+ vcipherlast $out0,$out0,$twk0
|
|
+ vcipherlast $out1,$out1,$in1
|
|
+ vcipherlast $out2,$out2,$in2
|
|
+ vcipherlast $out3,$out3,$in3
|
|
+ vcipherlast $out4,$out4,$in4
|
|
+ blr
|
|
+ .long 0
|
|
+ .byte 0,12,0x14,0,0,0,0,0
|
|
+
|
|
+.align 5
|
|
+_aesp8_xts_decrypt6x:
|
|
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
|
|
+ mflr r11
|
|
+ li r7,`$FRAME+8*16+15`
|
|
+ li r3,`$FRAME+8*16+31`
|
|
+ $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
|
|
+ stvx v20,r7,$sp # ABI says so
|
|
+ addi r7,r7,32
|
|
+ stvx v21,r3,$sp
|
|
+ addi r3,r3,32
|
|
+ stvx v22,r7,$sp
|
|
+ addi r7,r7,32
|
|
+ stvx v23,r3,$sp
|
|
+ addi r3,r3,32
|
|
+ stvx v24,r7,$sp
|
|
+ addi r7,r7,32
|
|
+ stvx v25,r3,$sp
|
|
+ addi r3,r3,32
|
|
+ stvx v26,r7,$sp
|
|
+ addi r7,r7,32
|
|
+ stvx v27,r3,$sp
|
|
+ addi r3,r3,32
|
|
+ stvx v28,r7,$sp
|
|
+ addi r7,r7,32
|
|
+ stvx v29,r3,$sp
|
|
+ addi r3,r3,32
|
|
+ stvx v30,r7,$sp
|
|
+ stvx v31,r3,$sp
|
|
+ li r0,-1
|
|
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
|
|
+ li $x10,0x10
|
|
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
|
|
+ li $x20,0x20
|
|
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
|
|
+ li $x30,0x30
|
|
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
|
|
+ li $x40,0x40
|
|
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
|
|
+ li $x50,0x50
|
|
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
|
|
+ li $x60,0x60
|
|
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
|
|
+ li $x70,0x70
|
|
+ mtspr 256,r0
|
|
+
|
|
+ subi $rounds,$rounds,3 # -4 in total
|
|
+
|
|
+ lvx $rndkey0,$x00,$key1 # load key schedule
|
|
+ lvx v30,$x10,$key1
|
|
+ addi $key1,$key1,0x20
|
|
+ lvx v31,$x00,$key1
|
|
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
|
|
+ addi $key_,$sp,`$FRAME+15`
|
|
+ mtctr $rounds
|
|
+
|
|
+Load_xts_dec_key:
|
|
+ ?vperm v24,v30,v31,$keyperm
|
|
+ lvx v30,$x10,$key1
|
|
+ addi $key1,$key1,0x20
|
|
+ stvx v24,$x00,$key_ # off-load round[1]
|
|
+ ?vperm v25,v31,v30,$keyperm
|
|
+ lvx v31,$x00,$key1
|
|
+ stvx v25,$x10,$key_ # off-load round[2]
|
|
+ addi $key_,$key_,0x20
|
|
+ bdnz Load_xts_dec_key
|
|
+
|
|
+ lvx v26,$x10,$key1
|
|
+ ?vperm v24,v30,v31,$keyperm
|
|
+ lvx v27,$x20,$key1
|
|
+ stvx v24,$x00,$key_ # off-load round[3]
|
|
+ ?vperm v25,v31,v26,$keyperm
|
|
+ lvx v28,$x30,$key1
|
|
+ stvx v25,$x10,$key_ # off-load round[4]
|
|
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
+ ?vperm v26,v26,v27,$keyperm
|
|
+ lvx v29,$x40,$key1
|
|
+ ?vperm v27,v27,v28,$keyperm
|
|
+ lvx v30,$x50,$key1
|
|
+ ?vperm v28,v28,v29,$keyperm
|
|
+ lvx v31,$x60,$key1
|
|
+ ?vperm v29,v29,v30,$keyperm
|
|
+ lvx $twk5,$x70,$key1 # borrow $twk5
|
|
+ ?vperm v30,v30,v31,$keyperm
|
|
+ lvx v24,$x00,$key_ # pre-load round[1]
|
|
+ ?vperm v31,v31,$twk5,$keyperm
|
|
+ lvx v25,$x10,$key_ # pre-load round[2]
|
|
+
|
|
+ vperm $in0,$inout,$inptail,$inpperm
|
|
+ subi $inp,$inp,31 # undo "caller"
|
|
+ vxor $twk0,$tweak,$rndkey0
|
|
+ vsrab $tmp,$tweak,$seven # next tweak value
|
|
+ vaddubm $tweak,$tweak,$tweak
|
|
+ vsldoi $tmp,$tmp,$tmp,15
|
|
+ vand $tmp,$tmp,$eighty7
|
|
+ vxor $out0,$in0,$twk0
|
|
+ vxor $tweak,$tweak,$tmp
|
|
+
|
|
+ lvx_u $in1,$x10,$inp
|
|
+ vxor $twk1,$tweak,$rndkey0
|
|
+ vsrab $tmp,$tweak,$seven # next tweak value
|
|
+ vaddubm $tweak,$tweak,$tweak
|
|
+ vsldoi $tmp,$tmp,$tmp,15
|
|
+ le?vperm $in1,$in1,$in1,$leperm
|
|
+ vand $tmp,$tmp,$eighty7
|
|
+ vxor $out1,$in1,$twk1
|
|
+ vxor $tweak,$tweak,$tmp
|
|
+
|
|
+ lvx_u $in2,$x20,$inp
|
|
+ andi. $taillen,$len,15
|
|
+ vxor $twk2,$tweak,$rndkey0
|
|
+ vsrab $tmp,$tweak,$seven # next tweak value
|
|
+ vaddubm $tweak,$tweak,$tweak
|
|
+ vsldoi $tmp,$tmp,$tmp,15
|
|
+ le?vperm $in2,$in2,$in2,$leperm
|
|
+ vand $tmp,$tmp,$eighty7
|
|
+ vxor $out2,$in2,$twk2
|
|
+ vxor $tweak,$tweak,$tmp
|
|
+
|
|
+ lvx_u $in3,$x30,$inp
|
|
+ sub $len,$len,$taillen
|
|
+ vxor $twk3,$tweak,$rndkey0
|
|
+ vsrab $tmp,$tweak,$seven # next tweak value
|
|
+ vaddubm $tweak,$tweak,$tweak
|
|
+ vsldoi $tmp,$tmp,$tmp,15
|
|
+ le?vperm $in3,$in3,$in3,$leperm
|
|
+ vand $tmp,$tmp,$eighty7
|
|
+ vxor $out3,$in3,$twk3
|
|
+ vxor $tweak,$tweak,$tmp
|
|
+
|
|
+ lvx_u $in4,$x40,$inp
|
|
+ subi $len,$len,0x60
|
|
+ vxor $twk4,$tweak,$rndkey0
|
|
+ vsrab $tmp,$tweak,$seven # next tweak value
|
|
+ vaddubm $tweak,$tweak,$tweak
|
|
+ vsldoi $tmp,$tmp,$tmp,15
|
|
+ le?vperm $in4,$in4,$in4,$leperm
|
|
+ vand $tmp,$tmp,$eighty7
|
|
+ vxor $out4,$in4,$twk4
|
|
+ vxor $tweak,$tweak,$tmp
|
|
+
|
|
+ lvx_u $in5,$x50,$inp
|
|
+ addi $inp,$inp,0x60
|
|
+ vxor $twk5,$tweak,$rndkey0
|
|
+ vsrab $tmp,$tweak,$seven # next tweak value
|
|
+ vaddubm $tweak,$tweak,$tweak
|
|
+ vsldoi $tmp,$tmp,$tmp,15
|
|
+ le?vperm $in5,$in5,$in5,$leperm
|
|
+ vand $tmp,$tmp,$eighty7
|
|
+ vxor $out5,$in5,$twk5
|
|
+ vxor $tweak,$tweak,$tmp
|
|
+
|
|
+ vxor v31,v31,$rndkey0
|
|
+ mtctr $rounds
|
|
+ b Loop_xts_dec6x
|
|
+
|
|
+.align 5
|
|
+Loop_xts_dec6x:
|
|
+ vncipher $out0,$out0,v24
|
|
+ vncipher $out1,$out1,v24
|
|
+ vncipher $out2,$out2,v24
|
|
+ vncipher $out3,$out3,v24
|
|
+ vncipher $out4,$out4,v24
|
|
+ vncipher $out5,$out5,v24
|
|
+ lvx v24,$x20,$key_ # round[3]
|
|
+ addi $key_,$key_,0x20
|
|
+
|
|
+ vncipher $out0,$out0,v25
|
|
+ vncipher $out1,$out1,v25
|
|
+ vncipher $out2,$out2,v25
|
|
+ vncipher $out3,$out3,v25
|
|
+ vncipher $out4,$out4,v25
|
|
+ vncipher $out5,$out5,v25
|
|
+ lvx v25,$x10,$key_ # round[4]
|
|
+ bdnz Loop_xts_dec6x
|
|
+
|
|
+ subic $len,$len,96 # $len-=96
|
|
+ vxor $in0,$twk0,v31 # xor with last round key
|
|
+ vncipher $out0,$out0,v24
|
|
+ vncipher $out1,$out1,v24
|
|
+ vsrab $tmp,$tweak,$seven # next tweak value
|
|
+ vxor $twk0,$tweak,$rndkey0
|
|
+ vaddubm $tweak,$tweak,$tweak
|
|
+ vncipher $out2,$out2,v24
|
|
+ vncipher $out3,$out3,v24
|
|
+ vsldoi $tmp,$tmp,$tmp,15
|
|
+ vncipher $out4,$out4,v24
|
|
+ vncipher $out5,$out5,v24
|
|
+
|
|
+ subfe. r0,r0,r0 # borrow?-1:0
|
|
+ vand $tmp,$tmp,$eighty7
|
|
+ vncipher $out0,$out0,v25
|
|
+ vncipher $out1,$out1,v25
|
|
+ vxor $tweak,$tweak,$tmp
|
|
+ vncipher $out2,$out2,v25
|
|
+ vncipher $out3,$out3,v25
|
|
+ vxor $in1,$twk1,v31
|
|
+ vsrab $tmp,$tweak,$seven # next tweak value
|
|
+ vxor $twk1,$tweak,$rndkey0
|
|
+ vncipher $out4,$out4,v25
|
|
+ vncipher $out5,$out5,v25
|
|
+
|
|
+ and r0,r0,$len
|
|
+ vaddubm $tweak,$tweak,$tweak
|
|
+ vsldoi $tmp,$tmp,$tmp,15
|
|
+ vncipher $out0,$out0,v26
|
|
+ vncipher $out1,$out1,v26
|
|
+ vand $tmp,$tmp,$eighty7
|
|
+ vncipher $out2,$out2,v26
|
|
+ vncipher $out3,$out3,v26
|
|
+ vxor $tweak,$tweak,$tmp
|
|
+ vncipher $out4,$out4,v26
|
|
+ vncipher $out5,$out5,v26
|
|
+
|
|
+ add $inp,$inp,r0 # $inp is adjusted in such
|
|
+ # way that at exit from the
|
|
+ # loop inX-in5 are loaded
|
|
+ # with last "words"
+ vxor $in2,$twk2,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk2,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vncipher $out0,$out0,v27
+ vncipher $out1,$out1,v27
+ vsldoi $tmp,$tmp,$tmp,15
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vand $tmp,$tmp,$eighty7
+ vncipher $out4,$out4,v27
+ vncipher $out5,$out5,v27
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vxor $tweak,$tweak,$tmp
+ vncipher $out0,$out0,v28
+ vncipher $out1,$out1,v28
+ vxor $in3,$twk3,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk3,$tweak,$rndkey0
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vncipher $out4,$out4,v28
+ vncipher $out5,$out5,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vand $tmp,$tmp,$eighty7
+
+ vncipher $out0,$out0,v29
+ vncipher $out1,$out1,v29
+ vxor $tweak,$tweak,$tmp
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vxor $in4,$twk4,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk4,$tweak,$rndkey0
+ vncipher $out4,$out4,v29
+ vncipher $out5,$out5,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+
+ vncipher $out0,$out0,v30
+ vncipher $out1,$out1,v30
+ vand $tmp,$tmp,$eighty7
+ vncipher $out2,$out2,v30
+ vncipher $out3,$out3,v30
+ vxor $tweak,$tweak,$tmp
+ vncipher $out4,$out4,v30
+ vncipher $out5,$out5,v30
+ vxor $in5,$twk5,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk5,$tweak,$rndkey0
+
+ vncipherlast $out0,$out0,$in0
+ lvx_u $in0,$x00,$inp # load next input block
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vncipherlast $out1,$out1,$in1
+ lvx_u $in1,$x10,$inp
+ vncipherlast $out2,$out2,$in2
+ le?vperm $in0,$in0,$in0,$leperm
+ lvx_u $in2,$x20,$inp
+ vand $tmp,$tmp,$eighty7
+ vncipherlast $out3,$out3,$in3
+ le?vperm $in1,$in1,$in1,$leperm
+ lvx_u $in3,$x30,$inp
+ vncipherlast $out4,$out4,$in4
+ le?vperm $in2,$in2,$in2,$leperm
+ lvx_u $in4,$x40,$inp
+ vxor $tweak,$tweak,$tmp
+ vncipherlast $out5,$out5,$in5
+ le?vperm $in3,$in3,$in3,$leperm
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ le?vperm $in4,$in4,$in4,$leperm
+ le?vperm $in5,$in5,$in5,$leperm
+
+ le?vperm $out0,$out0,$out0,$leperm
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk0
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ vxor $out1,$in1,$twk1
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ vxor $out2,$in2,$twk2
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ vxor $out3,$in3,$twk3
+ le?vperm $out5,$out5,$out5,$leperm
+ stvx_u $out4,$x40,$out
+ vxor $out4,$in4,$twk4
+ stvx_u $out5,$x50,$out
+ vxor $out5,$in5,$twk5
+ addi $out,$out,0x60
+
+ mtctr $rounds
+ beq Loop_xts_dec6x # did $len-=96 borrow?
+
+ addic. $len,$len,0x60
+ beq Lxts_dec6x_zero
+ cmpwi $len,0x20
+ blt Lxts_dec6x_one
+ nop
+ beq Lxts_dec6x_two
+ cmpwi $len,0x40
+ blt Lxts_dec6x_three
+ nop
+ beq Lxts_dec6x_four
+
+Lxts_dec6x_five:
+ vxor $out0,$in1,$twk0
+ vxor $out1,$in2,$twk1
+ vxor $out2,$in3,$twk2
+ vxor $out3,$in4,$twk3
+ vxor $out4,$in5,$twk4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk5 # unused tweak
+ vxor $twk1,$tweak,$rndkey0
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk1
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ stvx_u $out4,$x40,$out
+ addi $out,$out,0x50
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_four:
+ vxor $out0,$in2,$twk0
+ vxor $out1,$in3,$twk1
+ vxor $out2,$in4,$twk2
+ vxor $out3,$in5,$twk3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk4 # unused tweak
+ vmr $twk1,$twk5
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk5
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ stvx_u $out3,$x30,$out
+ addi $out,$out,0x40
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_three:
+ vxor $out0,$in3,$twk0
+ vxor $out1,$in4,$twk1
+ vxor $out2,$in5,$twk2
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk3 # unused tweak
+ vmr $twk1,$twk4
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk4
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ stvx_u $out2,$x20,$out
+ addi $out,$out,0x30
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_two:
+ vxor $out0,$in4,$twk0
+ vxor $out1,$in5,$twk1
+ vxor $out2,$out2,$out2
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk2 # unused tweak
+ vmr $twk1,$twk3
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk3
+ stvx_u $out1,$x10,$out
+ addi $out,$out,0x20
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_one:
+ vxor $out0,$in5,$twk0
+ nop
+Loop_xts_dec1x:
+ vncipher $out0,$out0,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_dec1x
+
+ subi r0,$taillen,1
+ vncipher $out0,$out0,v24
+
+ andi. r0,r0,16
+ cmpwi $taillen,0
+ vncipher $out0,$out0,v25
+
+ sub $inp,$inp,r0
+ vncipher $out0,$out0,v26
+
+ lvx_u $in0,0,$inp
+ vncipher $out0,$out0,v27
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vncipher $out0,$out0,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vncipher $out0,$out0,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $twk0,$twk0,v31
+
+ le?vperm $in0,$in0,$in0,$leperm
+ vncipher $out0,$out0,v30
+
+ mtctr $rounds
+ vncipherlast $out0,$out0,$twk0
+
+ vmr $twk0,$twk1 # unused tweak
+ vmr $twk1,$twk2
+ le?vperm $out0,$out0,$out0,$leperm
+ stvx_u $out0,$x00,$out # store output
+ addi $out,$out,0x10
+ vxor $out0,$in0,$twk2
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_zero:
+ cmpwi $taillen,0
+ beq Lxts_dec6x_done
+
+ lvx_u $in0,0,$inp
+ le?vperm $in0,$in0,$in0,$leperm
+ vxor $out0,$in0,$twk1
+Lxts_dec6x_steal:
+ vncipher $out0,$out0,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Lxts_dec6x_steal
+
+ add $inp,$inp,$taillen
+ vncipher $out0,$out0,v24
+
+ cmpwi $taillen,0
+ vncipher $out0,$out0,v25
+
+ lvx_u $in0,0,$inp
+ vncipher $out0,$out0,v26
+
+ lvsr $inpperm,0,$taillen # $in5 is no more
+ vncipher $out0,$out0,v27
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vncipher $out0,$out0,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vncipher $out0,$out0,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $twk1,$twk1,v31
+
+ le?vperm $in0,$in0,$in0,$leperm
+ vncipher $out0,$out0,v30
+
+ vperm $in0,$in0,$in0,$inpperm
+ vncipherlast $tmp,$out0,$twk1
+
+ le?vperm $out0,$tmp,$tmp,$leperm
+ le?stvx_u $out0,0,$out
+ be?stvx_u $tmp,0,$out
+
+ vxor $out0,$out0,$out0
+ vspltisb $out1,-1
+ vperm $out0,$out0,$out1,$inpperm
+ vsel $out0,$in0,$tmp,$out0
+ vxor $out0,$out0,$twk0
+
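+ # Ciphertext stealing: copy the first $taillen bytes of the plaintext
+ # block just stored at $out into the partial tail block 16 bytes
+ # further on, then take Loop_xts_dec1x once more to recompute the
+ # block at $out itself.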
+ subi r30,$out,1
+ mtctr $taillen
+Loop_xts_dec6x_steal:
+ lbzu r0,1(r30)
+ stb r0,16(r30)
+ bdnz Loop_xts_dec6x_steal
+
+ li $taillen,0
+ mtctr $rounds
+ b Loop_xts_dec1x # one more time...
+
+.align 4
+Lxts_dec6x_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_dec6x_ret
+
+ vxor $tweak,$twk0,$rndkey0
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_dec6x_ret:
+ mtlr r11
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $seven,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x04,1,0x80,6,6,0
+ .long 0
+
+.align 5
+_aesp8_xts_dec5x:
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz _aesp8_xts_dec5x
+
+ subi r0,$taillen,1
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+
+ andi. r0,r0,16
+ cmpwi $taillen,0
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vxor $twk0,$twk0,v31
+
+ sub $inp,$inp,r0
+ vncipher $out0,$out0,v26
+ vncipher $out1,$out1,v26
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vncipher $out4,$out4,v26
+ vxor $in1,$twk1,v31
+
+ vncipher $out0,$out0,v27
+ lvx_u $in0,0,$inp
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vncipher $out4,$out4,v27
+ vxor $in2,$twk2,v31
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vncipher $out0,$out0,v28
+ vncipher $out1,$out1,v28
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vncipher $out4,$out4,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vxor $in3,$twk3,v31
+
+ vncipher $out0,$out0,v29
+ le?vperm $in0,$in0,$in0,$leperm
+ vncipher $out1,$out1,v29
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vncipher $out4,$out4,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $in4,$twk4,v31
+
+ vncipher $out0,$out0,v30
+ vncipher $out1,$out1,v30
+ vncipher $out2,$out2,v30
+ vncipher $out3,$out3,v30
+ vncipher $out4,$out4,v30
+
+ vncipherlast $out0,$out0,$twk0
+ vncipherlast $out1,$out1,$in1
+ vncipherlast $out2,$out2,$in2
+ vncipherlast $out3,$out3,$in3
+ vncipherlast $out4,$out4,$in4
+ mtctr $rounds
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+___
+}} }}}
+
+my $consts=1;
+foreach(split("\n",$code)) {
+ s/\`([^\`]*)\`/eval($1)/geo;
+
+ # constants table endian-specific conversion
+ if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
+ my $conv=$3;
+ my @bytes=();
+
+ # convert to endian-agnostic format
+ if ($1 eq "long") {
+ foreach (split(/,\s*/,$2)) {
+ my $l = /^0/?oct:int;
+ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
+ }
+ } else {
+ @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
+ }
+
+ # little-endian conversion
+ if ($flavour =~ /le$/o) {
+ SWITCH: for($conv) {
+ /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
+ /\?rev/ && do { @bytes=reverse(@bytes); last; };
+ }
+ }
+
+ #emit
+ print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
+ next;
+ }
+ $consts=0 if (m/Lconsts:/o); # end of table
+
+ # instructions prefixed with '?' are endian-specific and need
+ # to be adjusted accordingly...
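+ # (e.g. on little-endian "?vperm vD,vA,vB,vC" has its two source
+ # registers swapped and is emitted as "vperm vD,vB,vA,vC", while
+ # ?lvsr and ?lvsl are exchanged for one another)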
+ if ($flavour =~ /le$/o) { # little-endian
+ s/le\?//o or
+ s/be\?/#be#/o or
+ s/\?lvsr/lvsl/o or
+ s/\?lvsl/lvsr/o or
+ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
+ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
+ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
+ } else { # big-endian
+ s/le\?/#le#/o or
+ s/be\?//o or
+ s/\?([a-z]+)/$1/o;
+ }
+
+ print $_,"\n";
+}
+
+close STDOUT or die "error closing STDOUT: $!";
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/aes/internal.h
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/fipsmodule/aes/internal.h
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/aes/internal.h
@@ -61,6 +61,12 @@ OPENSSL_INLINE int vpaes_capable(void) {
 OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_NEON_capable(); }
 #endif

+#elif defined(OPENSSL_PPC64LE)
+#define HWAES
+
+OPENSSL_INLINE int hwaes_capable(void) {
+  return CRYPTO_is_PPC64LE_vcrypto_capable();
+}
 #endif

 #endif  // !NO_ASM
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/bcm.c
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/fipsmodule/bcm.c
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/bcm.c
@@ -101,6 +101,7 @@
 #include "self_check/fips.c.inc"
 #include "self_check/self_check.c.inc"
 #include "service_indicator/service_indicator.c.inc"
+#include "sha/sha1-altivec.c.inc"
 #include "sha/sha1.c.inc"
 #include "sha/sha256.c.inc"
 #include "sha/sha512.c.inc"
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/bn/bn.c.inc
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/fipsmodule/bn/bn.c.inc
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/bn/bn.c.inc
@@ -384,6 +384,23 @@ int bn_expand(BIGNUM *bn, size_t bits) {
 }

 int bn_resize_words(BIGNUM *bn, size_t words) {
+#if defined(OPENSSL_PPC64LE)
+  // This is a workaround for a miscompilation bug in Clang 7.0.1 on POWER.
+  // The unittests catch the miscompilation, if it occurs, and it manifests
+  // as a crash in |bn_fits_in_words|.
+  //
+  // The bug only triggers if building in FIPS mode and with -O3. Clang 8.0.1
+  // has the same bug but this workaround is not effective there---I've not
+  // been able to find a workaround for 8.0.1.
+  //
+  // At the time of writing (2019-08-08), Clang git does *not* have this bug
+  // and does not need this workaround. The current git version should go on
+  // to be Clang 10; thus, once we can depend on that, this can be removed.
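+  //
+  // (|value_barrier_w| is presumably what sidesteps the bug here: it hides
+  // the comparison result from the optimizer.)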
+  if (value_barrier_w((size_t)bn->width == words)) {
+    return 1;
+  }
+#endif
+
   if ((size_t)bn->width <= words) {
     if (!bn_wexpand(bn, words)) {
       return 0;
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/cipher/e_aes.c.inc
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/fipsmodule/cipher/e_aes.c.inc
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/cipher/e_aes.c.inc
@@ -1456,6 +1456,8 @@ int EVP_has_aes_hardware(void) {
   return hwaes_capable() && crypto_gcm_clmul_enabled();
 #elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
   return hwaes_capable() && CRYPTO_is_ARMv8_PMULL_capable();
+#elif defined(OPENSSL_PPC64LE)
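+  // The same |CRYPTO_is_PPC64LE_vcrypto_capable| check gates the GHASH
+  // implementation selected in gcm.c.inc, so a single capability check
+  // covers both AES and GHASH here.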
+  return CRYPTO_is_PPC64LE_vcrypto_capable();
 #else
   return 0;
 #endif
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
===================================================================
--- /dev/null
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
@@ -0,0 +1,671 @@
+#! /usr/bin/env perl
+# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# GHASH for PowerISA v2.07.
+#
+# July 2014
+#
+# Accurate performance measurements are problematic, because it's
+# always a virtualized setup with a possibly throttled processor.
+# Relative comparison is therefore more informative. This initial
+# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
+# faster than "4-bit" integer-only compiler-generated 64-bit code.
+# "Initial version" means that there is room for futher improvement.
+
+# May 2016
+#
+# 2x aggregated reduction improves performance by 50% (resulting
+# performance on POWER8 is 1 cycle per processed byte), and 4x
+# aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
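+#
+# ("N-fold aggregated reduction" folds N blocks into the accumulator as
+# Xi = (Xi+in0)·H^N + in1·H^(N-1) + ... + in(N-1)·H, so the costly
+# polynomial reduction is performed once per N blocks rather than once
+# per block.)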
+
+$flavour=shift;
+$output =shift;
+
+if ($flavour =~ /64/) {
+ $SIZE_T=8;
+ $LRSAVE=2*$SIZE_T;
+ $STU="stdu";
+ $POP="ld";
+ $PUSH="std";
+ $UCMP="cmpld";
+ $SHRI="srdi";
+} elsif ($flavour =~ /32/) {
+ $SIZE_T=4;
+ $LRSAVE=$SIZE_T;
+ $STU="stwu";
+ $POP="lwz";
+ $PUSH="stw";
+ $UCMP="cmplw";
+ $SHRI="srwi";
+} else { die "nonsense $flavour"; }
+
+$sp="r1";
+$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
|
|
+open OUT,"| $^X \"$xlate\" $flavour \"$output\"" or die "can't call $xlate: $!";
+*STDOUT=*OUT;
+
+my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
+
+my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
+my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
+my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
+my $vrsave="r12";
+
+$code=<<___;
+.machine "any"
+
+.text
+
+.globl .gcm_init_p8
+.align 5
+.gcm_init_p8:
+ li r0,-4096
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $H,0,r4 # load H
+
+ vspltisb $xC2,-16 # 0xf0
+ vspltisb $t0,1 # one
+ vaddubm $xC2,$xC2,$xC2 # 0xe0
+ vxor $zero,$zero,$zero
+ vor $xC2,$xC2,$t0 # 0xe1
+ vsldoi $xC2,$xC2,$zero,15 # 0xe1...
+ vsldoi $t1,$zero,$t0,1 # ...1
+ vaddubm $xC2,$xC2,$xC2 # 0xc2...
+ vspltisb $t2,7
+ vor $xC2,$xC2,$t1 # 0xc2....01
+ vspltb $t1,$H,0 # most significant byte
+ vsl $H,$H,$t0 # H<<=1
+ vsrab $t1,$t1,$t2 # broadcast carry bit
+ vand $t1,$t1,$xC2
+ vxor $IN,$H,$t1 # twisted H
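+ # ("Twisted" H is H·x mod P, via the shift and carry fold above;
+ # premultiplying by x absorbs the one-bit offset that GHASH's
+ # bit-reflected convention would otherwise impose on every vpmsumd
+ # carry-less product.)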
+
+ vsldoi $H,$IN,$IN,8 # twist even more ...
+ vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
+ vsldoi $Hl,$zero,$H,8 # ... and split
+ vsldoi $Hh,$H,$zero,8
+
+ stvx_u $xC2,0,r3 # save pre-computed table
+ stvx_u $Hl,r8,r3
+ li r8,0x40
+ stvx_u $H, r9,r3
+ li r9,0x50
+ stvx_u $Hh,r10,r3
+ li r10,0x60
+
+ vpmsumd $Xl,$IN,$Hl # H.lo·H.lo
+ vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·H.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $IN1,$Xl,$t1
+
+ vsldoi $H2,$IN1,$IN1,8
+ vsldoi $H2l,$zero,$H2,8
+ vsldoi $H2h,$H2,$zero,8
+
+ stvx_u $H2l,r8,r3 # save H^2
+ li r8,0x70
+ stvx_u $H2,r9,r3
+ li r9,0x80
+ stvx_u $H2h,r10,r3
+ li r10,0x90
+___
+{
+my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
+$code.=<<___;
+ vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo
+ vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo
+ vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi
+ vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi
+ vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi
+ vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+ vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vsldoi $t4,$Xm1,$zero,8
+ vsldoi $t5,$zero,$Xm1,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+ vxor $Xl1,$Xl1,$t4
+ vxor $Xh1,$Xh1,$t5
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vsldoi $Xl1,$Xl1,$Xl1,8
+ vxor $Xl,$Xl,$t2
+ vxor $Xl1,$Xl1,$t6
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vpmsumd $Xl1,$Xl1,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $t5,$t5,$Xh1
+ vxor $Xl,$Xl,$t1
+ vxor $Xl1,$Xl1,$t5
+
+ vsldoi $H,$Xl,$Xl,8
+ vsldoi $H2,$Xl1,$Xl1,8
+ vsldoi $Hl,$zero,$H,8
+ vsldoi $Hh,$H,$zero,8
+ vsldoi $H2l,$zero,$H2,8
+ vsldoi $H2h,$H2,$zero,8
+
+ stvx_u $Hl,r8,r3 # save H^3
+ li r8,0xa0
+ stvx_u $H,r9,r3
+ li r9,0xb0
+ stvx_u $Hh,r10,r3
+ li r10,0xc0
+ stvx_u $H2l,r8,r3 # save H^4
+ stvx_u $H2,r9,r3
+ stvx_u $H2h,r10,r3
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_init_p8,.-.gcm_init_p8
+___
+}
+$code.=<<___;
+.globl .gcm_gmult_p8
+.align 5
+.gcm_gmult_p8:
+ lis r0,0xfff8
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $IN,0,$Xip # load Xi
+
+ lvx_u $Hl,r8,$Htbl # load pre-computed table
+ le?lvsl $lemask,r0,r0
+ lvx_u $H, r9,$Htbl
+ le?vspltisb $t0,0x07
+ lvx_u $Hh,r10,$Htbl
+ le?vxor $lemask,$lemask,$t0
+ lvx_u $xC2,0,$Htbl
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $zero,$zero,$zero
+
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $Xl,$Xl,$t1
+
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_gmult_p8,.-.gcm_gmult_p8
+
+.globl .gcm_ghash_p8
+.align 5
+.gcm_ghash_p8:
+ li r0,-4096
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $Xl,0,$Xip # load Xi
+
+ lvx_u $Hl,r8,$Htbl # load pre-computed table
+ li r8,0x40
+ le?lvsl $lemask,r0,r0
+ lvx_u $H, r9,$Htbl
+ li r9,0x50
+ le?vspltisb $t0,0x07
+ lvx_u $Hh,r10,$Htbl
+ li r10,0x60
+ le?vxor $lemask,$lemask,$t0
+ lvx_u $xC2,0,$Htbl
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ vxor $zero,$zero,$zero
+
+ ${UCMP}i $len,64
+ bge Lgcm_ghash_p8_4x
+
+ lvx_u $IN,0,$inp
+ addi $inp,$inp,16
+ subic. $len,$len,16
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $IN,$IN,$Xl
+ beq Lshort
+
+ lvx_u $H2l,r8,$Htbl # load H^2
+ li r8,16
+ lvx_u $H2, r9,$Htbl
+ add r9,$inp,$len # end of input
+ lvx_u $H2h,r10,$Htbl
+ be?b Loop_2x
+
+.align 5
+Loop_2x:
+ lvx_u $IN1,0,$inp
+ le?vperm $IN1,$IN1,$IN1,$lemask
+
+ subic $len,$len,32
+ vpmsumd $Xl,$IN,$H2l # H^2.lo·Xi.lo
+ vpmsumd $Xl1,$IN1,$Hl # H.lo·Xi+1.lo
+ subfe r0,r0,r0 # borrow?-1:0
+ vpmsumd $Xm,$IN,$H2 # H^2.hi·Xi.lo+H^2.lo·Xi.hi
+ vpmsumd $Xm1,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+1.hi
+ and r0,r0,$len
+ vpmsumd $Xh,$IN,$H2h # H^2.hi·Xi.hi
+ vpmsumd $Xh1,$IN1,$Hh # H.hi·Xi+1.hi
+ add $inp,$inp,r0
+
+ vxor $Xl,$Xl,$Xl1
+ vxor $Xm,$Xm,$Xm1
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xh,$Xh,$Xh1
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+ lvx_u $IN,r8,$inp
+ addi $inp,$inp,32
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $t1,$t1,$Xh
+ vxor $IN,$IN,$t1
+ vxor $IN,$IN,$Xl
+ $UCMP r9,$inp
+ bgt Loop_2x # done yet?
+
+ cmplwi $len,0
+ bne Leven
+
+Lshort:
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+
+Leven:
+ vxor $Xl,$Xl,$t1
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
+___
+{
+my ($Xl3,$Xm2,$IN2,$H3l,$H3,$H3h,
+ $Xh3,$Xm3,$IN3,$H4l,$H4,$H4h) = map("v$_",(20..31));
+my $IN0=$IN;
+my ($H21l,$H21h,$loperm,$hiperm) = ($Hl,$Hh,$H2l,$H2h);
+
+$code.=<<___;
+.align 5
+.gcm_ghash_p8_4x:
+Lgcm_ghash_p8_4x:
+ $STU $sp,-$FRAME($sp)
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
+ stvx v20,r10,$sp
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ li r10,0x60
+ stvx v31,r11,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME-4`($sp) # save vrsave
+ mtspr 256,r0 # preserve all AltiVec registers
+
+ lvsl $t0,0,r8 # 0x0001..0e0f
+ #lvx_u $H2l,r8,$Htbl # load H^2
+ li r8,0x70
+ lvx_u $H2, r9,$Htbl
+ li r9,0x80
+ vspltisb $t1,8 # 0x0808..0808
+ #lvx_u $H2h,r10,$Htbl
+ li r10,0x90
+ lvx_u $H3l,r8,$Htbl # load H^3
+ li r8,0xa0
+ lvx_u $H3, r9,$Htbl
+ li r9,0xb0
+ lvx_u $H3h,r10,$Htbl
+ li r10,0xc0
+ lvx_u $H4l,r8,$Htbl # load H^4
+ li r8,0x10
+ lvx_u $H4, r9,$Htbl
+ li r9,0x20
+ lvx_u $H4h,r10,$Htbl
+ li r10,0x30
+
+ vsldoi $t2,$zero,$t1,8 # 0x0000..0808
+ vaddubm $hiperm,$t0,$t2 # 0x0001..1617
+ vaddubm $loperm,$t1,$hiperm # 0x0809..1e1f
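+ # ($hiperm/$loperm pack the high and low doublewords of two source
+ # registers side by side, so a single vpmsumd below multiplies two
+ # input blocks by two different powers of H at once.)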
+
+ $SHRI $len,$len,4 # this allows using the sign bit
+ # as carry
+ lvx_u $IN0,0,$inp # load input
+ lvx_u $IN1,r8,$inp
+ subic. $len,$len,8
+ lvx_u $IN2,r9,$inp
+ lvx_u $IN3,r10,$inp
+ addi $inp,$inp,0x40
+ le?vperm $IN0,$IN0,$IN0,$lemask
+ le?vperm $IN1,$IN1,$IN1,$lemask
+ le?vperm $IN2,$IN2,$IN2,$lemask
+ le?vperm $IN3,$IN3,$IN3,$lemask
+
+ vxor $Xh,$IN0,$Xl
+
+ vpmsumd $Xl1,$IN1,$H3l
+ vpmsumd $Xm1,$IN1,$H3
+ vpmsumd $Xh1,$IN1,$H3h
+
+ vperm $H21l,$H2,$H,$hiperm
+ vperm $t0,$IN2,$IN3,$loperm
+ vperm $H21h,$H2,$H,$loperm
+ vperm $t1,$IN2,$IN3,$hiperm
+ vpmsumd $Xm2,$IN2,$H2 # H^2.lo·Xi+2.hi+H^2.hi·Xi+2.lo
+ vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+2.lo+H.lo·Xi+3.lo
+ vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi
+ vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+2.hi+H.hi·Xi+3.hi
+
+ vxor $Xm2,$Xm2,$Xm1
+ vxor $Xl3,$Xl3,$Xl1
+ vxor $Xm3,$Xm3,$Xm2
+ vxor $Xh3,$Xh3,$Xh1
+
+ blt Ltail_4x
+
+Loop_4x:
+ lvx_u $IN0,0,$inp
+ lvx_u $IN1,r8,$inp
+ subic. $len,$len,4
+ lvx_u $IN2,r9,$inp
+ lvx_u $IN3,r10,$inp
+ addi $inp,$inp,0x40
+ le?vperm $IN1,$IN1,$IN1,$lemask
+ le?vperm $IN2,$IN2,$IN2,$lemask
+ le?vperm $IN3,$IN3,$IN3,$lemask
+ le?vperm $IN0,$IN0,$IN0,$lemask
+
+ vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo
+ vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi
+ vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi
+ vpmsumd $Xl1,$IN1,$H3l
+ vpmsumd $Xm1,$IN1,$H3
+ vpmsumd $Xh1,$IN1,$H3h
+
+ vxor $Xl,$Xl,$Xl3
+ vxor $Xm,$Xm,$Xm3
+ vxor $Xh,$Xh,$Xh3
+ vperm $t0,$IN2,$IN3,$loperm
+ vperm $t1,$IN2,$IN3,$hiperm
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+ vpmsumd $Xl3,$t0,$H21l # H.lo·Xi+3.lo +H^2.lo·Xi+2.lo
+ vpmsumd $Xh3,$t1,$H21h # H.hi·Xi+3.hi +H^2.hi·Xi+2.hi
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xm2,$IN2,$H2 # H^2.hi·Xi+2.lo+H^2.lo·Xi+2.hi
+ vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi
+ vpmsumd $Xl,$Xl,$xC2
+
+ vxor $Xl3,$Xl3,$Xl1
+ vxor $Xh3,$Xh3,$Xh1
+ vxor $Xh,$Xh,$IN0
+ vxor $Xm2,$Xm2,$Xm1
+ vxor $Xh,$Xh,$t1
+ vxor $Xm3,$Xm3,$Xm2
+ vxor $Xh,$Xh,$Xl
+ bge Loop_4x
+
+Ltail_4x:
+ vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo
+ vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi
+ vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi
+
+ vxor $Xl,$Xl,$Xl3
+ vxor $Xm,$Xm,$Xm3
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xh,$Xh,$Xh3
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $Xl,$Xl,$t1
+
+ addic. $len,$len,4
+ beq Ldone_4x
+
+ lvx_u $IN0,0,$inp
+ ${UCMP}i $len,2
+ li $len,-4
+ blt Lone
+ lvx_u $IN1,r8,$inp
+ beq Ltwo
+
+Lthree:
+ lvx_u $IN2,r9,$inp
+ le?vperm $IN0,$IN0,$IN0,$lemask
+ le?vperm $IN1,$IN1,$IN1,$lemask
+ le?vperm $IN2,$IN2,$IN2,$lemask
+
+ vxor $Xh,$IN0,$Xl
+ vmr $H4l,$H3l
+ vmr $H4, $H3
+ vmr $H4h,$H3h
+
+ vperm $t0,$IN1,$IN2,$loperm
+ vperm $t1,$IN1,$IN2,$hiperm
+ vpmsumd $Xm2,$IN1,$H2 # H^2.lo·Xi+1.hi+H^2.hi·Xi+1.lo
+ vpmsumd $Xm3,$IN2,$H # H.hi·Xi+2.lo +H.lo·Xi+2.hi
+ vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+1.lo+H.lo·Xi+2.lo
+ vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+1.hi+H.hi·Xi+2.hi
+
+ vxor $Xm3,$Xm3,$Xm2
+ b Ltail_4x
+
+.align 4
+Ltwo:
+ le?vperm $IN0,$IN0,$IN0,$lemask
+ le?vperm $IN1,$IN1,$IN1,$lemask
+
+ vxor $Xh,$IN0,$Xl
+ vperm $t0,$zero,$IN1,$loperm
+ vperm $t1,$zero,$IN1,$hiperm
+
+ vsldoi $H4l,$zero,$H2,8
+ vmr $H4, $H2
+ vsldoi $H4h,$H2,$zero,8
+
+ vpmsumd $Xl3,$t0, $H21l # H.lo·Xi+1.lo
+ vpmsumd $Xm3,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+2.hi
+ vpmsumd $Xh3,$t1, $H21h # H.hi·Xi+1.hi
+
+ b Ltail_4x
+
+.align 4
+Lone:
+ le?vperm $IN0,$IN0,$IN0,$lemask
+
+ vsldoi $H4l,$zero,$H,8
+ vmr $H4, $H
+ vsldoi $H4h,$H,$zero,8
+
+ vxor $Xh,$IN0,$Xl
+ vxor $Xl3,$Xl3,$Xl3
+ vxor $Xm3,$Xm3,$Xm3
+ vxor $Xh3,$Xh3,$Xh3
+
+ b Ltail_4x
+
+Ldone_4x:
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ addi $sp,$sp,$FRAME
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,0,4,0
+ .long 0
+___
+}
+$code.=<<___;
+.size .gcm_ghash_p8,.-.gcm_ghash_p8
+
+.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+___
+
+foreach (split("\n",$code)) {
+ s/\`([^\`]*)\`/eval $1/geo;
+
+ if ($flavour =~ /le$/o) { # little-endian
+ s/le\?//o or
+ s/be\?/#be#/o;
+ } else {
+ s/le\?/#le#/o or
+ s/be\?//o;
+ }
+ print $_,"\n";
+}
+
+close STDOUT or die "error closing STDOUT: $!"; # enforce flush
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/modes/gcm.c.inc
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/fipsmodule/modes/gcm.c.inc
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/modes/gcm.c.inc
@@ -228,6 +228,13 @@ void CRYPTO_ghash_init(gmult_func *out_m
     *out_hash = gcm_ghash_neon;
     return;
   }
+#elif defined(GHASH_ASM_PPC64LE)
+  if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
+    gcm_init_p8(out_table, H);
+    *out_mult = gcm_gmult_p8;
+    *out_hash = gcm_ghash_p8;
+    return;
+  }
 #endif

   gcm_init_nohw(out_table, H);
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/modes/gcm_test.cc
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/fipsmodule/modes/gcm_test.cc
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/modes/gcm_test.cc
@@ -209,5 +209,15 @@ TEST(GCMTest, ABI) {
     }
   }
 #endif
+
+#if defined(GHASH_ASM_PPC64LE)
+  if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
+    CHECK_ABI(gcm_init_p8, Htable, kH);
+    CHECK_ABI(gcm_gmult_p8, X, Htable);
+    for (size_t blocks : kBlockCounts) {
+      CHECK_ABI(gcm_ghash_p8, X, Htable, buf, 16 * blocks);
+    }
+  }
+#endif  // GHASH_ASM_PPC64LE
 }
 #endif  // SUPPORTS_ABI_TEST && !OPENSSL_NO_ASM
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/modes/internal.h
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/fipsmodule/modes/internal.h
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/modes/internal.h
@@ -325,6 +325,13 @@ void aes_gcm_dec_kernel(const uint8_t *i
                         const u128 Htable[16]);
 #endif

+#elif defined(OPENSSL_PPC64LE)
+#define GHASH_ASM_PPC64LE
+#define GCM_FUNCREF
+void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_p8(uint8_t Xi[16], const u128 Htable[16]);
+void gcm_ghash_p8(uint8_t Xi[16], const u128 Htable[16], const uint8_t *inp,
+                  size_t len);
 #endif
 #endif  // OPENSSL_NO_ASM

Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/rand_extra/getrandom_fillin.h
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/rand_extra/getrandom_fillin.h
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/rand_extra/getrandom_fillin.h
@@ -30,6 +30,8 @@
 #define EXPECTED_NR_getrandom 278
 #elif defined(OPENSSL_ARM)
 #define EXPECTED_NR_getrandom 384
+#elif defined(OPENSSL_PPC64LE)
+#define EXPECTED_NR_getrandom 359
 #elif defined(OPENSSL_RISCV64)
 #define EXPECTED_NR_getrandom 278
 #endif
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/rand/rand.c.inc
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/fipsmodule/rand/rand.c.inc
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/rand/rand.c.inc
@@ -427,6 +427,11 @@ bcm_infallible BCM_rand_bytes_with_addit
   // Take a read lock around accesses to |state->drbg|. This is needed to
   // avoid returning bad entropy if we race with
   // |rand_thread_state_clear_all|.
+  //
+  // This lock must be taken after any calls to |CRYPTO_sysrand| to avoid a
+  // bug on ppc64le. glibc may implement pthread locks by wrapping user code
+  // in a hardware transaction, but, on some older versions of glibc and the
+  // kernel, syscalls made with |syscall| did not abort the transaction.
   CRYPTO_MUTEX_lock_read(&state->clear_drbg_lock);
 #endif
   if (!CTR_DRBG_reseed(&state->drbg, seed, reseed_additional_data,
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/sha/internal.h
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/fipsmodule/sha/internal.h
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/sha/internal.h
@@ -23,6 +23,16 @@
 extern "C" {
 #endif

+#if defined(OPENSSL_PPC64LE)
+// POWER has an intrinsics-based implementation of SHA-1 and thus the functions
+// normally defined in assembly are available even with |OPENSSL_NO_ASM| in
+// this case.
+#define SHA1_ASM_PPC64
+void sha1_block_data_order_ppc64(uint32_t *state, const uint8_t *in,
+                                 size_t num_blocks);
+#endif
+
+
 // Define SHA{n}[_{variant}]_ASM if sha{n}_block_data_order[_{variant}] is
 // defined in assembly.

Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/sha/sha1-altivec.c.inc
===================================================================
--- /dev/null
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/sha/sha1-altivec.c.inc
@@ -0,0 +1,361 @@
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to.  The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    "This product includes cryptographic software written by
+ *     Eric Young (eay@cryptsoft.com)"
+ *    The word 'cryptographic' can be left out if the rouines from the library
+ *    being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ *    the apps directory (application code) you must include an acknowledgement:
+ *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed.  i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.] */
+
+// Altivec-optimized SHA1 in C. This is tested on ppc64le only.
+//
+// References:
+// https://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
+// http://arctic.org/~dean/crypto/sha1.html
+//
+// This code used the generic SHA-1 from OpenSSL as a basis and AltiVec
+// optimisations were added on top.
+
+#include <openssl/sha.h>
+
+#if defined(OPENSSL_PPC64LE)
+
+#include <altivec.h>
+
+void sha1_block_data_order_ppc64(uint32_t *state, const uint8_t *data, size_t num);
+
+static uint32_t rotate(uint32_t a, int n) { return (a << n) | (a >> (32 - n)); }
+
+typedef vector unsigned int vec_uint32_t;
+typedef vector unsigned char vec_uint8_t;
+
+// Vector constants
+static const vec_uint8_t k_swap_endianness = {3, 2, 1, 0, 7, 6, 5, 4,
+                                              11, 10, 9, 8, 15, 14, 13, 12};
+
+// Shift amounts for byte and bit shifts and rotations
+static const vec_uint8_t k_4_bytes = {32, 32, 32, 32, 32, 32, 32, 32,
+                                      32, 32, 32, 32, 32, 32, 32, 32};
+static const vec_uint8_t k_12_bytes = {96, 96, 96, 96, 96, 96, 96, 96,
+                                       96, 96, 96, 96, 96, 96, 96, 96};
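+// (vec_sro/vec_slo shift by whole bytes but read the shift amount, in bits,
+// from the shift vector: 32 in effect encodes a 4-byte shift and 96 a
+// 12-byte shift.)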
+
+#define K_00_19 0x5a827999UL
+#define K_20_39 0x6ed9eba1UL
+#define K_40_59 0x8f1bbcdcUL
+#define K_60_79 0xca62c1d6UL
+
+// Vector versions of the above.
+static const vec_uint32_t K_00_19_x_4 = {K_00_19, K_00_19, K_00_19, K_00_19};
+static const vec_uint32_t K_20_39_x_4 = {K_20_39, K_20_39, K_20_39, K_20_39};
+static const vec_uint32_t K_40_59_x_4 = {K_40_59, K_40_59, K_40_59, K_40_59};
+static const vec_uint32_t K_60_79_x_4 = {K_60_79, K_60_79, K_60_79, K_60_79};
+
+// vector message scheduling: compute message schedule for round i..i+3 where i
+// is divisible by 4. We return the schedule w[i..i+3] as a vector. In
+// addition, we also precompute the sum of w[i..i+3] and an additive constant K. This
+// is done to offload some computation of f() in the integer execution units.
+//
+// Byte shifting code below may not be correct for big-endian systems.
+static vec_uint32_t sched_00_15(vec_uint32_t *pre_added, const void *data,
+                                vec_uint32_t k) {
+  const vector unsigned char unaligned_data =
+      vec_vsx_ld(0, (const unsigned char*) data);
+  const vec_uint32_t v = (vec_uint32_t) unaligned_data;
+  const vec_uint32_t w = vec_perm(v, v, k_swap_endianness);
+  vec_st(w + k, 0, pre_added);
+  return w;
+}
+
+// Compute w[i..i+3] using these steps for i in [16, 20, 24, 28]
+//
+// w'[i ]  = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) <<< 1
+// w'[i+1] = (w[i-2] ^ w[i-7] ^ w[i-13] ^ w[i-15]) <<< 1
+// w'[i+2] = (w[i-1] ^ w[i-6] ^ w[i-12] ^ w[i-14]) <<< 1
+// w'[i+3] = (   0   ^ w[i-5] ^ w[i-11] ^ w[i-13]) <<< 1
+//
+// w[ i ] = w'[ i ]
+// w[i+1] = w'[i+1]
+// w[i+2] = w'[i+2]
+// w[i+3] = w'[i+3] ^ (w'[i] <<< 1)
+static vec_uint32_t sched_16_31(vec_uint32_t *pre_added, vec_uint32_t minus_4,
+                                vec_uint32_t minus_8, vec_uint32_t minus_12,
+                                vec_uint32_t minus_16, vec_uint32_t k) {
+  const vec_uint32_t minus_3 = vec_sro(minus_4, k_4_bytes);
+  const vec_uint32_t minus_14 = vec_sld((minus_12), (minus_16), 8);
+  const vec_uint32_t k_1_bit = vec_splat_u32(1);
+  const vec_uint32_t w_prime =
+      vec_rl(minus_3 ^ minus_8 ^ minus_14 ^ minus_16, k_1_bit);
+  const vec_uint32_t w =
+      w_prime ^ vec_rl(vec_slo(w_prime, k_12_bytes), k_1_bit);
+  vec_st(w + k, 0, pre_added);
+  return w;
+}
+
+// Compute w[i..i+3] using this relation for i in [32, 36, 40 ... 76]
+// w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) <<< 2
+static vec_uint32_t sched_32_79(vec_uint32_t *pre_added, vec_uint32_t minus_4,
+                                vec_uint32_t minus_8, vec_uint32_t minus_16,
+                                vec_uint32_t minus_28, vec_uint32_t minus_32,
+                                vec_uint32_t k) {
+  const vec_uint32_t minus_6 = vec_sld(minus_4, minus_8, 8);
+  const vec_uint32_t k_2_bits = vec_splat_u32(2);
+  const vec_uint32_t w =
+      vec_rl(minus_6 ^ minus_16 ^ minus_28 ^ minus_32, k_2_bits);
+  vec_st(w + k, 0, pre_added);
+  return w;
+}
+
+// As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be simplified
+// to the code in F_00_19. Wei attributes these optimisations to Peter
+// Gutmann's SHS code, and he attributes it to Rich Schroeppel.
+//   #define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
+// I've just become aware of another tweak to be made, again from Wei Dai, in
+// F_40_59, (x&a)|(y&a) -> (x|y)&a
+#define F_00_19(b, c, d) ((((c) ^ (d)) & (b)) ^ (d))
+#define F_20_39(b, c, d) ((b) ^ (c) ^ (d))
+#define F_40_59(b, c, d) (((b) & (c)) | (((b) | (c)) & (d)))
+#define F_60_79(b, c, d) F_20_39(b, c, d)
+
+// We pre-added the K constants during message scheduling.
+#define BODY_00_19(i, a, b, c, d, e, f)                         \
+  do {                                                          \
+    (f) = w[i] + (e) + rotate((a), 5) + F_00_19((b), (c), (d)); \
+    (b) = rotate((b), 30);                                      \
+  } while (0)
+
+#define BODY_20_39(i, a, b, c, d, e, f)                         \
+  do {                                                          \
+    (f) = w[i] + (e) + rotate((a), 5) + F_20_39((b), (c), (d)); \
+    (b) = rotate((b), 30);                                      \
+  } while (0)
+
+#define BODY_40_59(i, a, b, c, d, e, f)                         \
+  do {                                                          \
+    (f) = w[i] + (e) + rotate((a), 5) + F_40_59((b), (c), (d)); \
+    (b) = rotate((b), 30);                                      \
+  } while (0)
+
+#define BODY_60_79(i, a, b, c, d, e, f)                         \
+  do {                                                          \
+    (f) = w[i] + (e) + rotate((a), 5) + F_60_79((b), (c), (d)); \
+    (b) = rotate((b), 30);                                      \
+  } while (0)
+
+void sha1_block_data_order_ppc64(uint32_t *state, const uint8_t *data, size_t num) {
+  uint32_t A, B, C, D, E, T;
+
+  A = state[0];
+  B = state[1];
+  C = state[2];
+  D = state[3];
+  E = state[4];
+
+  for (;;) {
+    vec_uint32_t vw[20];
+    const uint32_t *w = (const uint32_t *)&vw;
+
+    vec_uint32_t k = K_00_19_x_4;
+    const vec_uint32_t w0 = sched_00_15(vw + 0, data + 0, k);
+    BODY_00_19(0, A, B, C, D, E, T);
+    BODY_00_19(1, T, A, B, C, D, E);
+    BODY_00_19(2, E, T, A, B, C, D);
+    BODY_00_19(3, D, E, T, A, B, C);
+
+    const vec_uint32_t w4 = sched_00_15(vw + 1, data + 16, k);
+    BODY_00_19(4, C, D, E, T, A, B);
+    BODY_00_19(5, B, C, D, E, T, A);
+    BODY_00_19(6, A, B, C, D, E, T);
+    BODY_00_19(7, T, A, B, C, D, E);
+
+    const vec_uint32_t w8 = sched_00_15(vw + 2, data + 32, k);
+    BODY_00_19(8, E, T, A, B, C, D);
+    BODY_00_19(9, D, E, T, A, B, C);
+    BODY_00_19(10, C, D, E, T, A, B);
+    BODY_00_19(11, B, C, D, E, T, A);
+
+    const vec_uint32_t w12 = sched_00_15(vw + 3, data + 48, k);
+    BODY_00_19(12, A, B, C, D, E, T);
+    BODY_00_19(13, T, A, B, C, D, E);
+    BODY_00_19(14, E, T, A, B, C, D);
+    BODY_00_19(15, D, E, T, A, B, C);
+
+    const vec_uint32_t w16 = sched_16_31(vw + 4, w12, w8, w4, w0, k);
+    BODY_00_19(16, C, D, E, T, A, B);
+    BODY_00_19(17, B, C, D, E, T, A);
+    BODY_00_19(18, A, B, C, D, E, T);
+    BODY_00_19(19, T, A, B, C, D, E);
+
+    k = K_20_39_x_4;
+    const vec_uint32_t w20 = sched_16_31(vw + 5, w16, w12, w8, w4, k);
+    BODY_20_39(20, E, T, A, B, C, D);
+    BODY_20_39(21, D, E, T, A, B, C);
+    BODY_20_39(22, C, D, E, T, A, B);
+    BODY_20_39(23, B, C, D, E, T, A);
+
+    const vec_uint32_t w24 = sched_16_31(vw + 6, w20, w16, w12, w8, k);
+    BODY_20_39(24, A, B, C, D, E, T);
+    BODY_20_39(25, T, A, B, C, D, E);
+    BODY_20_39(26, E, T, A, B, C, D);
+    BODY_20_39(27, D, E, T, A, B, C);
+
+    const vec_uint32_t w28 = sched_16_31(vw + 7, w24, w20, w16, w12, k);
+    BODY_20_39(28, C, D, E, T, A, B);
+    BODY_20_39(29, B, C, D, E, T, A);
+    BODY_20_39(30, A, B, C, D, E, T);
+    BODY_20_39(31, T, A, B, C, D, E);
+
+    const vec_uint32_t w32 = sched_32_79(vw + 8, w28, w24, w16, w4, w0, k);
+    BODY_20_39(32, E, T, A, B, C, D);
+    BODY_20_39(33, D, E, T, A, B, C);
+    BODY_20_39(34, C, D, E, T, A, B);
+    BODY_20_39(35, B, C, D, E, T, A);
+
+    const vec_uint32_t w36 = sched_32_79(vw + 9, w32, w28, w20, w8, w4, k);
+    BODY_20_39(36, A, B, C, D, E, T);
+    BODY_20_39(37, T, A, B, C, D, E);
+    BODY_20_39(38, E, T, A, B, C, D);
+    BODY_20_39(39, D, E, T, A, B, C);
+
+    k = K_40_59_x_4;
+    const vec_uint32_t w40 = sched_32_79(vw + 10, w36, w32, w24, w12, w8, k);
+    BODY_40_59(40, C, D, E, T, A, B);
+    BODY_40_59(41, B, C, D, E, T, A);
+    BODY_40_59(42, A, B, C, D, E, T);
+    BODY_40_59(43, T, A, B, C, D, E);
+
+    const vec_uint32_t w44 = sched_32_79(vw + 11, w40, w36, w28, w16, w12, k);
+    BODY_40_59(44, E, T, A, B, C, D);
+    BODY_40_59(45, D, E, T, A, B, C);
+    BODY_40_59(46, C, D, E, T, A, B);
+    BODY_40_59(47, B, C, D, E, T, A);
+
+    const vec_uint32_t w48 = sched_32_79(vw + 12, w44, w40, w32, w20, w16, k);
+    BODY_40_59(48, A, B, C, D, E, T);
+    BODY_40_59(49, T, A, B, C, D, E);
+    BODY_40_59(50, E, T, A, B, C, D);
+    BODY_40_59(51, D, E, T, A, B, C);
+
+    const vec_uint32_t w52 = sched_32_79(vw + 13, w48, w44, w36, w24, w20, k);
+    BODY_40_59(52, C, D, E, T, A, B);
+    BODY_40_59(53, B, C, D, E, T, A);
+    BODY_40_59(54, A, B, C, D, E, T);
+    BODY_40_59(55, T, A, B, C, D, E);
+
+    const vec_uint32_t w56 = sched_32_79(vw + 14, w52, w48, w40, w28, w24, k);
+    BODY_40_59(56, E, T, A, B, C, D);
+    BODY_40_59(57, D, E, T, A, B, C);
+    BODY_40_59(58, C, D, E, T, A, B);
+    BODY_40_59(59, B, C, D, E, T, A);
+
+    k = K_60_79_x_4;
+    const vec_uint32_t w60 = sched_32_79(vw + 15, w56, w52, w44, w32, w28, k);
+    BODY_60_79(60, A, B, C, D, E, T);
+    BODY_60_79(61, T, A, B, C, D, E);
+    BODY_60_79(62, E, T, A, B, C, D);
+    BODY_60_79(63, D, E, T, A, B, C);
+
+    const vec_uint32_t w64 = sched_32_79(vw + 16, w60, w56, w48, w36, w32, k);
+    BODY_60_79(64, C, D, E, T, A, B);
+    BODY_60_79(65, B, C, D, E, T, A);
+    BODY_60_79(66, A, B, C, D, E, T);
+    BODY_60_79(67, T, A, B, C, D, E);
+
+    const vec_uint32_t w68 = sched_32_79(vw + 17, w64, w60, w52, w40, w36, k);
+    BODY_60_79(68, E, T, A, B, C, D);
+    BODY_60_79(69, D, E, T, A, B, C);
+    BODY_60_79(70, C, D, E, T, A, B);
+    BODY_60_79(71, B, C, D, E, T, A);
+
+    const vec_uint32_t w72 = sched_32_79(vw + 18, w68, w64, w56, w44, w40, k);
+    BODY_60_79(72, A, B, C, D, E, T);
+    BODY_60_79(73, T, A, B, C, D, E);
+    BODY_60_79(74, E, T, A, B, C, D);
+    BODY_60_79(75, D, E, T, A, B, C);
+
+    // We don't use the last value
+    (void)sched_32_79(vw + 19, w72, w68, w60, w48, w44, k);
+    BODY_60_79(76, C, D, E, T, A, B);
+    BODY_60_79(77, B, C, D, E, T, A);
+    BODY_60_79(78, A, B, C, D, E, T);
+    BODY_60_79(79, T, A, B, C, D, E);
+
+    const uint32_t mask = 0xffffffffUL;
+    state[0] = (state[0] + E) & mask;
+    state[1] = (state[1] + T) & mask;
+    state[2] = (state[2] + A) & mask;
+    state[3] = (state[3] + B) & mask;
+    state[4] = (state[4] + C) & mask;
+
+    data += 64;
+    if (--num == 0) {
+      break;
+    }
+
+    A = state[0];
+    B = state[1];
+    C = state[2];
+    D = state[3];
+    E = state[4];
+  }
+}
+
+#endif  // OPENSSL_PPC64LE
+
+#undef K_00_19
+#undef K_20_39
+#undef K_40_59
+#undef K_60_79
+#undef F_00_19
+#undef F_20_39
+#undef F_40_59
+#undef F_60_79
+#undef BODY_00_19
+#undef BODY_20_39
+#undef BODY_40_59
+#undef BODY_60_79
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/internal.h
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/internal.h
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/internal.h
@@ -183,8 +183,9 @@ extern "C" {
 #if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_STATIC_ARMCAP) && \
     (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
      defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
-// x86, x86_64, and the ARMs need to record the result of a cpuid/getauxval call
-// for the asm to work correctly, unless compiled without asm code.
+// x86, x86_64, the ARMs, and ppc64le need to record the result of a
+// cpuid/getauxval call for the asm to work correctly, unless compiled without
+// asm code.
 #define NEED_CPUID

 // OPENSSL_cpuid_setup initializes the platform-specific feature cache. This
@@ -1723,6 +1724,16 @@ OPENSSL_INLINE int CRYPTO_is_ARMv8_SHA51

 #endif  // OPENSSL_ARM || OPENSSL_AARCH64

+#if defined(OPENSSL_PPC64LE)
+
+// CRYPTO_is_PPC64LE_vcrypto_capable returns true iff the current CPU supports
+// the Vector.AES category of instructions.
+int CRYPTO_is_PPC64LE_vcrypto_capable(void);
+
+extern unsigned long OPENSSL_ppc64le_hwcap2;
+
+#endif  // OPENSSL_PPC64LE
+
 #if defined(BORINGSSL_DISPATCH_TEST)
 // Runtime CPU dispatch testing support

Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/perlasm/ppc-xlate.pl
===================================================================
--- /dev/null
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/perlasm/ppc-xlate.pl
@@ -0,0 +1,320 @@
+#! /usr/bin/env perl
+# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+my $flavour = shift;
+my $output = shift;
+open STDOUT,">$output" or die "can't open $output: $!";
+
+my %GLOBALS;
+my %TYPES;
+my $dotinlocallabels=($flavour=~/linux/)?1:0;
+
+################################################################
+# directives which need special treatment on different platforms
+################################################################
+my $type = sub {
+ my ($dir,$name,$type) = @_;
+
+ $TYPES{$name} = $type;
+ if ($flavour =~ /linux/) {
+ $name =~ s|^\.||;
+ ".type $name,$type";
+ } else {
+ "";
+ }
+};
+my $globl = sub {
+ my $junk = shift;
+ my $name = shift;
+ my $global = \$GLOBALS{$name};
+ my $type = \$TYPES{$name};
+ my $ret;
+
+ $name =~ s|^\.||;
+
+ SWITCH: for ($flavour) {
+ /aix/ && do { if (!$$type) {
+ $$type = "\@function";
+ }
+ if ($$type =~ /function/) {
+ $name = ".$name";
+ }
+ last;
+ };
+ /osx/ && do { $name = "_$name";
+ last;
+ };
+ /linux.*(32|64le)/
+ && do { $ret .= ".globl $name";
+ if (!$$type) {
+ $ret .= "\n.type $name,\@function";
+ $$type = "\@function";
+ }
+ last;
+ };
|
|
+ /linux.*64/ && do { $ret .= ".globl $name";
|
|
+ if (!$$type) {
|
|
+ $ret .= "\n.type $name,\@function";
|
|
+ $$type = "\@function";
|
|
+ }
|
|
+ if ($$type =~ /function/) {
|
|
+ $ret .= "\n.section \".opd\",\"aw\"";
|
|
+ $ret .= "\n.align 3";
|
|
+ $ret .= "\n$name:";
|
|
+ $ret .= "\n.quad .$name,.TOC.\@tocbase,0";
|
|
+ $ret .= "\n.previous";
|
|
+ $name = ".$name";
|
|
+ }
|
|
+ last;
|
|
+ };
|
|
+ }
|
|
+
|
|
+ $ret = ".globl $name" if (!$ret);
|
|
+ $$global = $name;
|
|
+ $ret;
|
|
+};
|
|
+my $text = sub {
|
|
+ my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
|
|
+ $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
|
|
+ $ret;
|
|
+};
|
|
+my $machine = sub {
|
|
+ my $junk = shift;
|
|
+ my $arch = shift;
|
|
+ if ($flavour =~ /osx/)
|
|
+ { $arch =~ s/\"//g;
|
|
+ $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
|
|
+ }
|
|
+ ".machine $arch";
|
|
+};
|
|
+my $size = sub {
|
|
+ if ($flavour =~ /linux/)
|
|
+ { shift;
|
|
+ my $name = shift;
|
|
+ my $real = $GLOBALS{$name} ? \$GLOBALS{$name} : \$name;
|
|
+ my $ret = ".size $$real,.-$$real";
|
|
+ $name =~ s|^\.||;
|
|
+ if ($$real ne $name) {
|
|
+ $ret .= "\n.size $name,.-$$real";
|
|
+ }
|
|
+ $ret;
|
|
+ }
|
|
+ else
|
|
+ { ""; }
|
|
+};
|
|
+my $asciz = sub {
|
|
+ shift;
|
|
+ my $line = join(",",@_);
|
|
+ if ($line =~ /^"(.*)"$/)
|
|
+ { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
|
|
+ else
|
|
+ { ""; }
|
|
+};
|
|
+my $quad = sub {
|
|
+ shift;
|
|
+ my @ret;
|
|
+ my ($hi,$lo);
|
|
+ for (@_) {
|
|
+ if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
|
|
+ { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
|
|
+ elsif (/^([0-9]+)$/o)
|
|
+ { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
|
|
+ else
|
|
+ { $hi=undef; $lo=$_; }
|
|
+
|
|
+ if (defined($hi))
|
|
+ { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
|
|
+ else
|
|
+ { push(@ret,".quad $lo"); }
|
|
+ }
|
|
+ join("\n",@ret);
|
|
+};
|
|
+
|
|
+################################################################
|
|
+# simplified mnemonics not handled by at least one assembler
|
|
+################################################################
|
|
+my $cmplw = sub {
|
|
+ my $f = shift;
|
|
+ my $cr = 0; $cr = shift if ($#_>1);
|
|
+ # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
|
|
+ ($flavour =~ /linux.*32/) ?
|
|
+ " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
|
|
+ " cmplw ".join(',',$cr,@_);
|
|
+};
|
|
+my $bdnz = sub {
|
|
+ my $f = shift;
|
|
+ my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
|
|
+ " bc $bo,0,".shift;
|
|
+} if ($flavour!~/linux/);
|
|
+my $bltlr = sub {
|
|
+ my $f = shift;
|
|
+ my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
|
|
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
|
|
+ " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
|
|
+ " bclr $bo,0";
|
|
+};
|
|
+my $bnelr = sub {
|
|
+ my $f = shift;
|
|
+ my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
|
|
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
|
|
+ " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
|
|
+ " bclr $bo,2";
|
|
+};
|
|
+my $beqlr = sub {
|
|
+ my $f = shift;
|
|
+ my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
|
|
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
|
|
+ " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
|
|
+ " bclr $bo,2";
|
|
+};
|
|
+# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
|
|
+# arguments is 64, with "operand out of range" error.
|
|
+my $extrdi = sub {
|
|
+ my ($f,$ra,$rs,$n,$b) = @_;
|
|
+ $b = ($b+$n)&63; $n = 64-$n;
|
|
+ " rldicl $ra,$rs,$b,$n";
|
|
+};
|
|
+my $vmr = sub {
|
|
+ my ($f,$vx,$vy) = @_;
|
|
+ " vor $vx,$vy,$vy";
|
|
+};
|
|
+
|
|
+# Some ABIs specify vrsave, special-purpose register #256, as reserved
|
|
+# for system use.
|
|
+my $no_vrsave = ($flavour =~ /aix|linux64le/);
|
|
+my $mtspr = sub {
|
|
+ my ($f,$idx,$ra) = @_;
|
|
+ if ($idx == 256 && $no_vrsave) {
|
|
+ " or $ra,$ra,$ra";
|
|
+ } else {
|
|
+ " mtspr $idx,$ra";
|
|
+ }
|
|
+};
|
|
+my $mfspr = sub {
|
|
+ my ($f,$rd,$idx) = @_;
|
|
+ if ($idx == 256 && $no_vrsave) {
|
|
+ " li $rd,-1";
|
|
+ } else {
|
|
+ " mfspr $rd,$idx";
|
|
+ }
|
|
+};
|
|
+
|
|
+# PowerISA 2.06 stuff
|
|
+sub vsxmem_op {
|
|
+ my ($f, $vrt, $ra, $rb, $op) = @_;
|
|
+ " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
|
|
+}
|
|
+# made-up unaligned memory reference AltiVec/VMX instructions
|
|
+my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
|
|
+my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
|
|
+my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
|
|
+my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
|
|
+my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
|
|
+my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
|
|
+
|
|
+# PowerISA 2.07 stuff
|
|
+sub vcrypto_op {
|
|
+ my ($f, $vrt, $vra, $vrb, $op) = @_;
|
|
+ " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
|
|
+}
|
|
+my $vcipher = sub { vcrypto_op(@_, 1288); };
|
|
+my $vcipherlast = sub { vcrypto_op(@_, 1289); };
|
|
+my $vncipher = sub { vcrypto_op(@_, 1352); };
|
|
+my $vncipherlast= sub { vcrypto_op(@_, 1353); };
|
|
+my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
|
|
+my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
|
|
+my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
|
|
+my $vpmsumb = sub { vcrypto_op(@_, 1032); };
|
|
+my $vpmsumd = sub { vcrypto_op(@_, 1224); };
|
|
+my $vpmsumh = sub { vcrypto_op(@_, 1096); };
+my $vpmsumw = sub { vcrypto_op(@_, 1160); };
+my $vaddudm = sub { vcrypto_op(@_, 192); };
+
+my $mtsle = sub {
+ my ($f, $arg) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
+};
+
+# PowerISA 3.0 stuff
+my $maddhdu = sub {
+ my ($f, $rt, $ra, $rb, $rc) = @_;
+ " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|49;
+};
+my $maddld = sub {
+ my ($f, $rt, $ra, $rb, $rc) = @_;
+ " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|51;
+};
+
+my $darn = sub {
+ my ($f, $rt, $l) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1);
+};
+
+print <<___;
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+#endif
+
+#if !defined(OPENSSL_NO_ASM) && defined(__powerpc64__) && defined(__ELF__)
+___
+
+while($line=<>) {
+
+ $line =~ s|[#!;].*$||; # get rid of asm-style comments...
+ $line =~ s|/\*.*\*/||; # ... and C-style comments...
+ $line =~ s|^\s+||; # ... and skip white spaces in beginning...
+ $line =~ s|\s+$||; # ... and at the end
+
+ {
+ $line =~ s|\.L(\w+)|L$1|g; # common denominator for Locallabel
+ $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
+ }
+
+ {
+ $line =~ s|(^[\.\w]+)\:\s*||;
+ my $label = $1;
+ if ($label) {
+ my $xlated = ($GLOBALS{$label} or $label);
+ print "$xlated:";
+ if ($flavour =~ /linux.*64le/) {
+ if ($TYPES{$label} =~ /function/) {
+ printf "\n.localentry %s,0\n",$xlated;
+ }
+ }
+ }
+ }
+
+ {
+ $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
+ my $c = $1; $c = "\t" if ($c eq "");
+ my $mnemonic = $2;
+ my $f = $3;
+ my $opcode = eval("\$$mnemonic");
+ $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
+ if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
+ elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
+ }
+
+ print $line if ($line);
+ print "\n";
+}
+
+print <<___;
+#endif // !OPENSSL_NO_ASM && __powerpc64__ && __ELF__
+#if defined(__ELF__)
+// See https://www.airs.com/blog/archives/518.
+.section .note.GNU-stack,"",\%progbits
+#endif
+___
+
+close STDOUT or die "error closing STDOUT: $!";
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/test/abi_test.h
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/test/abi_test.h
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/test/abi_test.h
@@ -179,7 +179,78 @@ struct alignas(16) Reg128 {
 CALLER_STATE_REGISTER(uint64_t, x28) \
 CALLER_STATE_REGISTER(uint64_t, x29)

-#endif // X86_64 || X86 || ARM || AARCH64
+#elif defined(OPENSSL_PPC64LE)
+
+// CRReg only compares the CR2-CR4 bits of a CR register.
+struct CRReg {
+ uint32_t masked() const { return value & 0x00fff000; }
+ bool operator==(CRReg r) const { return masked() == r.masked(); }
+ bool operator!=(CRReg r) const { return masked() != r.masked(); }
+ uint32_t value;
+};
+
+// References:
+// ELFv2: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
+//
+// Note vector and floating-point registers on POWER have two different names.
+// Originally, there were 32 floating-point registers and 32 vector registers,
+// labelled f0-f31 and v0-v31 respectively. Later, VSX (Vector Scalar Extension)
+// unified them into 64 registers vs0-vs63. f0-f31 map to the lower halves of
+// vs0-vs31. v0-v31 map to vs32-vs63. The ABI was defined in terms of pre-VSX
+// names, so we use those names here. In particular, f14-f31 are
+// callee-saved, but the upper halves of vs14-vs31 are not.
+#define LOOP_CALLER_STATE_REGISTERS() \
+ CALLER_STATE_REGISTER(Reg128, v20) \
+ CALLER_STATE_REGISTER(Reg128, v21) \
+ CALLER_STATE_REGISTER(Reg128, v22) \
+ CALLER_STATE_REGISTER(Reg128, v23) \
+ CALLER_STATE_REGISTER(Reg128, v24) \
+ CALLER_STATE_REGISTER(Reg128, v25) \
+ CALLER_STATE_REGISTER(Reg128, v26) \
+ CALLER_STATE_REGISTER(Reg128, v27) \
+ CALLER_STATE_REGISTER(Reg128, v28) \
+ CALLER_STATE_REGISTER(Reg128, v29) \
+ CALLER_STATE_REGISTER(Reg128, v30) \
+ CALLER_STATE_REGISTER(Reg128, v31) \
+ CALLER_STATE_REGISTER(uint64_t, r14) \
+ CALLER_STATE_REGISTER(uint64_t, r15) \
+ CALLER_STATE_REGISTER(uint64_t, r16) \
+ CALLER_STATE_REGISTER(uint64_t, r17) \
+ CALLER_STATE_REGISTER(uint64_t, r18) \
+ CALLER_STATE_REGISTER(uint64_t, r19) \
+ CALLER_STATE_REGISTER(uint64_t, r20) \
+ CALLER_STATE_REGISTER(uint64_t, r21) \
+ CALLER_STATE_REGISTER(uint64_t, r22) \
+ CALLER_STATE_REGISTER(uint64_t, r23) \
+ CALLER_STATE_REGISTER(uint64_t, r24) \
+ CALLER_STATE_REGISTER(uint64_t, r25) \
+ CALLER_STATE_REGISTER(uint64_t, r26) \
+ CALLER_STATE_REGISTER(uint64_t, r27) \
+ CALLER_STATE_REGISTER(uint64_t, r28) \
+ CALLER_STATE_REGISTER(uint64_t, r29) \
+ CALLER_STATE_REGISTER(uint64_t, r30) \
+ CALLER_STATE_REGISTER(uint64_t, r31) \
+ CALLER_STATE_REGISTER(uint64_t, f14) \
+ CALLER_STATE_REGISTER(uint64_t, f15) \
+ CALLER_STATE_REGISTER(uint64_t, f16) \
+ CALLER_STATE_REGISTER(uint64_t, f17) \
+ CALLER_STATE_REGISTER(uint64_t, f18) \
+ CALLER_STATE_REGISTER(uint64_t, f19) \
+ CALLER_STATE_REGISTER(uint64_t, f20) \
+ CALLER_STATE_REGISTER(uint64_t, f21) \
+ CALLER_STATE_REGISTER(uint64_t, f22) \
+ CALLER_STATE_REGISTER(uint64_t, f23) \
+ CALLER_STATE_REGISTER(uint64_t, f24) \
+ CALLER_STATE_REGISTER(uint64_t, f25) \
+ CALLER_STATE_REGISTER(uint64_t, f26) \
+ CALLER_STATE_REGISTER(uint64_t, f27) \
+ CALLER_STATE_REGISTER(uint64_t, f28) \
+ CALLER_STATE_REGISTER(uint64_t, f29) \
+ CALLER_STATE_REGISTER(uint64_t, f30) \
+ CALLER_STATE_REGISTER(uint64_t, f31) \
+ CALLER_STATE_REGISTER(CRReg, cr)
+
+#endif // X86_64 || X86 || ARM || AARCH64 || PPC64LE

 // Enable ABI testing if all of the following are true.
 //
@@ -231,6 +302,12 @@ inline crypto_word_t ToWord(T t) {
 // on 32-bit architectures for simplicity.
 static_assert(sizeof(T) == 4, "parameter types must be word-sized");
 return (crypto_word_t)t;
+#elif defined(OPENSSL_PPC64LE)
+ // ELFv2, section 2.2.2.3 says the parameter save area sign- or zero-extends
+ // parameters passed in memory. Section 2.2.3 is unclear on how to handle
+ // register parameters, but section 2.2.2.3 additionally says that the memory
+ // copy of a parameter is identical to the register one.
+ return (crypto_word_t)t;
 #elif defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)
 // AAPCS64, section 5.4.2, clauses C.7 and C.14 says any remaining bits in
 // aarch are unspecified. iOS64 contradicts this and says the callee extends
@@ -285,9 +362,9 @@ inline crypto_word_t ToWord(T t) {
 template <typename R, typename... Args>
 inline crypto_word_t CheckImpl(Result *out, bool unwind, R (*func)(Args...),
 typename DeductionGuard<Args>::Type... args) {
- // We only support up to 8 arguments, so all arguments on aarch64 are passed
- // in registers. This is simpler and avoids the iOS discrepancy around packing
- // small arguments on the stack. (See the iOS64 reference.)
+ // We only support up to 8 arguments, so all arguments on aarch64 and ppc64le
+ // are passed in registers. This is simpler and avoids the iOS discrepancy
+ // around packing small arguments on the stack. (See the iOS64 reference.)
 static_assert(sizeof...(args) <= 8,
 "too many arguments for abi_test_trampoline");

Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/test/asm/trampoline-ppc.pl
===================================================================
--- /dev/null
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/test/asm/trampoline-ppc.pl
@@ -0,0 +1,262 @@
+#!/usr/bin/env perl
+# Copyright (c) 2019, Google Inc.
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# This file defines helper functions for crypto/test/abi_test.h on ppc64le. See
+# that header for details on how to use this.
+#
+# For convenience, this file is linked into libcrypto, where consuming builds
+# already support architecture-specific sources. The static linker should drop
+# this code in non-test binaries. This includes a shared library build of
+# libcrypto, provided --gc-sections or equivalent is used.
+#
+# References:
+#
+# ELFv2: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
+
+use strict;
+
+my $flavour = shift;
+my $output = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/;
+my $dir = $1;
+my $xlate;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\"";
+*STDOUT = *OUT;
+
+unless ($flavour =~ /linux.*64le/) {
+ die "This file only supports the ELFv2 ABI, used by ppc64le";
+}
+
+my $code = "";
+
+sub load_or_store_regs {
+ # $op is "l" or "st".
+ my ($op, $base_reg, $base_offset) = @_;
+ # Vector registers.
+ foreach (20..31) {
+ my $offset = $base_offset + ($_ - 20) * 16;
+ # Vector registers only support indexed register addressing.
+ $code .= "\tli\tr11, $offset\n";
+ $code .= "\t${op}vx\tv$_, r11, $base_reg\n";
+ }
+ # Save general registers.
+ foreach (14..31) {
+ my $offset = $base_offset + 192 + ($_ - 14) * 8;
+ $code .= "\t${op}d\tr$_, $offset($base_reg)\n";
+ }
+ # Save floating point registers.
+ foreach (14..31) {
+ my $offset = $base_offset + 336 + ($_ - 14) * 8;
+ $code .= "\t${op}fd\tf$_, $offset($base_reg)\n";
+ }
+}
+
+sub load_regs {
+ my ($base_reg, $base_offset) = @_;
+ load_or_store_regs("l", $base_reg, $base_offset);
+}
+
+sub store_regs {
+ my ($base_reg, $base_offset) = @_;
+ load_or_store_regs("st", $base_reg, $base_offset);
+}
+
+my ($func, $state, $argv, $argc) = ("r3", "r4", "r5", "r6");
+$code .= <<____;
+.machine "any"
+.text
+
+# abi_test_trampoline loads callee-saved registers from |state|, calls |func|
+# with |argv|, then saves the callee-saved registers into |state|. It returns
+# the result of |func|. The |unwind| argument is unused.
+# uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
+# const uint64_t *argv, size_t argc,
+# uint64_t unwind);
+.globl abi_test_trampoline
+.align 5
+abi_test_trampoline:
+ # LR is saved into the caller's stack frame.
+ mflr r0
+ std r0, 16(r1)
+
+ # Allocate 66*8 = 528 bytes of stack frame. From the top of the stack
+ # to the bottom, the stack frame is:
+ #
+ # 0(r1) - Back chain pointer
+ # 8(r1) - CR save area
+ # 16(r1) - LR save area (for |func|)
+ # 24(r1) - TOC pointer save area
+ # 32(r1) - Saved copy of |state|
+ # 40(r1) - Padding
+ # 48(r1) - Vector register save area (v20-v31, 12 registers)
+ # 240(r1) - General register save area (r14-r31, 18 registers)
+ # 384(r1) - Floating point register save area (f14-f31, 18 registers)
+ #
+ # Note the layouts of the register save areas and CallerState match.
+ #
+ # In the ELFv2 ABI, the parameter save area is optional if the function
+ # is non-variadic and all parameters fit in registers. We only support
+ # such functions, so we omit it to test that |func| does not rely on it.
+ stdu r1, -528(r1)
+
+ mfcr r0
+ std r0, 8(r1) # Save CR
+ std r2, 24(r1) # Save TOC
+ std $state, 32(r1) # Save |state|
+____
+# Save registers to the stack.
+store_regs("r1", 48);
+# Load registers from the caller.
+load_regs($state, 0);
+$code .= <<____;
+ # Load CR from |state|.
+ ld r0, 480($state)
+ mtcr r0
+
+ # Move parameters into temporary registers so they are not clobbered.
+ addi r11, $argv, -8 # Adjust for ldu below
+ mr r12, $func
+
+ # Load parameters into registers.
+ cmpdi $argc, 0
+ beq .Largs_done
+ mtctr $argc
+ ldu r3, 8(r11)
+ bdz .Largs_done
+ ldu r4, 8(r11)
+ bdz .Largs_done
+ ldu r5, 8(r11)
+ bdz .Largs_done
+ ldu r6, 8(r11)
+ bdz .Largs_done
+ ldu r7, 8(r11)
+ bdz .Largs_done
+ ldu r8, 8(r11)
+ bdz .Largs_done
+ ldu r9, 8(r11)
+ bdz .Largs_done
+ ldu r10, 8(r11)
+
+.Largs_done:
+ li r2, 0 # Clear TOC to test |func|'s global entry point
+ mtctr r12
+ bctrl
+ ld r2, 24(r1) # Restore TOC
+
+ ld $state, 32(r1) # Reload |state|
+____
+# Output resulting registers to the caller.
+store_regs($state, 0);
+# Restore registers from the stack.
+load_regs("r1", 48);
+$code .= <<____;
+ mfcr r0
+ std r0, 480($state) # Output CR to caller
+ ld r0, 8(r1)
+ mtcrf 0b00111000, r0 # Restore CR2-CR4
+ addi r1, r1, 528
+ ld r0, 16(r1) # Restore LR
+ mtlr r0
+ blr
+.size abi_test_trampoline,.-abi_test_trampoline
+____
+
+# abi_test_clobber_* clobbers the corresponding register. These are used to test
+# the ABI-testing framework.
+foreach (0..31) {
+ # r1 is the stack pointer. r13 is the thread pointer.
+ next if ($_ == 1 || $_ == 13);
+ $code .= <<____;
+.globl abi_test_clobber_r$_
+.align 5
+abi_test_clobber_r$_:
+ li r$_, 0
+ blr
+.size abi_test_clobber_r$_,.-abi_test_clobber_r$_
+____
+}
+
+foreach (0..31) {
+ $code .= <<____;
+.globl abi_test_clobber_f$_
+.align 4
+abi_test_clobber_f$_:
+ li r0, 0
+ # Use the red zone.
+ std r0, -8(r1)
+ lfd f$_, -8(r1)
+ blr
+.size abi_test_clobber_f$_,.-abi_test_clobber_f$_
+____
+}
+
+foreach (0..31) {
+ $code .= <<____;
+.globl abi_test_clobber_v$_
+.align 4
+abi_test_clobber_v$_:
+ vxor v$_, v$_, v$_
+ blr
+.size abi_test_clobber_v$_,.-abi_test_clobber_v$_
+____
+}
+
+foreach (0..7) {
+ # PPC orders CR fields in big-endian, so the mask is reversed from what one
+ # would expect.
+ my $mask = 1 << (7 - $_);
+ $code .= <<____;
+.globl abi_test_clobber_cr$_
+.align 4
+abi_test_clobber_cr$_:
+ # Flip the bits on cr$_ rather than setting to zero. With a four-bit
+ # register, zeroing it will do nothing 1 in 16 times.
+ mfcr r0
+ not r0, r0
+ mtcrf $mask, r0
+ blr
+.size abi_test_clobber_cr$_,.-abi_test_clobber_cr$_
+____
+}
+
+$code .= <<____;
+.globl abi_test_clobber_ctr
+.align 4
+abi_test_clobber_ctr:
+ li r0, 0
+ mtctr r0
+ blr
+.size abi_test_clobber_ctr,.-abi_test_clobber_ctr
+
+.globl abi_test_clobber_lr
+.align 4
+abi_test_clobber_lr:
+ mflr r0
+ mtctr r0
+ li r0, 0
+ mtlr r0
+ bctr
+.size abi_test_clobber_lr,.-abi_test_clobber_lr
+
+____
+
+print $code;
+close STDOUT or die "error closing STDOUT: $!";
Index: chromium-130.0.6723.44/third_party/boringssl/src/include/openssl/target.h
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/include/openssl/target.h
+++ chromium-130.0.6723.44/third_party/boringssl/src/include/openssl/target.h
@@ -34,6 +34,9 @@
 #elif defined(__ARMEL__) || defined(_M_ARM)
 #define OPENSSL_32_BIT
 #define OPENSSL_ARM
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && defined(_LITTLE_ENDIAN)
+#define OPENSSL_64_BIT
+#define OPENSSL_PPC64LE
 #elif defined(__MIPSEL__) && !defined(__LP64__)
 #define OPENSSL_32_BIT
 #define OPENSSL_MIPS
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/acvp/modulewrapper/main.cc
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/util/fipstools/acvp/modulewrapper/main.cc
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/acvp/modulewrapper/main.cc
@@ -37,6 +37,8 @@ int main(int argc, char **argv) {
 puts("ARM (32-bit)");
 #elif defined(OPENSSL_AARCH64)
 puts("aarch64 (64-bit)");
+#elif defined(OPENSSL_PPC64LE)
+ puts("PPC64LE (64-bit)");
 #else
 #error "FIPS build not supported on this architecture"
 #endif
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/delocate.go
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/util/fipstools/delocate/delocate.go
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/delocate.go
@@ -54,7 +54,8 @@ type stringWriter interface {
 type processorType int

 const (
- x86_64 processorType = iota + 1
+ ppc64le processorType = iota + 1
+ x86_64
 aarch64
 )

@@ -67,6 +68,8 @@ type delocation struct {

 // symbols is the set of symbols defined in the module.
 symbols map[string]struct{}
+ // localEntrySymbols is the set of symbols with .localentry directives.
+ localEntrySymbols map[string]struct{}
 // redirectors maps from out-call symbol name to the name of a
 // redirector function for that symbol. E.g. “memcpy” ->
 // “bcm_redirector_memcpy”.
@@ -75,6 +78,9 @@ type delocation struct {
 // should be used to reference it. E.g. “P384_data_storage” ->
 // “P384_data_storage”.
 bssAccessorsNeeded map[string]string
+ // tocLoaders is a set of symbol names for which TOC helper functions
+ // are required. (ppc64le only.)
+ tocLoaders map[string]struct{}
 // gotExternalsNeeded is a set of symbol names for which we need
 // “delta” symbols: symbols that contain the offset from their location
 // to the memory in question.
@@ -151,6 +157,8 @@ func (d *delocation) processInput(input
 switch d.processor {
 case x86_64:
 statement, err = d.processIntelInstruction(statement, node.up)
+ case ppc64le:
+ statement, err = d.processPPCInstruction(statement, node.up)
 case aarch64:
 statement, err = d.processAarch64Instruction(statement, node.up)
 default:
@@ -247,7 +255,7 @@ func (d *delocation) processDirective(st
 d.writeNode(statement)
 break

- case ".debug", ".note":
+ case ".debug", ".note", ".toc":
 d.writeNode(statement)
 break

@@ -336,6 +344,10 @@ func (d *delocation) processLabelContain
 d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n")
 }

+ if name == ".localentry" {
+ d.output.WriteString(localEntryName(args[0]) + ":\n")
+ }
+
 return statement, nil
 }

@@ -659,6 +671,191 @@ func (d *delocation) processAarch64Instr
 return statement, nil
 }

+/* ppc64le
+
+[PABI]: “64-Bit ELF V2 ABI Specification. Power Architecture.” March 21st,
+ 2017
+
+(Also useful is “Power ISA Version 2.07 B”. Note that version three of that
+document is /not/ good as that's POWER9 specific.)
+
+ppc64le doesn't have IP-relative addressing and does a lot to work around this.
+Rather than reference a PLT and GOT directly, it has a single structure called
+the TOC (Table Of Contents). Within the TOC is the contents of .rodata, .data,
+.got, .plt, .bss, etc sections [PABI;3.3].
+
+A pointer to the TOC is maintained in r2 and the following pattern is used to
+load the address of an element into a register:
+
+ addis <address register>, 2, foo@toc@ha
+ addi <address register>, <address register>, foo@toc@l
+
+The “addis” instruction shifts a signed constant left 16 bits and adds the
+result to its second argument, saving the result in the first argument. The
+“addi” instruction does the same, but without shifting. Thus the “@toc@ha"
+suffix on a symbol means “the top 16 bits of the TOC offset” and “@toc@l” means
+“the bottom 16 bits of the offset”. However, note that both values are signed,
+thus offsets in the top half of a 64KB chunk will have an @ha value that's one
+greater than expected and a negative @l value.
+
+The TOC is specific to a “module” (basically an executable or shared object).
+This means that there's not a single TOC in a process and that r2 needs to
+change as control moves between modules. Thus functions have two entry points:
+the “global” entry point and the “local” entry point. Jumps from within the
+same module can use the local entry while jumps from other modules must use the
+global entry. The global entry establishes the correct value of r2 before
+running the function and the local entry skips that code.
+
+The global entry point for a function is defined by its label. The local entry
+is a power-of-two number of bytes from the global entry, set by the
+“.localentry” directive. (ppc64le instructions are always 32 bits, so an offset
+of 1 or 2 bytes is treated as an offset of zero.)
+
+In order to help the global entry code set r2 to point to the local TOC, r12 is
+set to the address of the global entry point when called [PABI;2.2.1.1]. Thus
+the global entry will typically use an addis+addi pair to add a known offset to
+r12 and store it in r2. For example:
+
+foo:
+ addis 2, 12, .TOC. - foo@ha
+ addi 2, 2, .TOC. - foo@l
+
+(It's worth noting that the '@' operator binds very loosely, so the 3rd
+arguments parse as (.TOC. - foo)@ha and (.TOC. - foo)@l.)
+
+When calling a function, the compiler doesn't know whether that function is in
+the same module or not. Thus it doesn't know whether r12 needs to be set nor
+whether r2 will be clobbered on return. Rather than always assume the worst,
+the linker fixes stuff up once it knows that a call is going out of module:
+
+Firstly, calling, say, memcpy (which we assume to be in a different module)
+won't actually jump directly to memcpy, or even a PLT resolution function.
+It'll call a synthesised function that:
+ a) saves r2 in the caller's stack frame
+ b) loads the address of memcpy@PLT into r12
+ c) jumps to r12.
+
+As this synthesised function loads memcpy@PLT, a call to memcpy from the
+compiled code just references “memcpy” directly, not “memcpy@PLT”.
+
+Since it jumps directly to memcpy@PLT, it can't restore r2 on return. Thus
+calls must be followed by a nop. If the call ends up going out-of-module, the
+linker will rewrite that nop to load r2 from the stack.
+
+Speaking of the stack, the stack pointer is kept in r1 and there's a 288-byte
+red-zone. The format of the stack frame is defined [PABI;2.2.2] and must be
+followed as called functions will write into their parent's stack frame. For
+example, the synthesised out-of-module trampolines will save r2 24 bytes into
+the caller's frame and all non-leaf functions save the return address 16 bytes
+into the caller's frame.
+
+A final point worth noting: some RISC ISAs have r0 wired to zero: all reads
+result in zero and all writes are discarded. POWER does something a little like
+that, but r0 is only special in certain argument positions for certain
+instructions. You just have to read the manual to know which they are.
+
+
+Delocation is easier than Intel because there's just TOC references, but it's
+also harder because there's no IP-relative addressing.
+
+Jumps are IP-relative however, and have a 24-bit immediate value. So we can
+jump to functions that set a register to the needed value. (r3 is the
+return-value register and so that's what is generally used here.) */
+
+// isPPC64LEAPair recognises an addis+addi pair that's adding the offset of
+// source to relative and writing the result to target.
+func (d *delocation) isPPC64LEAPair(statement *node32) (target, source, relative string, ok bool) {
+ instruction := skipWS(statement.up).up
+ assertNodeType(instruction, ruleInstructionName)
+ name1 := d.contents(instruction)
+ args1 := instructionArgs(instruction.next)
+
+ statement = statement.next
+ instruction = skipWS(statement.up).up
+ assertNodeType(instruction, ruleInstructionName)
+ name2 := d.contents(instruction)
+ args2 := instructionArgs(instruction.next)
+
+ if name1 != "addis" ||
+ len(args1) != 3 ||
+ name2 != "addi" ||
+ len(args2) != 3 {
+ return "", "", "", false
+ }
+
+ target = d.contents(args1[0])
+ relative = d.contents(args1[1])
+ source1 := d.contents(args1[2])
+ source2 := d.contents(args2[2])
+
+ if !strings.HasSuffix(source1, "@ha") ||
+ !strings.HasSuffix(source2, "@l") ||
+ source1[:len(source1)-3] != source2[:len(source2)-2] ||
+ d.contents(args2[0]) != target ||
+ d.contents(args2[1]) != target {
+ return "", "", "", false
+ }
+
+ source = source1[:len(source1)-3]
+ ok = true
+ return
+}
+
+// establishTOC writes the global entry prelude for a function. The standard
+// prelude involves relocations so this version moves the relocation outside
+// the integrity-checked area.
+func establishTOC(w stringWriter) {
+ w.WriteString("999:\n")
+ w.WriteString("\taddis 2, 12, .LBORINGSSL_external_toc-999b@ha\n")
+ w.WriteString("\taddi 2, 2, .LBORINGSSL_external_toc-999b@l\n")
+ w.WriteString("\tld 12, 0(2)\n")
+ w.WriteString("\tadd 2, 2, 12\n")
+}
+
+// loadTOCFuncName returns the name of a synthesized function that sets r3 to
+// the value of “symbol+offset”.
+func loadTOCFuncName(symbol, offset string) string {
+ symbol = strings.Replace(symbol, ".", "_dot_", -1)
+ ret := ".Lbcm_loadtoc_" + symbol
+ if len(offset) != 0 {
+ offset = strings.Replace(offset, "+", "_plus_", -1)
+ offset = strings.Replace(offset, "-", "_minus_", -1)
+ ret += "_" + offset
+ }
+ return ret
+}
+
+func (d *delocation) loadFromTOC(w stringWriter, symbol, offset, dest string) wrapperFunc {
+ d.tocLoaders[symbol+"\x00"+offset] = struct{}{}
+
+ return func(k func()) {
+ w.WriteString("\taddi 1, 1, -288\n") // Clear the red zone.
+ w.WriteString("\tmflr " + dest + "\n") // Stash the link register.
+ w.WriteString("\tstd " + dest + ", -8(1)\n")
+ // The TOC loader will use r3, so stash it if necessary.
+ if dest != "3" {
+ w.WriteString("\tstd 3, -16(1)\n")
+ }
+
+ // Because loadTOCFuncName returns a “.L” name, we don't need a
+ // nop after this call.
+ w.WriteString("\tbl " + loadTOCFuncName(symbol, offset) + "\n")
+
+ // Cycle registers around. We need r3 -> destReg, -8(1) ->
+ // lr and, optionally, -16(1) -> r3.
+ w.WriteString("\tstd 3, -24(1)\n")
+ w.WriteString("\tld 3, -8(1)\n")
+ w.WriteString("\tmtlr 3\n")
+ w.WriteString("\tld " + dest + ", -24(1)\n")
+ if dest != "3" {
+ w.WriteString("\tld 3, -16(1)\n")
+ }
+ w.WriteString("\taddi 1, 1, 288\n")
+
+ k()
+ }
+}
+
 func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
 for symRef != nil && symRef.pegRule == ruleOffset {
 offset := d.contents(symRef)
@@ -713,6 +910,215 @@ func (d *delocation) parseMemRef(memRef
 return
 }

+func (d *delocation) processPPCInstruction(statement, instruction *node32) (*node32, error) {
+ assertNodeType(instruction, ruleInstructionName)
+ instructionName := d.contents(instruction)
+ isBranch := instructionName[0] == 'b'
+
+ argNodes := instructionArgs(instruction.next)
+
+ var wrappers wrapperStack
+ var args []string
+ changed := false
+
+Args:
+ for i, arg := range argNodes {
+ fullArg := arg
+ isIndirect := false
+
+ if arg.pegRule == ruleIndirectionIndicator {
+ arg = arg.next
+ isIndirect = true
+ }
+
+ switch arg.pegRule {
+ case ruleRegisterOrConstant, ruleLocalLabelRef:
+ args = append(args, d.contents(fullArg))
+
+ case ruleTOCRefLow:
+ return nil, errors.New("Found low TOC reference outside preamble pattern")
+
+ case ruleTOCRefHigh:
+ target, _, relative, ok := d.isPPC64LEAPair(statement)
+ if !ok {
+ return nil, errors.New("Found high TOC reference outside preamble pattern")
+ }
+
+ if relative != "12" {
+ return nil, fmt.Errorf("preamble is relative to %q, not r12", relative)
+ }
+
+ if target != "2" {
+ return nil, fmt.Errorf("preamble is setting %q, not r2", target)
+ }
+
+ statement = statement.next
+ establishTOC(d.output)
+ instructionName = ""
+ changed = true
+ break Args
+
+ case ruleMemoryRef:
+ symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
+ changed = didChange
+
+ if len(symbol) > 0 {
+ if _, localEntrySymbol := d.localEntrySymbols[symbol]; localEntrySymbol && isBranch {
+ symbol = localEntryName(symbol)
+ changed = true
+ } else if _, knownSymbol := d.symbols[symbol]; knownSymbol {
+ symbol = localTargetName(symbol)
+ changed = true
+ } else if !symbolIsLocal && !isSynthesized(symbol) && len(section) == 0 {
+ changed = true
+ d.redirectors[symbol] = redirectorName(symbol)
+ symbol = redirectorName(symbol)
+ // TODO(davidben): This should sanity-check the next
+ // instruction is a nop and ideally remove it.
+ wrappers = append(wrappers, func(k func()) {
+ k()
+ // Like the linker's PLT stubs, redirector functions
+ // expect callers to restore r2.
+ d.output.WriteString("\tld 2, 24(1)\n")
+ })
+ }
+ }
+
+ switch section {
+ case "":
+
+ case "tls":
+ // This section identifier just tells the
+ // assembler to use r13, the pointer to the
+ // thread-local data [PABI;3.7.3.3].
+
+ case "toc@ha":
+ // Delete toc@ha instructions. Per
+ // [PABI;3.6.3], the linker is allowed to erase
+ // toc@ha instructions. We take advantage of
+ // this by unconditionally erasing the toc@ha
+ // instructions and doing the full lookup when
+ // processing toc@l.
+ //
+ // Note that any offset here applies before @ha
+ // and @l. That is, 42+foo@toc@ha is
+ // #ha(42+foo-.TOC.), not 42+#ha(foo-.TOC.). Any
+ // corresponding toc@l references are required
+ // by the ABI to have the same offset. The
+ // offset will be incorporated in full when
+ // those are processed.
+ if instructionName != "addis" || len(argNodes) != 3 || i != 2 || args[1] != "2" {
+ return nil, errors.New("can't process toc@ha reference")
+ }
+ changed = true
+ instructionName = ""
+ break Args
+
+ case "toc@l":
+ // Per [PABI;3.6.3], this instruction must take
+ // as input a register which was the output of
|
|
+ // a toc@ha computation and compute the actual
|
|
+ // address of some symbol. The toc@ha
|
|
+ // computation was elided, so we ignore that
|
|
+ // input register and compute the address
|
|
+ // directly.
|
|
+ changed = true
|
|
+
|
|
+ // For all supported toc@l instructions, the
|
|
+ // destination register is the first argument.
|
|
+ destReg := args[0]
|
|
+
|
|
+ wrappers = append(wrappers, d.loadFromTOC(d.output, symbol, offset, destReg))
|
|
+ switch instructionName {
|
|
+ case "addi":
|
|
+ // The original instruction was:
|
|
+ // addi destReg, tocHaReg, offset+symbol@toc@l
|
|
+ instructionName = ""
|
|
+
|
|
+ case "ld", "lhz", "lwz":
|
|
+ // The original instruction was:
|
|
+ // l?? destReg, offset+symbol@toc@l(tocHaReg)
|
|
+ //
|
|
+ // We transform that into the
|
|
+ // equivalent dereference of destReg:
|
|
+ // l?? destReg, 0(destReg)
|
|
+ origInstructionName := instructionName
|
|
+ instructionName = ""
|
|
+
|
|
+ assertNodeType(memRef, ruleBaseIndexScale)
|
|
+ assertNodeType(memRef.up, ruleRegisterOrConstant)
|
|
+ if memRef.next != nil || memRef.up.next != nil {
|
|
+ return nil, errors.New("expected single register in BaseIndexScale for ld argument")
|
|
+ }
|
|
+
|
|
+ baseReg := destReg
|
|
+ if baseReg == "0" {
|
|
+ // Register zero is special as the base register for a load.
|
|
+ // Avoid it by spilling and using r3 instead.
|
|
+ baseReg = "3"
|
|
+ wrappers = append(wrappers, func(k func()) {
|
|
+ d.output.WriteString("\taddi 1, 1, -288\n") // Clear the red zone.
|
|
+ d.output.WriteString("\tstd " + baseReg + ", -8(1)\n")
|
|
+ d.output.WriteString("\tmr " + baseReg + ", " + destReg + "\n")
|
|
+ k()
|
|
+ d.output.WriteString("\tld " + baseReg + ", -8(1)\n")
|
|
+ d.output.WriteString("\taddi 1, 1, 288\n") // Clear the red zone.
|
|
+ })
|
|
+ }
|
|
+
|
|
+ wrappers = append(wrappers, func(k func()) {
|
|
+ d.output.WriteString("\t" + origInstructionName + " " + destReg + ", 0(" + baseReg + ")\n")
|
|
+ })
|
|
+ default:
|
|
+ return nil, fmt.Errorf("can't process TOC argument to %q", instructionName)
|
|
+ }
|
|
+
|
|
+ default:
|
|
+ return nil, fmt.Errorf("Unknown section type %q", section)
|
|
+ }
|
|
+
|
|
+ argStr := ""
|
|
+ if isIndirect {
|
|
+ argStr += "*"
|
|
+ }
|
|
+ argStr += symbol
|
|
+ if len(offset) > 0 {
|
|
+ argStr += offset
|
|
+ }
|
|
+ if len(section) > 0 {
|
|
+ argStr += "@"
|
|
+ argStr += section
|
|
+ }
|
|
+
|
|
+ for ; memRef != nil; memRef = memRef.next {
|
|
+ argStr += d.contents(memRef)
|
|
+ }
|
|
+
|
|
+ args = append(args, argStr)
|
|
+
|
|
+ default:
|
|
+ panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if changed {
|
|
+ d.writeCommentedNode(statement)
|
|
+
|
|
+ var replacement string
|
|
+ if len(instructionName) > 0 {
|
|
+ replacement = "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
|
|
+ }
|
|
+
|
|
+ wrappers.do(func() {
|
|
+ d.output.WriteString(replacement)
|
|
+ })
|
|
+ } else {
|
|
+ d.writeNode(statement)
|
|
+ }
|
|
+
|
|
+ return statement, nil
|
|
+}
|
|
+
|
|
/* Intel */
|
|
|
|
type instructionType int
|
|
@@ -1345,6 +1751,8 @@ func writeAarch64Function(w stringWriter
|
|
func transform(w stringWriter, inputs []inputFile) error {
|
|
// symbols contains all defined symbols.
|
|
symbols := make(map[string]struct{})
|
|
+ // localEntrySymbols contains all symbols with a .localentry directive.
|
|
+ localEntrySymbols := make(map[string]struct{})
|
|
// fileNumbers is the set of IDs seen in .file directives.
|
|
fileNumbers := make(map[int]struct{})
|
|
// maxObservedFileNumber contains the largest seen file number in a
|
|
@@ -1368,6 +1776,25 @@ func transform(w stringWriter, inputs []
|
|
}, ruleStatement, ruleLabel, ruleSymbolName)
|
|
|
|
forEachPath(input.ast.up, func(node *node32) {
|
|
+ node = node.up
|
|
+ assertNodeType(node, ruleLabelContainingDirectiveName)
|
|
+ directive := input.contents[node.begin:node.end]
|
|
+ if directive != ".localentry" {
|
|
+ return
|
|
+ }
|
|
+ // Extract the first argument.
|
|
+ node = skipWS(node.next)
|
|
+ assertNodeType(node, ruleSymbolArgs)
|
|
+ node = node.up
|
|
+ assertNodeType(node, ruleSymbolArg)
|
|
+ symbol := input.contents[node.begin:node.end]
|
|
+ if _, ok := localEntrySymbols[symbol]; ok {
|
|
+ panic(fmt.Sprintf("Duplicate .localentry directive found: %q in %q", symbol, input.path))
|
|
+ }
|
|
+ localEntrySymbols[symbol] = struct{}{}
|
|
+ }, ruleStatement, ruleLabelContainingDirective)
|
|
+
|
|
+ forEachPath(input.ast.up, func(node *node32) {
|
|
assertNodeType(node, ruleLocationDirective)
|
|
directive := input.contents[node.begin:node.end]
|
|
if !strings.HasPrefix(directive, ".file") {
|
|
@@ -1415,11 +1842,13 @@ func transform(w stringWriter, inputs []
|
|
|
|
d := &delocation{
|
|
symbols: symbols,
|
|
+ localEntrySymbols: localEntrySymbols,
|
|
processor: processor,
|
|
commentIndicator: commentIndicator,
|
|
output: w,
|
|
redirectors: make(map[string]string),
|
|
bssAccessorsNeeded: make(map[string]string),
|
|
+ tocLoaders: make(map[string]struct{}),
|
|
gotExternalsNeeded: make(map[string]struct{}),
|
|
gotOffsetsNeeded: make(map[string]struct{}),
|
|
gotOffOffsetsNeeded: make(map[string]struct{}),
|
|
@@ -1454,6 +1883,22 @@ func transform(w stringWriter, inputs []
|
|
for _, name := range redirectorNames {
|
|
redirector := d.redirectors[name]
|
|
switch d.processor {
|
|
+ case ppc64le:
|
|
+ w.WriteString(".section \".toc\", \"aw\"\n")
|
|
+ w.WriteString(".Lredirector_toc_" + name + ":\n")
|
|
+ w.WriteString(".quad " + name + "\n")
|
|
+ w.WriteString(".text\n")
|
|
+ w.WriteString(".type " + redirector + ", @function\n")
|
|
+ w.WriteString(redirector + ":\n")
|
|
+ // |name| will clobber r2, so save it. This is matched by a restore in
|
|
+ // redirector calls.
|
|
+ w.WriteString("\tstd 2, 24(1)\n")
|
|
+ // Load and call |name|'s global entry point.
|
|
+ w.WriteString("\taddis 12, 2, .Lredirector_toc_" + name + "@toc@ha\n")
|
|
+ w.WriteString("\tld 12, .Lredirector_toc_" + name + "@toc@l(12)\n")
|
|
+ w.WriteString("\tmtctr 12\n")
|
|
+ w.WriteString("\tbctr\n")
|
|
+
|
|
case aarch64:
|
|
writeAarch64Function(w, redirector, func(w stringWriter) {
|
|
w.WriteString("\tb " + name + "\n")
|
|
@@ -1478,6 +1923,13 @@ func transform(w stringWriter, inputs []
|
|
target := d.bssAccessorsNeeded[name]
|
|
|
|
switch d.processor {
|
|
+ case ppc64le:
|
|
+ w.WriteString(".type " + funcName + ", @function\n")
|
|
+ w.WriteString(funcName + ":\n")
|
|
+ w.WriteString("\taddis 3, 2, " + target + "@toc@ha\n")
|
|
+ w.WriteString("\taddi 3, 3, " + target + "@toc@l\n")
|
|
+ w.WriteString("\tblr\n")
|
|
+
|
|
case x86_64:
|
|
w.WriteString(".type " + funcName + ", @function\n")
|
|
w.WriteString(funcName + ":\n")
|
|
@@ -1493,6 +1945,26 @@ func transform(w stringWriter, inputs []
|
|
}
|
|
|
|
switch d.processor {
|
|
+ case ppc64le:
|
|
+ loadTOCNames := sortedSet(d.tocLoaders)
|
|
+ for _, symbolAndOffset := range loadTOCNames {
|
|
+ parts := strings.SplitN(symbolAndOffset, "\x00", 2)
|
|
+ symbol, offset := parts[0], parts[1]
|
|
+
|
|
+ funcName := loadTOCFuncName(symbol, offset)
|
|
+ ref := symbol + offset
|
|
+
|
|
+ w.WriteString(".type " + funcName[2:] + ", @function\n")
|
|
+ w.WriteString(funcName[2:] + ":\n")
|
|
+ w.WriteString(funcName + ":\n")
|
|
+ w.WriteString("\taddis 3, 2, " + ref + "@toc@ha\n")
|
|
+ w.WriteString("\taddi 3, 3, " + ref + "@toc@l\n")
|
|
+ w.WriteString("\tblr\n")
|
|
+ }
|
|
+
|
|
+ w.WriteString(".LBORINGSSL_external_toc:\n")
|
|
+ w.WriteString(".quad .TOC.-.LBORINGSSL_external_toc\n")
|
|
+
|
|
case aarch64:
|
|
externalNames := sortedSet(d.gotExternalsNeeded)
|
|
for _, symbol := range externalNames {
|
|
@@ -1803,6 +2275,10 @@ func localTargetName(name string) string
|
|
return ".L" + name + "_local_target"
|
|
}
|
|
|
|
+func localEntryName(name string) string {
|
|
+ return ".L" + name + "_local_entry"
|
|
+}
|
|
+
|
|
func isSynthesized(symbol string) bool {
|
|
return strings.HasSuffix(symbol, "_bss_get") ||
|
|
symbol == "OPENSSL_ia32cap_get" ||
|
|
@@ -1858,6 +2334,8 @@ func detectProcessor(input inputFile) pr
|
|
switch instructionName {
|
|
case "movq", "call", "leaq":
|
|
return x86_64
|
|
+ case "addis", "addi", "mflr":
|
|
+ return ppc64le
|
|
case "str", "bl", "ldr", "st1":
|
|
return aarch64
|
|
}
|
|
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/delocate.peg
|
|
===================================================================
|
|
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/util/fipstools/delocate/delocate.peg
|
|
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/delocate.peg
|
|
@@ -12,7 +12,7 @@
|
|
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
|
|
|
-# This is a rough parser for x86-64 and aarch64 assembly designed to work with
|
|
+# This is a rough parser for x86-64 and ppc64le assembly designed to work with
|
|
# https://github.com/pointlander/peg. delocate.go has a go:generate line for
|
|
# rebuilding delocate.peg.go from this file.
|
|
|
|
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/delocate_test.go
|
|
===================================================================
|
|
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/util/fipstools/delocate/delocate_test.go
|
|
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/delocate_test.go
|
|
@@ -39,6 +39,11 @@ func (test *delocateTest) Path(file stri
|
|
|
|
var delocateTests = []delocateTest{
|
|
{"generic-FileDirectives", []string{"in.s"}, "out.s"},
|
|
+ {"ppc64le-GlobalEntry", []string{"in.s"}, "out.s"},
|
|
+ {"ppc64le-LoadToR0", []string{"in.s"}, "out.s"},
|
|
+ {"ppc64le-Sample2", []string{"in.s"}, "out.s"},
|
|
+ {"ppc64le-Sample", []string{"in.s"}, "out.s"},
|
|
+ {"ppc64le-TOCWithOffset", []string{"in.s"}, "out.s"},
|
|
{"x86_64-Basic", []string{"in.s"}, "out.s"},
|
|
{"x86_64-BSS", []string{"in.s"}, "out.s"},
|
|
{"x86_64-GOTRewrite", []string{"in.s"}, "out.s"},
|
|
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-GlobalEntry/in.s
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-GlobalEntry/in.s
|
|
@@ -0,0 +1,9 @@
|
|
+ .text
|
|
+foo:
|
|
+.LCF0:
|
|
+0:
|
|
+ addis 2,12,.TOC.-.LCF0@ha
|
|
+ addi 2,2,.TOC.-.LCF0@l
|
|
+ .localentry foo,.-foo
|
|
+.LVL0:
|
|
+ bl
|
|
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-GlobalEntry/out.s
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-GlobalEntry/out.s
|
|
@@ -0,0 +1,62 @@
|
|
+.text
|
|
+.file 1 "inserted_by_delocate.c"
|
|
+.loc 1 1 0
|
|
+BORINGSSL_bcm_text_start:
|
|
+ .text
|
|
+.Lfoo_local_target:
|
|
+foo:
|
|
+.LCF0:
|
|
+
|
|
+0:
|
|
+
|
|
+999:
|
|
+ addis 2, 12, .LBORINGSSL_external_toc-999b@ha
|
|
+ addi 2, 2, .LBORINGSSL_external_toc-999b@l
|
|
+ ld 12, 0(2)
|
|
+ add 2, 2, 12
|
|
+# WAS addi 2,2,.TOC.-.LCF0@l
|
|
+ .localentry foo,.-foo
|
|
+.Lfoo_local_entry:
|
|
+.LVL0:
|
|
+
|
|
+ bl
|
|
+.text
|
|
+.loc 1 2 0
|
|
+BORINGSSL_bcm_text_end:
|
|
+.LBORINGSSL_external_toc:
|
|
+.quad .TOC.-.LBORINGSSL_external_toc
|
|
+.type BORINGSSL_bcm_text_hash, @object
|
|
+.size BORINGSSL_bcm_text_hash, 32
|
|
+BORINGSSL_bcm_text_hash:
|
|
+.byte 0xae
|
|
+.byte 0x2c
|
|
+.byte 0xea
|
|
+.byte 0x2a
|
|
+.byte 0xbd
|
|
+.byte 0xa6
|
|
+.byte 0xf3
|
|
+.byte 0xec
|
|
+.byte 0x97
|
|
+.byte 0x7f
|
|
+.byte 0x9b
|
|
+.byte 0xf6
|
|
+.byte 0x94
|
|
+.byte 0x9a
|
|
+.byte 0xfc
|
|
+.byte 0x83
|
|
+.byte 0x68
|
|
+.byte 0x27
|
|
+.byte 0xcb
|
|
+.byte 0xa0
|
|
+.byte 0xa0
|
|
+.byte 0x9f
|
|
+.byte 0x6b
|
|
+.byte 0x6f
|
|
+.byte 0xde
|
|
+.byte 0x52
|
|
+.byte 0xcd
|
|
+.byte 0xe2
|
|
+.byte 0xcd
|
|
+.byte 0xff
|
|
+.byte 0x31
|
|
+.byte 0x80
|
|
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-LoadToR0/in.s
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-LoadToR0/in.s
|
|
@@ -0,0 +1,4 @@
|
|
+ .text
|
|
+foo:
|
|
+ addis 22,2,bar@toc@ha
|
|
+ ld 0,bar@toc@l(22)
|
|
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-LoadToR0/out.s
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-LoadToR0/out.s
|
|
@@ -0,0 +1,72 @@
|
|
+.text
|
|
+.file 1 "inserted_by_delocate.c"
|
|
+.loc 1 1 0
|
|
+BORINGSSL_bcm_text_start:
|
|
+ .text
|
|
+.Lfoo_local_target:
|
|
+foo:
|
|
+# WAS addis 22,2,bar@toc@ha
|
|
+# WAS ld 0,bar@toc@l(22)
|
|
+ addi 1, 1, -288
|
|
+ mflr 0
|
|
+ std 0, -8(1)
|
|
+ std 3, -16(1)
|
|
+ bl .Lbcm_loadtoc_bar
|
|
+ std 3, -24(1)
|
|
+ ld 3, -8(1)
|
|
+ mtlr 3
|
|
+ ld 0, -24(1)
|
|
+ ld 3, -16(1)
|
|
+ addi 1, 1, 288
|
|
+ addi 1, 1, -288
|
|
+ std 3, -8(1)
|
|
+ mr 3, 0
|
|
+ ld 0, 0(3)
|
|
+ ld 3, -8(1)
|
|
+ addi 1, 1, 288
|
|
+.text
|
|
+.loc 1 2 0
|
|
+BORINGSSL_bcm_text_end:
|
|
+.type bcm_loadtoc_bar, @function
|
|
+bcm_loadtoc_bar:
|
|
+.Lbcm_loadtoc_bar:
|
|
+ addis 3, 2, bar@toc@ha
|
|
+ addi 3, 3, bar@toc@l
|
|
+ blr
|
|
+.LBORINGSSL_external_toc:
|
|
+.quad .TOC.-.LBORINGSSL_external_toc
|
|
+.type BORINGSSL_bcm_text_hash, @object
|
|
+.size BORINGSSL_bcm_text_hash, 32
|
|
+BORINGSSL_bcm_text_hash:
|
|
+.byte 0xae
|
|
+.byte 0x2c
|
|
+.byte 0xea
|
|
+.byte 0x2a
|
|
+.byte 0xbd
|
|
+.byte 0xa6
|
|
+.byte 0xf3
|
|
+.byte 0xec
|
|
+.byte 0x97
|
|
+.byte 0x7f
|
|
+.byte 0x9b
|
|
+.byte 0xf6
|
|
+.byte 0x94
|
|
+.byte 0x9a
|
|
+.byte 0xfc
|
|
+.byte 0x83
|
|
+.byte 0x68
|
|
+.byte 0x27
|
|
+.byte 0xcb
|
|
+.byte 0xa0
|
|
+.byte 0xa0
|
|
+.byte 0x9f
|
|
+.byte 0x6b
|
|
+.byte 0x6f
|
|
+.byte 0xde
|
|
+.byte 0x52
|
|
+.byte 0xcd
|
|
+.byte 0xe2
|
|
+.byte 0xcd
|
|
+.byte 0xff
|
|
+.byte 0x31
|
|
+.byte 0x80
|
|
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample/in.s
|
|
===================================================================
|
|
--- /dev/null
|
|
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample/in.s
|
|
@@ -0,0 +1,161 @@
|
|
+ .file "foo.c"
|
|
+ .abiversion 2
|
|
+ .section ".toc","aw"
|
|
+ .section ".text"
|
|
+ .section .rodata
|
|
+ .align 3
|
|
+ .type kString, @object
|
|
+ .size kString, 12
|
|
+kString:
|
|
+ .string "hello world"
|
|
+ .globl kExportedString
|
|
+ .align 3
|
|
+ .type kExportedString, @object
|
|
+ .size kExportedString, 26
|
|
+kExportedString:
|
|
+ .string "hello world, more visibly"
|
|
+ .align 2
|
|
+ .type kGiantArray, @object
|
|
+ .size kGiantArray, 400000
|
|
+kGiantArray:
|
|
+ .long 1
|
|
+ .long 0
|
|
+ .zero 399992
|
|
+ .lcomm bss,20,4
|
|
+ .type bss, @object
|
|
+ .align 3
|
|
+.LC1:
|
|
+ .string "kString is %p\n"
|
|
+ .align 3
|
|
+.LC2:
|
|
+ .string "kExportedString is %p\n"
|
|
+ .align 3
|
|
+.LC4:
|
|
+ .string "function is %p\n"
|
|
+ .align 3
|
|
+.LC5:
|
|
+ .string "exported_function is %p\n"
|
|
+ .align 3
|
|
+.LC7:
|
|
+ .string "&kString[5] is %p\n"
|
|
+ .align 3
|
|
+.LC9:
|
|
+ .string "&kGiantArray[0x12345] is %p\n"
|
|
+ .section ".toc","aw"
|
|
+.LC0:
|
|
+ .quad stderr
|
|
+.LC3:
|
|
+ .quad kExportedString
|
|
+.LC6:
|
|
+ .quad exported_function
|
|
+.LC8:
|
|
+ .quad kString+5
|
|
+.LC10:
|
|
+ .quad kGiantArray+298260
|
|
+ .section ".text"
|
|
+ .align 2
|
|
+ .type function, @function
|
|
+function:
|
|
+0: addis 2,12,.TOC.-0b@ha
|
|
+ addi 2,2,.TOC.-0b@l
|
|
+ .localentry function,.-function
|
|
+ mflr 0
|
|
+ std 0,16(1)
|
|
+ std 31,-8(1)
|
|
+ stdu 1,-112(1)
|
|
+ mr 31,1
|
|
+ addis 10,2,.LC0@toc@ha
|
|
+ ld 9,.LC0@toc@l(10)
|
|
+ ld 9,0(9)
|
|
+ mr 3,9
|
|
+ addis 4,2,.LC1@toc@ha
|
|
+ addi 4,4,.LC1@toc@l
|
|
+ addis 5,2,kString@toc@ha
|
|
+ addi 5,5,kString@toc@l
|
|
+ bl fprintf
|
|
+ nop
|
|
+ addis 10,2,.LC0@toc@ha
|
|
+ ld 9,.LC0@toc@l(10)
|
|
+ ld 9,0(9)
|
|
+ mr 3,9
|
|
+ addis 4,2,.LC2@toc@ha
|
|
+ addi 4,4,.LC2@toc@l
|
|
+ addis 9,2,.LC3@toc@ha
|
|
+ ld 5,.LC3@toc@l(9)
|
|
+ bl fprintf
|
|
+ nop
|
|
+ addis 10,2,.LC0@toc@ha
|
|
+ ld 9,.LC0@toc@l(10)
|
|
+ ld 9,0(9)
|
|
+ mr 3,9
|
|
+ addis 4,2,.LC4@toc@ha
|
|
+ addi 4,4,.LC4@toc@l
|
|
+ addis 5,2,function@toc@ha
|
|
+ addi 5,5,function@toc@l
|
|
+ bl fprintf
|
|
+ nop
|
|
+ addis 10,2,.LC0@toc@ha
|
|
+ ld 9,.LC0@toc@l(10)
|
|
+ ld 9,0(9)
|
|
+ mr 3,9
|
|
+ addis 4,2,.LC5@toc@ha
|
|
+ addi 4,4,.LC5@toc@l
|
|
+ addis 9,2,.LC6@toc@ha
|
|
+ ld 5,.LC6@toc@l(9)
|
|
+ bl fprintf
|
|
+ nop
|
|
+ addis 10,2,.LC0@toc@ha
|
|
+ ld 9,.LC0@toc@l(10)
|
|
+ ld 9,0(9)
|
|
+ mr 3,9
|
|
+ addis 4,2,.LC7@toc@ha
|
|
+ addi 4,4,.LC7@toc@l
|
|
+ addis 9,2,.LC8@toc@ha
|
|
+ ld 5,.LC8@toc@l(9)
|
|
+ bl fprintf
|
|
+ nop
|
|
+ addis 10,2,.LC0@toc@ha
|
|
+ ld 9,.LC0@toc@l(10)
|
|
+ ld 9,0(9)
|
|
+ mr 3,9
|
|
+ addis 4,2,.LC9@toc@ha
|
|
+ addi 4,4,.LC9@toc@l
|
|
+ addis 9,2,.LC10@toc@ha
|
|
+ ld 5,.LC10@toc@l(9)
|
|
+ bl fprintf
|
|
+ nop
|
|
+ bl exported_function
|
|
+ nop
|
|
+ mr 3,9
|
|
+ addi 1,31,112
|
|
+ ld 0,16(1)
|
|
+ mtlr 0
|
|
+ ld 31,-8(1)
|
|
+ blr
|
|
+ .long 0
|
|
+ .byte 0,0,0,1,128,1,0,1
|
|
+ .size function,.-function
|
|
+ .align 2
|
|
+ .globl exported_function
|
|
+ .type exported_function, @function
|
|
+exported_function:
|
|
+0: addis 2,12,.TOC.-0b@ha
|
|
+ addi 2,2,.TOC.-0b@l
|
|
+ .localentry exported_function,.-exported_function
|
|
+ mflr 0
|
|
+ std 0,16(1)
|
|
+ std 31,-8(1)
|
|
+ stdu 1,-48(1)
|
|
+ mr 31,1
|
|
+ bl function
|
|
+ mr 3,9
|
|
+ addi 1,31,48
|
|
+ ld 0,16(1)
|
|
+ mtlr 0
|
|
+ ld 31,-8(1)
|
|
+ blr
|
|
+ .long 0
|
|
+ .byte 0,0,0,1,128,1,0,1
|
|
+ .size exported_function,.-exported_function
|
|
+ .ident "GCC: (Ubuntu 4.9.2-10ubuntu13) 4.9.2"
|
|
+ .section .note.GNU-stack,"",@progbits
|
|
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample/out.s
===================================================================
--- /dev/null
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample/out.s
@@ -0,0 +1,552 @@
+.text
+.file 1 "inserted_by_delocate.c"
+.loc 1 1 0
+BORINGSSL_bcm_text_start:
+ .file "foo.c"
+ .abiversion 2
+ .section ".toc","aw"
+# WAS .section ".text"
+.text
+# WAS .section .rodata
+.text
+ .align 3
+ .type kString, @object
+ .size kString, 12
+.LkString_local_target:
+kString:
+ .string "hello world"
+ .globl kExportedString
+ .align 3
+ .type kExportedString, @object
+ .size kExportedString, 26
+.LkExportedString_local_target:
+kExportedString:
+ .string "hello world, more visibly"
+ .align 2
+ .type kGiantArray, @object
+ .size kGiantArray, 400000
+.LkGiantArray_local_target:
+kGiantArray:
+ .long 1
+ .long 0
+ .zero 399992
+ .lcomm bss,20,4
+ .type bss, @object
+ .align 3
+.LC1:
+
+ .string "kString is %p\n"
+ .align 3
+.LC2:
+
+ .string "kExportedString is %p\n"
+ .align 3
+.LC4:
+
+ .string "function is %p\n"
+ .align 3
+.LC5:
+
+ .string "exported_function is %p\n"
+ .align 3
+.LC7:
+
+ .string "&kString[5] is %p\n"
+ .align 3
+.LC9:
+
+ .string "&kGiantArray[0x12345] is %p\n"
+ .section ".toc","aw"
+.LC0:
+
+ .quad stderr
+.LC3:
+
+ .quad kExportedString
+.LC6:
+
+ .quad exported_function
+.LC8:
+
+ .quad kString+5
+.LC10:
+
+ .quad kGiantArray+298260
+# WAS .section ".text"
+.text
+ .align 2
+ .type function, @function
+.Lfunction_local_target:
+function:
+0:
+999:
+ addis 2, 12, .LBORINGSSL_external_toc-999b@ha
+ addi 2, 2, .LBORINGSSL_external_toc-999b@l
+ ld 12, 0(2)
+ add 2, 2, 12
+# WAS addi 2,2,.TOC.-0b@l
+ .localentry function,.-function
+.Lfunction_local_entry:
+ mflr 0
+ std 0,16(1)
+ std 31,-8(1)
+ stdu 1,-112(1)
+ mr 31,1
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+ addi 1, 1, -288
+ mflr 9
+ std 9, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 9, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 9, 0(9)
+ ld 9,0(9)
+ mr 3,9
+# WAS addis 4,2,.LC1@toc@ha
+# WAS addi 4,4,.LC1@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC1
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addis 5,2,kString@toc@ha
+# WAS addi 5,5,kString@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LkString_local_target
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS bl fprintf
+ bl bcm_redirector_fprintf
+ ld 2, 24(1)
+ nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+ addi 1, 1, -288
+ mflr 9
+ std 9, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 9, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 9, 0(9)
+ ld 9,0(9)
+ mr 3,9
+# WAS addis 4,2,.LC2@toc@ha
+# WAS addi 4,4,.LC2@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC2
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addis 9,2,.LC3@toc@ha
+# WAS ld 5,.LC3@toc@l(9)
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC3
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 5, 0(5)
+# WAS bl fprintf
+ bl bcm_redirector_fprintf
+ ld 2, 24(1)
+ nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+ addi 1, 1, -288
+ mflr 9
+ std 9, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 9, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 9, 0(9)
+ ld 9,0(9)
+ mr 3,9
+# WAS addis 4,2,.LC4@toc@ha
+# WAS addi 4,4,.LC4@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC4
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addis 5,2,function@toc@ha
+# WAS addi 5,5,function@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfunction_local_target
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS bl fprintf
+ bl bcm_redirector_fprintf
+ ld 2, 24(1)
+ nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+ addi 1, 1, -288
+ mflr 9
+ std 9, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 9, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 9, 0(9)
+ ld 9,0(9)
+ mr 3,9
+# WAS addis 4,2,.LC5@toc@ha
+# WAS addi 4,4,.LC5@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC5
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addis 9,2,.LC6@toc@ha
+# WAS ld 5,.LC6@toc@l(9)
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC6
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 5, 0(5)
+# WAS bl fprintf
+ bl bcm_redirector_fprintf
+ ld 2, 24(1)
+ nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+ addi 1, 1, -288
+ mflr 9
+ std 9, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 9, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 9, 0(9)
+ ld 9,0(9)
+ mr 3,9
+# WAS addis 4,2,.LC7@toc@ha
+# WAS addi 4,4,.LC7@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC7
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addis 9,2,.LC8@toc@ha
+# WAS ld 5,.LC8@toc@l(9)
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC8
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 5, 0(5)
+# WAS bl fprintf
+ bl bcm_redirector_fprintf
+ ld 2, 24(1)
+ nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+ addi 1, 1, -288
+ mflr 9
+ std 9, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 9, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 9, 0(9)
+ ld 9,0(9)
+ mr 3,9
+# WAS addis 4,2,.LC9@toc@ha
+# WAS addi 4,4,.LC9@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC9
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addis 9,2,.LC10@toc@ha
+# WAS ld 5,.LC10@toc@l(9)
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC10
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 5, 0(5)
+# WAS bl fprintf
+ bl bcm_redirector_fprintf
+ ld 2, 24(1)
+ nop
+# WAS bl exported_function
+ bl .Lexported_function_local_entry
+ nop
+ mr 3,9
+ addi 1,31,112
+ ld 0,16(1)
+ mtlr 0
+ ld 31,-8(1)
+ blr
+ .long 0
+ .byte 0,0,0,1,128,1,0,1
+ .size function,.-function
+ .align 2
+ .globl exported_function
+ .type exported_function, @function
+.Lexported_function_local_target:
+exported_function:
+0:
+999:
+ addis 2, 12, .LBORINGSSL_external_toc-999b@ha
+ addi 2, 2, .LBORINGSSL_external_toc-999b@l
+ ld 12, 0(2)
+ add 2, 2, 12
+# WAS addi 2,2,.TOC.-0b@l
+ .localentry exported_function,.-exported_function
+.Lexported_function_local_entry:
+ mflr 0
+ std 0,16(1)
+ std 31,-8(1)
+ stdu 1,-48(1)
+ mr 31,1
+# WAS bl function
+ bl .Lfunction_local_entry
+ mr 3,9
+ addi 1,31,48
+ ld 0,16(1)
+ mtlr 0
+ ld 31,-8(1)
+ blr
+ .long 0
+ .byte 0,0,0,1,128,1,0,1
+ .size exported_function,.-exported_function
+ .ident "GCC: (Ubuntu 4.9.2-10ubuntu13) 4.9.2"
+ .section .note.GNU-stack,"",@progbits
+.text
+.loc 1 2 0
+BORINGSSL_bcm_text_end:
+.section ".toc", "aw"
+.Lredirector_toc_fprintf:
+.quad fprintf
+.text
+.type bcm_redirector_fprintf, @function
+bcm_redirector_fprintf:
+ std 2, 24(1)
+ addis 12, 2, .Lredirector_toc_fprintf@toc@ha
+ ld 12, .Lredirector_toc_fprintf@toc@l(12)
+ mtctr 12
+ bctr
+.type bss_bss_get, @function
+bss_bss_get:
+ addis 3, 2, bss@toc@ha
+ addi 3, 3, bss@toc@l
+ blr
+.type bcm_loadtoc__dot_LC0, @function
+bcm_loadtoc__dot_LC0:
+.Lbcm_loadtoc__dot_LC0:
+ addis 3, 2, .LC0@toc@ha
+ addi 3, 3, .LC0@toc@l
+ blr
+.type bcm_loadtoc__dot_LC1, @function
+bcm_loadtoc__dot_LC1:
+.Lbcm_loadtoc__dot_LC1:
+ addis 3, 2, .LC1@toc@ha
+ addi 3, 3, .LC1@toc@l
+ blr
+.type bcm_loadtoc__dot_LC10, @function
+bcm_loadtoc__dot_LC10:
+.Lbcm_loadtoc__dot_LC10:
+ addis 3, 2, .LC10@toc@ha
+ addi 3, 3, .LC10@toc@l
+ blr
+.type bcm_loadtoc__dot_LC2, @function
+bcm_loadtoc__dot_LC2:
+.Lbcm_loadtoc__dot_LC2:
+ addis 3, 2, .LC2@toc@ha
+ addi 3, 3, .LC2@toc@l
+ blr
+.type bcm_loadtoc__dot_LC3, @function
+bcm_loadtoc__dot_LC3:
+.Lbcm_loadtoc__dot_LC3:
+ addis 3, 2, .LC3@toc@ha
+ addi 3, 3, .LC3@toc@l
+ blr
+.type bcm_loadtoc__dot_LC4, @function
+bcm_loadtoc__dot_LC4:
+.Lbcm_loadtoc__dot_LC4:
+ addis 3, 2, .LC4@toc@ha
+ addi 3, 3, .LC4@toc@l
+ blr
+.type bcm_loadtoc__dot_LC5, @function
+bcm_loadtoc__dot_LC5:
+.Lbcm_loadtoc__dot_LC5:
+ addis 3, 2, .LC5@toc@ha
+ addi 3, 3, .LC5@toc@l
+ blr
+.type bcm_loadtoc__dot_LC6, @function
+bcm_loadtoc__dot_LC6:
+.Lbcm_loadtoc__dot_LC6:
+ addis 3, 2, .LC6@toc@ha
+ addi 3, 3, .LC6@toc@l
+ blr
+.type bcm_loadtoc__dot_LC7, @function
+bcm_loadtoc__dot_LC7:
+.Lbcm_loadtoc__dot_LC7:
+ addis 3, 2, .LC7@toc@ha
+ addi 3, 3, .LC7@toc@l
+ blr
+.type bcm_loadtoc__dot_LC8, @function
+bcm_loadtoc__dot_LC8:
+.Lbcm_loadtoc__dot_LC8:
+ addis 3, 2, .LC8@toc@ha
+ addi 3, 3, .LC8@toc@l
+ blr
+.type bcm_loadtoc__dot_LC9, @function
+bcm_loadtoc__dot_LC9:
+.Lbcm_loadtoc__dot_LC9:
+ addis 3, 2, .LC9@toc@ha
+ addi 3, 3, .LC9@toc@l
+ blr
+.type bcm_loadtoc__dot_Lfunction_local_target, @function
+bcm_loadtoc__dot_Lfunction_local_target:
+.Lbcm_loadtoc__dot_Lfunction_local_target:
+ addis 3, 2, .Lfunction_local_target@toc@ha
+ addi 3, 3, .Lfunction_local_target@toc@l
+ blr
+.type bcm_loadtoc__dot_LkString_local_target, @function
+bcm_loadtoc__dot_LkString_local_target:
+.Lbcm_loadtoc__dot_LkString_local_target:
+ addis 3, 2, .LkString_local_target@toc@ha
+ addi 3, 3, .LkString_local_target@toc@l
+ blr
+.LBORINGSSL_external_toc:
+.quad .TOC.-.LBORINGSSL_external_toc
+.type BORINGSSL_bcm_text_hash, @object
+.size BORINGSSL_bcm_text_hash, 32
+BORINGSSL_bcm_text_hash:
+.byte 0xae
+.byte 0x2c
+.byte 0xea
+.byte 0x2a
+.byte 0xbd
+.byte 0xa6
+.byte 0xf3
+.byte 0xec
+.byte 0x97
+.byte 0x7f
+.byte 0x9b
+.byte 0xf6
+.byte 0x94
+.byte 0x9a
+.byte 0xfc
+.byte 0x83
+.byte 0x68
+.byte 0x27
+.byte 0xcb
+.byte 0xa0
+.byte 0xa0
+.byte 0x9f
+.byte 0x6b
+.byte 0x6f
+.byte 0xde
+.byte 0x52
+.byte 0xcd
+.byte 0xe2
+.byte 0xcd
+.byte 0xff
+.byte 0x31
+.byte 0x80
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample2/in.s
===================================================================
--- /dev/null
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample2/in.s
@@ -0,0 +1,226 @@
+ .file "foo.c"
+ .abiversion 2
+ .section ".toc","aw"
+ .section ".text"
+ .section ".toc","aw"
+.LC0:
+ .quad stderr
+.LC3:
+ .quad kExportedString
+.LC6:
+ .quad exported_function
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .globl exported_function
+ .type exported_function, @function
+exported_function:
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry exported_function,.-exported_function
+ mflr 0
+ std 19,-104(1)
+ std 20,-96(1)
+ std 21,-88(1)
+ std 22,-80(1)
+ addis 21,2,.LC1@toc@ha
+ addis 22,2,.LC2@toc@ha
+ std 23,-72(1)
+ std 24,-64(1)
+ addis 23,2,.LC4@toc@ha
+ addis 24,2,function@toc@ha
+ std 25,-56(1)
+ std 26,-48(1)
+ addis 25,2,.LC5@toc@ha
+ addis 26,2,.LC7@toc@ha
+ std 27,-40(1)
+ std 28,-32(1)
+ addis 28,2,.LC8@toc@ha
+ addi 21,21,.LC1@toc@l
+ std 29,-24(1)
+ std 30,-16(1)
+ addis 29,2,.LANCHOR0@toc@ha
+ addi 22,22,.LC2@toc@l
+ std 31,-8(1)
+ std 0,16(1)
+ addi 29,29,.LANCHOR0@toc@l
+ addi 23,23,.LC4@toc@l
+ stdu 1,-208(1)
+ addis 31,2,.LC0@toc@ha # gpr load fusion, type long
+ ld 31,.LC0@toc@l(31)
+ addis 19,2,.LC3@toc@ha # gpr load fusion, type long
+ ld 19,.LC3@toc@l(19)
+ addis 30,29,0x5
+ addi 24,24,function@toc@l
+ addis 20,2,.LC6@toc@ha # gpr load fusion, type long
+ ld 20,.LC6@toc@l(20)
+ addi 25,25,.LC5@toc@l
+ addi 26,26,.LC7@toc@l
+ addi 27,29,5
+ addi 28,28,.LC8@toc@l
+ addi 30,30,-29404
+ .p2align 4,,15
+.L2:
+ ld 3,0(31)
+ mr 5,21
+ mr 6,29
+ li 4,1
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ mr 5,22
+ mr 6,19
+ li 4,1
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ mr 5,23
+ mr 6,24
+ li 4,1
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ mr 5,25
+ mr 6,20
+ li 4,1
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ mr 5,26
+ mr 6,27
+ li 4,1
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ li 4,1
+ mr 5,28
+ mr 6,30
+ bl __fprintf_chk
+ nop
+ b .L2
+ .long 0
+ .byte 0,0,0,1,128,13,0,0
+ .size exported_function,.-exported_function
+ .section ".toc","aw"
+ .set .LC11,.LC0
+ .set .LC12,.LC3
+ .set .LC13,.LC6
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .type function, @function
+function:
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry function,.-function
+ mflr 0
+ std 31,-8(1)
+ addis 31,2,.LC11@toc@ha # gpr load fusion, type long
+ ld 31,.LC11@toc@l(31)
+ addis 5,2,.LC1@toc@ha
+ std 30,-16(1)
+ addis 30,2,.LANCHOR0@toc@ha
+ addi 5,5,.LC1@toc@l
+ addi 30,30,.LANCHOR0@toc@l
+ li 4,1
+ mr 6,30
+ std 0,16(1)
+ stdu 1,-112(1)
+ ld 3,0(31)
+ bl __fprintf_chk
+ nop
+ addis 6,2,.LC12@toc@ha # gpr load fusion, type long
+ ld 6,.LC12@toc@l(6)
+ ld 3,0(31)
+ addis 5,2,.LC2@toc@ha
+ li 4,1
+ addi 5,5,.LC2@toc@l
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ addis 5,2,.LC4@toc@ha
+ addis 6,2,function@toc@ha
+ addi 5,5,.LC4@toc@l
+ addi 6,6,function@toc@l
+ li 4,1
+ bl __fprintf_chk
+ nop
+ addis 6,2,.LC13@toc@ha # gpr load fusion, type long
+ ld 6,.LC13@toc@l(6)
+ ld 3,0(31)
+ addis 5,2,.LC5@toc@ha
+ li 4,1
+ addi 5,5,.LC5@toc@l
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ addis 5,2,.LC7@toc@ha
+ addi 6,30,5
+ addi 5,5,.LC7@toc@l
+ li 4,1
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ addis 6,30,0x5
+ addis 5,2,.LC8@toc@ha
+ li 4,1
+ addi 5,5,.LC8@toc@l
+ addi 6,6,-29404
+ bl __fprintf_chk
+ nop
+ bl exported_function
+ nop
+ addi 1,1,112
+ ld 0,16(1)
+ ld 30,-16(1)
+ ld 31,-8(1)
+ mtlr 0
+ blr
+ .long 0
+ .byte 0,0,0,1,128,2,0,0
+ .size function,.-function
+ .globl kExportedString
+ .section .rodata
+ .align 4
+ .set .LANCHOR0,. + 0
+ .type kString, @object
+ .size kString, 12
+kString:
+ .string "hello world"
+ .zero 4
+ .type kGiantArray, @object
+ .size kGiantArray, 400000
+kGiantArray:
+ .long 1
+ .long 0
+ .zero 399992
+ .type kExportedString, @object
+ .size kExportedString, 26
+kExportedString:
+ .string "hello world, more visibly"
+ .section .rodata.str1.8,"aMS",@progbits,1
+ .align 3
+.LC1:
+ .string "kString is %p\n"
+ .zero 1
+.LC2:
+ .string "kExportedString is %p\n"
+ .zero 1
+.LC4:
+ .string "function is %p\n"
+.LC5:
+ .string "exported_function is %p\n"
+ .zero 7
+.LC7:
+ .string "&kString[5] is %p\n"
+ .zero 5
+.LC8:
+ .string "&kGiantArray[0x12345] is %p\n"
+ .section ".bss"
+ .align 2
+ .type bss, @object
+ .size bss, 20
+bss:
+ .zero 20
+ .ident "GCC: (Ubuntu 4.9.2-10ubuntu13) 4.9.2"
+ .section .note.GNU-stack,"",@progbits
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample2/out.s
===================================================================
--- /dev/null
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample2/out.s
@@ -0,0 +1,677 @@
+.text
+.file 1 "inserted_by_delocate.c"
+.loc 1 1 0
+BORINGSSL_bcm_text_start:
+ .file "foo.c"
+ .abiversion 2
+ .section ".toc","aw"
+# WAS .section ".text"
+.text
+ .section ".toc","aw"
+.LC0:
+
+ .quad stderr
+.LC3:
+
+ .quad kExportedString
+.LC6:
+
+ .quad exported_function
+# WAS .section ".text"
+.text
+ .align 2
+ .p2align 4,,15
+ .globl exported_function
+ .type exported_function, @function
+.Lexported_function_local_target:
+exported_function:
+0:
+999:
+ addis 2, 12, .LBORINGSSL_external_toc-999b@ha
+ addi 2, 2, .LBORINGSSL_external_toc-999b@l
+ ld 12, 0(2)
+ add 2, 2, 12
+# WAS addi 2,2,.TOC.-0b@l
+ .localentry exported_function,.-exported_function
+.Lexported_function_local_entry:
+ mflr 0
+ std 19,-104(1)
+ std 20,-96(1)
+ std 21,-88(1)
+ std 22,-80(1)
+# WAS addis 21,2,.LC1@toc@ha
+# WAS addis 22,2,.LC2@toc@ha
+ std 23,-72(1)
+ std 24,-64(1)
+# WAS addis 23,2,.LC4@toc@ha
+# WAS addis 24,2,function@toc@ha
+ std 25,-56(1)
+ std 26,-48(1)
+# WAS addis 25,2,.LC5@toc@ha
+# WAS addis 26,2,.LC7@toc@ha
+ std 27,-40(1)
+ std 28,-32(1)
+# WAS addis 28,2,.LC8@toc@ha
+# WAS addi 21,21,.LC1@toc@l
+ addi 1, 1, -288
+ mflr 21
+ std 21, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC1
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 21, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ std 29,-24(1)
+ std 30,-16(1)
+# WAS addis 29,2,.LANCHOR0@toc@ha
+# WAS addi 22,22,.LC2@toc@l
+ addi 1, 1, -288
+ mflr 22
+ std 22, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC2
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 22, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ std 31,-8(1)
+ std 0,16(1)
+# WAS addi 29,29,.LANCHOR0@toc@l
+ addi 1, 1, -288
+ mflr 29
+ std 29, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LANCHOR0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 29, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addi 23,23,.LC4@toc@l
+ addi 1, 1, -288
+ mflr 23
+ std 23, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC4
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 23, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ stdu 1,-208(1)
+# WAS addis 31,2,.LC0@toc@ha # gpr load fusion, type long
+# WAS ld 31,.LC0@toc@l(31)
+ addi 1, 1, -288
+ mflr 31
+ std 31, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 31, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 31, 0(31)
+# WAS addis 19,2,.LC3@toc@ha # gpr load fusion, type long
+# WAS ld 19,.LC3@toc@l(19)
+ addi 1, 1, -288
+ mflr 19
+ std 19, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC3
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 19, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 19, 0(19)
+ addis 30,29,0x5
+# WAS addi 24,24,function@toc@l
+ addi 1, 1, -288
+ mflr 24
+ std 24, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfunction_local_target
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 24, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addis 20,2,.LC6@toc@ha # gpr load fusion, type long
+# WAS ld 20,.LC6@toc@l(20)
+ addi 1, 1, -288
+ mflr 20
+ std 20, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC6
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 20, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 20, 0(20)
+# WAS addi 25,25,.LC5@toc@l
+ addi 1, 1, -288
+ mflr 25
+ std 25, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC5
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 25, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addi 26,26,.LC7@toc@l
+ addi 1, 1, -288
+ mflr 26
+ std 26, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC7
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 26, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ addi 27,29,5
+# WAS addi 28,28,.LC8@toc@l
+ addi 1, 1, -288
+ mflr 28
+ std 28, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC8
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 28, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ addi 30,30,-29404
+ .p2align 4,,15
+.L2:
+
+ ld 3,0(31)
+ mr 5,21
+ mr 6,29
+ li 4,1
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+ mr 5,22
+ mr 6,19
+ li 4,1
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+ mr 5,23
+ mr 6,24
+ li 4,1
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+ mr 5,25
+ mr 6,20
+ li 4,1
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+ mr 5,26
+ mr 6,27
+ li 4,1
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+ li 4,1
+ mr 5,28
+ mr 6,30
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ b .L2
+ .long 0
+ .byte 0,0,0,1,128,13,0,0
+ .size exported_function,.-exported_function
+ .section ".toc","aw"
+ .set .LC11,.LC0
+ .set .LC12,.LC3
+ .set .LC13,.LC6
+# WAS .section ".text"
+.text
+ .align 2
+ .p2align 4,,15
+ .type function, @function
+.Lfunction_local_target:
+function:
+0:
+999:
+ addis 2, 12, .LBORINGSSL_external_toc-999b@ha
+ addi 2, 2, .LBORINGSSL_external_toc-999b@l
+ ld 12, 0(2)
+ add 2, 2, 12
+# WAS addi 2,2,.TOC.-0b@l
+ .localentry function,.-function
+.Lfunction_local_entry:
+ mflr 0
+ std 31,-8(1)
+# WAS addis 31,2,.LC11@toc@ha # gpr load fusion, type long
+# WAS ld 31,.LC11@toc@l(31)
+ addi 1, 1, -288
+ mflr 31
+ std 31, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC11
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 31, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 31, 0(31)
+# WAS addis 5,2,.LC1@toc@ha
+ std 30,-16(1)
+# WAS addis 30,2,.LANCHOR0@toc@ha
+# WAS addi 5,5,.LC1@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC1
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addi 30,30,.LANCHOR0@toc@l
+ addi 1, 1, -288
+ mflr 30
+ std 30, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LANCHOR0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 30, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ li 4,1
+ mr 6,30
+ std 0,16(1)
+ stdu 1,-112(1)
+ ld 3,0(31)
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+# WAS addis 6,2,.LC12@toc@ha # gpr load fusion, type long
+# WAS ld 6,.LC12@toc@l(6)
+ addi 1, 1, -288
+ mflr 6
+ std 6, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC12
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 6, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 6, 0(6)
+ ld 3,0(31)
+# WAS addis 5,2,.LC2@toc@ha
+ li 4,1
+# WAS addi 5,5,.LC2@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC2
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+# WAS addis 5,2,.LC4@toc@ha
+# WAS addis 6,2,function@toc@ha
+# WAS addi 5,5,.LC4@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC4
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addi 6,6,function@toc@l
+ addi 1, 1, -288
+ mflr 6
+ std 6, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfunction_local_target
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 6, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ li 4,1
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+# WAS addis 6,2,.LC13@toc@ha # gpr load fusion, type long
+# WAS ld 6,.LC13@toc@l(6)
+ addi 1, 1, -288
+ mflr 6
+ std 6, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC13
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 6, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 6, 0(6)
+ ld 3,0(31)
+# WAS addis 5,2,.LC5@toc@ha
+ li 4,1
+# WAS addi 5,5,.LC5@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC5
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+# WAS addis 5,2,.LC7@toc@ha
+ addi 6,30,5
+# WAS addi 5,5,.LC7@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC7
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ li 4,1
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+ addis 6,30,0x5
+# WAS addis 5,2,.LC8@toc@ha
+ li 4,1
+# WAS addi 5,5,.LC8@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC8
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ addi 6,6,-29404
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+# WAS bl exported_function
+ bl .Lexported_function_local_entry
+ nop
+ addi 1,1,112
+ ld 0,16(1)
+ ld 30,-16(1)
+ ld 31,-8(1)
+ mtlr 0
+ blr
+ .long 0
+ .byte 0,0,0,1,128,2,0,0
+ .size function,.-function
+ .globl kExportedString
+# WAS .section .rodata
+.text
+ .align 4
+ .set .LANCHOR0,. + 0
+ .type kString, @object
+ .size kString, 12
+.LkString_local_target:
+kString:
+ .string "hello world"
+ .zero 4
+ .type kGiantArray, @object
+ .size kGiantArray, 400000
+.LkGiantArray_local_target:
+kGiantArray:
+ .long 1
+ .long 0
+ .zero 399992
+ .type kExportedString, @object
+ .size kExportedString, 26
+.LkExportedString_local_target:
+kExportedString:
+ .string "hello world, more visibly"
+# WAS .section .rodata.str1.8,"aMS",@progbits,1
+.text
+ .align 3
+.LC1:
+
+ .string "kString is %p\n"
+ .zero 1
+.LC2:
+
+ .string "kExportedString is %p\n"
+ .zero 1
+.LC4:
+
+ .string "function is %p\n"
+.LC5:
+
+ .string "exported_function is %p\n"
+ .zero 7
+.LC7:
+
+ .string "&kString[5] is %p\n"
+ .zero 5
+.LC8:
+
+ .string "&kGiantArray[0x12345] is %p\n"
+ .section ".bss"
+ .align 2
+ .type bss, @object
+ .size bss, 20
+bss:
+.Lbss_local_target:
+
+ .zero 20
+ .ident "GCC: (Ubuntu 4.9.2-10ubuntu13) 4.9.2"
+ .section .note.GNU-stack,"",@progbits
+.text
+.loc 1 2 0
+BORINGSSL_bcm_text_end:
+.section ".toc", "aw"
+.Lredirector_toc___fprintf_chk:
+.quad __fprintf_chk
+.text
+.type bcm_redirector___fprintf_chk, @function
+bcm_redirector___fprintf_chk:
+ std 2, 24(1)
+ addis 12, 2, .Lredirector_toc___fprintf_chk@toc@ha
+ ld 12, .Lredirector_toc___fprintf_chk@toc@l(12)
+ mtctr 12
+ bctr
+.type bss_bss_get, @function
+bss_bss_get:
+ addis 3, 2, .Lbss_local_target@toc@ha
+ addi 3, 3, .Lbss_local_target@toc@l
+ blr
+.type bcm_loadtoc__dot_LANCHOR0, @function
+bcm_loadtoc__dot_LANCHOR0:
+.Lbcm_loadtoc__dot_LANCHOR0:
+ addis 3, 2, .LANCHOR0@toc@ha
+ addi 3, 3, .LANCHOR0@toc@l
+ blr
+.type bcm_loadtoc__dot_LC0, @function
+bcm_loadtoc__dot_LC0:
+.Lbcm_loadtoc__dot_LC0:
+ addis 3, 2, .LC0@toc@ha
+ addi 3, 3, .LC0@toc@l
+ blr
+.type bcm_loadtoc__dot_LC1, @function
+bcm_loadtoc__dot_LC1:
+.Lbcm_loadtoc__dot_LC1:
+ addis 3, 2, .LC1@toc@ha
+ addi 3, 3, .LC1@toc@l
+ blr
+.type bcm_loadtoc__dot_LC11, @function
+bcm_loadtoc__dot_LC11:
+.Lbcm_loadtoc__dot_LC11:
+ addis 3, 2, .LC11@toc@ha
+ addi 3, 3, .LC11@toc@l
+ blr
+.type bcm_loadtoc__dot_LC12, @function
+bcm_loadtoc__dot_LC12:
+.Lbcm_loadtoc__dot_LC12:
+ addis 3, 2, .LC12@toc@ha
+ addi 3, 3, .LC12@toc@l
+ blr
+.type bcm_loadtoc__dot_LC13, @function
+bcm_loadtoc__dot_LC13:
+.Lbcm_loadtoc__dot_LC13:
+ addis 3, 2, .LC13@toc@ha
+ addi 3, 3, .LC13@toc@l
+ blr
+.type bcm_loadtoc__dot_LC2, @function
+bcm_loadtoc__dot_LC2:
+.Lbcm_loadtoc__dot_LC2:
+ addis 3, 2, .LC2@toc@ha
+ addi 3, 3, .LC2@toc@l
+ blr
+.type bcm_loadtoc__dot_LC3, @function
+bcm_loadtoc__dot_LC3:
+.Lbcm_loadtoc__dot_LC3:
+ addis 3, 2, .LC3@toc@ha
+ addi 3, 3, .LC3@toc@l
+ blr
+.type bcm_loadtoc__dot_LC4, @function
+bcm_loadtoc__dot_LC4:
+.Lbcm_loadtoc__dot_LC4:
+ addis 3, 2, .LC4@toc@ha
+ addi 3, 3, .LC4@toc@l
+ blr
+.type bcm_loadtoc__dot_LC5, @function
+bcm_loadtoc__dot_LC5:
+.Lbcm_loadtoc__dot_LC5:
+ addis 3, 2, .LC5@toc@ha
+ addi 3, 3, .LC5@toc@l
+ blr
+.type bcm_loadtoc__dot_LC6, @function
+bcm_loadtoc__dot_LC6:
+.Lbcm_loadtoc__dot_LC6:
+ addis 3, 2, .LC6@toc@ha
+ addi 3, 3, .LC6@toc@l
+ blr
+.type bcm_loadtoc__dot_LC7, @function
+bcm_loadtoc__dot_LC7:
+.Lbcm_loadtoc__dot_LC7:
+ addis 3, 2, .LC7@toc@ha
+ addi 3, 3, .LC7@toc@l
+ blr
+.type bcm_loadtoc__dot_LC8, @function
+bcm_loadtoc__dot_LC8:
+.Lbcm_loadtoc__dot_LC8:
+ addis 3, 2, .LC8@toc@ha
+ addi 3, 3, .LC8@toc@l
+ blr
+.type bcm_loadtoc__dot_Lfunction_local_target, @function
+bcm_loadtoc__dot_Lfunction_local_target:
+.Lbcm_loadtoc__dot_Lfunction_local_target:
+ addis 3, 2, .Lfunction_local_target@toc@ha
+ addi 3, 3, .Lfunction_local_target@toc@l
+ blr
+.LBORINGSSL_external_toc:
+.quad .TOC.-.LBORINGSSL_external_toc
+.type BORINGSSL_bcm_text_hash, @object
+.size BORINGSSL_bcm_text_hash, 32
+BORINGSSL_bcm_text_hash:
+.byte 0xae
+.byte 0x2c
+.byte 0xea
+.byte 0x2a
+.byte 0xbd
+.byte 0xa6
+.byte 0xf3
+.byte 0xec
+.byte 0x97
+.byte 0x7f
+.byte 0x9b
+.byte 0xf6
+.byte 0x94
+.byte 0x9a
+.byte 0xfc
+.byte 0x83
+.byte 0x68
+.byte 0x27
+.byte 0xcb
+.byte 0xa0
+.byte 0xa0
+.byte 0x9f
+.byte 0x6b
+.byte 0x6f
+.byte 0xde
+.byte 0x52
+.byte 0xcd
+.byte 0xe2
+.byte 0xcd
+.byte 0xff
+.byte 0x31
+.byte 0x80
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-TOCWithOffset/in.s
===================================================================
--- /dev/null
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-TOCWithOffset/in.s
@@ -0,0 +1,23 @@
+ .text
+foo:
+ # TOC references may have offsets.
+ addis 3, 2, 5+foo@toc@ha
+ addi 3, 3, 10+foo@toc@l
+
+ addis 3, 2, 15+foo@toc@ha
+ addi 3, 3, 20+foo@toc@l
+
+ addis 4, 2, foo@toc@ha
+ addi 4, 4, foo@toc@l
+
+ addis 5, 2, 5+foo@toc@ha
+ ld 5, 10+foo@toc@l(5)
+
+ addis 4, 2, foo-10@toc@ha
+ addi 4, 4, foo-10@toc@l
+
+ addis 4, 2, foo@toc@ha+25
+ addi 4, 4, foo@toc@l+25
+
+ addis 4, 2, 1+foo-2@toc@ha+3
+ addi 4, 4, 1+foo-2@toc@l+3
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-TOCWithOffset/out.s
===================================================================
--- /dev/null
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-TOCWithOffset/out.s
@@ -0,0 +1,178 @@
+.text
+.file 1 "inserted_by_delocate.c"
+.loc 1 1 0
+BORINGSSL_bcm_text_start:
+ .text
+.Lfoo_local_target:
+foo:
+ # TOC references may have offsets.
+# WAS addis 3, 2, 5+foo@toc@ha
+# WAS addi 3, 3, 10+foo@toc@l
+ addi 1, 1, -288
+ mflr 3
+ std 3, -8(1)
+ bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_10
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 3, -24(1)
+ addi 1, 1, 288
+
+# WAS addis 3, 2, 15+foo@toc@ha
+# WAS addi 3, 3, 20+foo@toc@l
+ addi 1, 1, -288
+ mflr 3
+ std 3, -8(1)
+ bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_20
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 3, -24(1)
+ addi 1, 1, 288
+
+# WAS addis 4, 2, foo@toc@ha
+# WAS addi 4, 4, foo@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfoo_local_target
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+
+# WAS addis 5, 2, 5+foo@toc@ha
+# WAS ld 5, 10+foo@toc@l(5)
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_10
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 5, 0(5)
+
+# WAS addis 4, 2, foo-10@toc@ha
+# WAS addi 4, 4, foo-10@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfoo_local_target__minus_10
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+
+# WAS addis 4, 2, foo@toc@ha+25
+# WAS addi 4, 4, foo@toc@l+25
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_25
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+
+# WAS addis 4, 2, 1+foo-2@toc@ha+3
+# WAS addi 4, 4, 1+foo-2@toc@l+3
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_1_minus_2_plus_3
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+.text
+.loc 1 2 0
+BORINGSSL_bcm_text_end:
+.type bcm_loadtoc__dot_Lfoo_local_target, @function
+bcm_loadtoc__dot_Lfoo_local_target:
+.Lbcm_loadtoc__dot_Lfoo_local_target:
+ addis 3, 2, .Lfoo_local_target@toc@ha
+ addi 3, 3, .Lfoo_local_target@toc@l
+ blr
+.type bcm_loadtoc__dot_Lfoo_local_target__plus_1_minus_2_plus_3, @function
+bcm_loadtoc__dot_Lfoo_local_target__plus_1_minus_2_plus_3:
+.Lbcm_loadtoc__dot_Lfoo_local_target__plus_1_minus_2_plus_3:
+ addis 3, 2, .Lfoo_local_target+1-2+3@toc@ha
+ addi 3, 3, .Lfoo_local_target+1-2+3@toc@l
+ blr
+.type bcm_loadtoc__dot_Lfoo_local_target__plus_10, @function
+bcm_loadtoc__dot_Lfoo_local_target__plus_10:
+.Lbcm_loadtoc__dot_Lfoo_local_target__plus_10:
+ addis 3, 2, .Lfoo_local_target+10@toc@ha
+ addi 3, 3, .Lfoo_local_target+10@toc@l
+ blr
+.type bcm_loadtoc__dot_Lfoo_local_target__plus_20, @function
+bcm_loadtoc__dot_Lfoo_local_target__plus_20:
+.Lbcm_loadtoc__dot_Lfoo_local_target__plus_20:
+ addis 3, 2, .Lfoo_local_target+20@toc@ha
+ addi 3, 3, .Lfoo_local_target+20@toc@l
+ blr
+.type bcm_loadtoc__dot_Lfoo_local_target__plus_25, @function
+bcm_loadtoc__dot_Lfoo_local_target__plus_25:
+.Lbcm_loadtoc__dot_Lfoo_local_target__plus_25:
+ addis 3, 2, .Lfoo_local_target+25@toc@ha
+ addi 3, 3, .Lfoo_local_target+25@toc@l
+ blr
+.type bcm_loadtoc__dot_Lfoo_local_target__minus_10, @function
+bcm_loadtoc__dot_Lfoo_local_target__minus_10:
+.Lbcm_loadtoc__dot_Lfoo_local_target__minus_10:
+ addis 3, 2, .Lfoo_local_target-10@toc@ha
+ addi 3, 3, .Lfoo_local_target-10@toc@l
+ blr
+.LBORINGSSL_external_toc:
+.quad .TOC.-.LBORINGSSL_external_toc
+.type BORINGSSL_bcm_text_hash, @object
+.size BORINGSSL_bcm_text_hash, 32
+BORINGSSL_bcm_text_hash:
+.byte 0xae
+.byte 0x2c
+.byte 0xea
+.byte 0x2a
+.byte 0xbd
+.byte 0xa6
+.byte 0xf3
+.byte 0xec
+.byte 0x97
+.byte 0x7f
+.byte 0x9b
+.byte 0xf6
+.byte 0x94
+.byte 0x9a
+.byte 0xfc
+.byte 0x83
+.byte 0x68
+.byte 0x27
+.byte 0xcb
+.byte 0xa0
+.byte 0xa0
+.byte 0x9f
+.byte 0x6b
+.byte 0x6f
+.byte 0xde
+.byte 0x52
+.byte 0xcd
+.byte 0xe2
+.byte 0xcd
+.byte 0xff
+.byte 0x31
+.byte 0x80
Index: chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/sha/sha1.c.inc
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/crypto/fipsmodule/sha/sha1.c.inc
+++ chromium-130.0.6723.44/third_party/boringssl/src/crypto/fipsmodule/sha/sha1.c.inc
@@ -408,6 +408,10 @@ static void sha1_block_data_order(uint32
     return;
   }
 #endif
+#if defined(SHA1_ASM_PPC64)
+  sha1_block_data_order_ppc64(state, data, num);
+  return;
+#endif
   sha1_block_data_order_nohw(state, data, num);
 }
 
Index: chromium-130.0.6723.44/third_party/boringssl/src/build.json
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/build.json
+++ chromium-130.0.6723.44/third_party/boringssl/src/build.json
@@ -118,6 +118,10 @@
       {"src": "crypto/fipsmodule/sha/asm/sha512-armv4.pl"},
       {"src": "crypto/fipsmodule/aes/asm/vpaes-armv7.pl"}
     ],
+    "perlasm_ppc64le": [
+      {"src": "crypto/fipsmodule/aes/asm/aesp8-ppc.pl"},
+      {"src": "crypto/fipsmodule/modes/asm/ghashp8-ppc.pl"}
+    ],
     "perlasm_x86": [
       {"src": "crypto/fipsmodule/aes/asm/aesni-x86.pl"},
       {"src": "crypto/fipsmodule/bn/asm/bn-586.pl"},
@@ -219,6 +223,7 @@
       "crypto/cpu_arm_freebsd.c",
       "crypto/cpu_arm_linux.c",
       "crypto/cpu_intel.c",
+      "crypto/cpu_ppc64le.c",
       "crypto/crypto.c",
       "crypto/curve25519/curve25519.c",
       "crypto/curve25519/curve25519_64_adx.c",
@@ -799,6 +804,9 @@
     "perlasm_arm": [
       {"src": "crypto/test/asm/trampoline-armv4.pl"}
     ],
+    "perlasm_ppc64le": [
+      {"src": "crypto/test/asm/trampoline-ppc.pl"}
+    ],
     "perlasm_x86": [
       {"src": "crypto/test/asm/trampoline-x86.pl"}
     ],
Index: chromium-130.0.6723.44/third_party/boringssl/src/util/pregenerate/build.go
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/util/pregenerate/build.go
+++ chromium-130.0.6723.44/third_party/boringssl/src/util/pregenerate/build.go
@@ -38,6 +38,7 @@ type InputTarget struct {
 	// architecture.
 	PerlasmAarch64 []PerlasmSource `json:"perlasm_aarch64,omitempty"`
 	PerlasmArm []PerlasmSource `json:"perlasm_arm,omitempty"`
+	PerlasmPPC64LE []PerlasmSource `json:"perlasm_ppc64le,omitempty"`
 	PerlasmX86 []PerlasmSource `json:"perlasm_x86,omitempty"`
 	PerlasmX86_64 []PerlasmSource `json:"perlasm_x86_64,omitempty"`
 }
@@ -116,6 +117,9 @@ func (in *InputTarget) Pregenerate(name
 	for _, p := range in.PerlasmArm {
 		addPerlasmTask(&out.Asm, &p, "-linux.S", []string{"linux32"})
 	}
+	for _, p := range in.PerlasmPPC64LE {
+		addPerlasmTask(&out.Asm, &p, "-linux.S", []string{"linux64le"})
+	}
 	for _, p := range in.PerlasmX86 {
 		addPerlasmTask(&out.Asm, &p, "-apple.S", []string{"macosx", "-fPIC"})
 		addPerlasmTask(&out.Asm, &p, "-linux.S", []string{"elf", "-fPIC"})
Index: chromium-130.0.6723.44/third_party/boringssl/README.ppc64le
===================================================================
--- /dev/null
+++ chromium-130.0.6723.44/third_party/boringssl/README.ppc64le
@@ -0,0 +1,8 @@
+==============================================================
+To recreate the boringssl pregenerated files patch for ppc64le:
+
+cd third_party/boringssl/src
+cp -Rp gen gen.orig
+go run ./util/pregenerate
+cd ../../../../
+diff -urN chromium-*/third_party/boringssl/src/gen.orig chromium-*/third_party/boringssl/src/gen
Index: chromium-130.0.6723.44/third_party/boringssl/src/gen/sources.gni
===================================================================
--- chromium-130.0.6723.44.orig/third_party/boringssl/src/gen/sources.gni
+++ chromium-130.0.6723.44/third_party/boringssl/src/gen/sources.gni
@@ -107,6 +107,7 @@ bcm_sources_asm = [
   "gen/bcm/aesv8-gcm-armv8-apple.S",
   "gen/bcm/aesv8-gcm-armv8-linux.S",
   "gen/bcm/aesv8-gcm-armv8-win.S",
+  "gen/bcm/aesp8-ppc-linux.S",
   "gen/bcm/armv4-mont-linux.S",
   "gen/bcm/armv8-mont-apple.S",
   "gen/bcm/armv8-mont-linux.S",
@@ -123,6 +124,7 @@ bcm_sources_asm = [
   "gen/bcm/ghash-neon-armv8-apple.S",
   "gen/bcm/ghash-neon-armv8-linux.S",
   "gen/bcm/ghash-neon-armv8-win.S",
+  "gen/bcm/ghashp8-ppc-linux.S",
   "gen/bcm/ghash-ssse3-x86-apple.S",
   "gen/bcm/ghash-ssse3-x86-linux.S",
   "gen/bcm/ghash-ssse3-x86_64-apple.S",
@@ -314,6 +316,7 @@ crypto_sources = [
   "crypto/cpu_arm_freebsd.c",
   "crypto/cpu_arm_linux.c",
   "crypto/cpu_intel.c",
+  "crypto/cpu_ppc64le.c",
   "crypto/crypto.c",
   "crypto/curve25519/curve25519.c",
   "crypto/curve25519/curve25519_64_adx.c",