You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
chromium/0001-Add-PPC64-support-for-...

8928 lines
241 KiB

8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/abi_self_test.cc
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/abi_self_test.cc
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/abi_self_test.cc
@@ -521,3 +521,289 @@ TEST(ABITest, AArch64) {
CHECK_ABI_NO_UNWIND(abi_test_clobber_v15_upper);
}
#endif // OPENSSL_AARCH64 && SUPPORTS_ABI_TEST
+
+#if defined(OPENSSL_PPC64LE) && defined(SUPPORTS_ABI_TEST)
+extern "C" {
+void abi_test_clobber_r0(void);
+// r1 is the stack pointer.
+void abi_test_clobber_r2(void);
+void abi_test_clobber_r3(void);
+void abi_test_clobber_r4(void);
+void abi_test_clobber_r5(void);
+void abi_test_clobber_r6(void);
+void abi_test_clobber_r7(void);
+void abi_test_clobber_r8(void);
+void abi_test_clobber_r9(void);
+void abi_test_clobber_r10(void);
+void abi_test_clobber_r11(void);
+void abi_test_clobber_r12(void);
+// r13 is the thread pointer.
+void abi_test_clobber_r14(void);
+void abi_test_clobber_r15(void);
+void abi_test_clobber_r16(void);
+void abi_test_clobber_r17(void);
+void abi_test_clobber_r18(void);
+void abi_test_clobber_r19(void);
+void abi_test_clobber_r20(void);
+void abi_test_clobber_r21(void);
+void abi_test_clobber_r22(void);
+void abi_test_clobber_r23(void);
+void abi_test_clobber_r24(void);
+void abi_test_clobber_r25(void);
+void abi_test_clobber_r26(void);
+void abi_test_clobber_r27(void);
+void abi_test_clobber_r28(void);
+void abi_test_clobber_r29(void);
+void abi_test_clobber_r30(void);
+void abi_test_clobber_r31(void);
+
+void abi_test_clobber_f0(void);
+void abi_test_clobber_f1(void);
+void abi_test_clobber_f2(void);
+void abi_test_clobber_f3(void);
+void abi_test_clobber_f4(void);
+void abi_test_clobber_f5(void);
+void abi_test_clobber_f6(void);
+void abi_test_clobber_f7(void);
+void abi_test_clobber_f8(void);
+void abi_test_clobber_f9(void);
+void abi_test_clobber_f10(void);
+void abi_test_clobber_f11(void);
+void abi_test_clobber_f12(void);
+void abi_test_clobber_f13(void);
+void abi_test_clobber_f14(void);
+void abi_test_clobber_f15(void);
+void abi_test_clobber_f16(void);
+void abi_test_clobber_f17(void);
+void abi_test_clobber_f18(void);
+void abi_test_clobber_f19(void);
+void abi_test_clobber_f20(void);
+void abi_test_clobber_f21(void);
+void abi_test_clobber_f22(void);
+void abi_test_clobber_f23(void);
+void abi_test_clobber_f24(void);
+void abi_test_clobber_f25(void);
+void abi_test_clobber_f26(void);
+void abi_test_clobber_f27(void);
+void abi_test_clobber_f28(void);
+void abi_test_clobber_f29(void);
+void abi_test_clobber_f30(void);
+void abi_test_clobber_f31(void);
+
+void abi_test_clobber_v0(void);
+void abi_test_clobber_v1(void);
+void abi_test_clobber_v2(void);
+void abi_test_clobber_v3(void);
+void abi_test_clobber_v4(void);
+void abi_test_clobber_v5(void);
+void abi_test_clobber_v6(void);
+void abi_test_clobber_v7(void);
+void abi_test_clobber_v8(void);
+void abi_test_clobber_v9(void);
+void abi_test_clobber_v10(void);
+void abi_test_clobber_v11(void);
+void abi_test_clobber_v12(void);
+void abi_test_clobber_v13(void);
+void abi_test_clobber_v14(void);
+void abi_test_clobber_v15(void);
+void abi_test_clobber_v16(void);
+void abi_test_clobber_v17(void);
+void abi_test_clobber_v18(void);
+void abi_test_clobber_v19(void);
+void abi_test_clobber_v20(void);
+void abi_test_clobber_v21(void);
+void abi_test_clobber_v22(void);
+void abi_test_clobber_v23(void);
+void abi_test_clobber_v24(void);
+void abi_test_clobber_v25(void);
+void abi_test_clobber_v26(void);
+void abi_test_clobber_v27(void);
+void abi_test_clobber_v28(void);
+void abi_test_clobber_v29(void);
+void abi_test_clobber_v30(void);
+void abi_test_clobber_v31(void);
+
+void abi_test_clobber_cr0(void);
+void abi_test_clobber_cr1(void);
+void abi_test_clobber_cr2(void);
+void abi_test_clobber_cr3(void);
+void abi_test_clobber_cr4(void);
+void abi_test_clobber_cr5(void);
+void abi_test_clobber_cr6(void);
+void abi_test_clobber_cr7(void);
+
+void abi_test_clobber_ctr(void);
+void abi_test_clobber_lr(void);
+
+} // extern "C"
+
+TEST(ABITest, PPC64LE) {
+ // abi_test_trampoline hides unsaved registers from the caller, so we can
+ // safely call the abi_test_clobber_* functions below.
+ abi_test::internal::CallerState state;
+ RAND_bytes(reinterpret_cast<uint8_t *>(&state), sizeof(state));
+ CHECK_ABI_NO_UNWIND(abi_test_trampoline,
+ reinterpret_cast<crypto_word_t>(abi_test_clobber_r14),
+ &state, nullptr, 0, 0 /* no breakpoint */);
+
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r0);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r2);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r3);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r4);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r5);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r6);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r7);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r8);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r9);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r10);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r11);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_r12);
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r14),
+ "r14 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r15),
+ "r15 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r16),
+ "r16 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r17),
+ "r17 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r18),
+ "r18 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r19),
+ "r19 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r20),
+ "r20 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r21),
+ "r21 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r22),
+ "r22 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r23),
+ "r23 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r24),
+ "r24 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r25),
+ "r25 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r26),
+ "r26 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r27),
+ "r27 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r28),
+ "r28 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r29),
+ "r29 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r30),
+ "r30 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r31),
+ "r31 was not restored after return");
+
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f0);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f1);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f2);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f3);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f4);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f5);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f6);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f7);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f8);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f9);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f10);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f11);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f12);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_f13);
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f14),
+ "f14 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f15),
+ "f15 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f16),
+ "f16 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f17),
+ "f17 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f18),
+ "f18 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f19),
+ "f19 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f20),
+ "f20 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f21),
+ "f21 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f22),
+ "f22 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f23),
+ "f23 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f24),
+ "f24 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f25),
+ "f25 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f26),
+ "f26 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f27),
+ "f27 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f28),
+ "f28 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f29),
+ "f29 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f30),
+ "f30 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f31),
+ "f31 was not restored after return");
+
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v0);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v1);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v2);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v3);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v4);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v5);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v6);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v7);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v8);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v9);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v10);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v11);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v12);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v13);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v14);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v15);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v16);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v17);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v18);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_v19);
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v20),
+ "v20 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v21),
+ "v21 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v22),
+ "v22 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v23),
+ "v23 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v24),
+ "v24 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v25),
+ "v25 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v26),
+ "v26 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v27),
+ "v27 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v28),
+ "v28 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v29),
+ "v29 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v30),
+ "v30 was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v31),
+ "v31 was not restored after return");
+
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_cr0);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_cr1);
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr2),
+ "cr was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr3),
+ "cr was not restored after return");
+ EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr4),
+ "cr was not restored after return");
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_cr5);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_cr6);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_cr7);
+
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_ctr);
+ CHECK_ABI_NO_UNWIND(abi_test_clobber_lr);
+}
+#endif // OPENSSL_PPC64LE && SUPPORTS_ABI_TEST
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/cpu_ppc64le.c
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/cpu_ppc64le.c
@@ -0,0 +1,38 @@
+/* Copyright (c) 2016, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/base.h>
+
+#if defined(OPENSSL_PPC64LE)
+
+#include <sys/auxv.h>
+
+#include "internal.h"
+
+
+#if !defined(PPC_FEATURE2_HAS_VCRYPTO)
+// PPC_FEATURE2_HAS_VCRYPTO was taken from section 4.1.2.3 of the “OpenPOWER
+// ABI for Linux Supplement”.
+#define PPC_FEATURE2_HAS_VCRYPTO 0x02000000
+#endif
+
+void OPENSSL_cpuid_setup(void) {
+ OPENSSL_ppc64le_hwcap2 = getauxval(AT_HWCAP2);
+}
+
+int CRYPTO_is_PPC64LE_vcrypto_capable(void) {
+ return (OPENSSL_ppc64le_hwcap2 & PPC_FEATURE2_HAS_VCRYPTO) != 0;
+}
+
+#endif // OPENSSL_PPC64LE
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/crypto.c
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/crypto.c
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/crypto.c
@@ -25,10 +25,12 @@ static_assert(sizeof(ossl_ssize_t) == si
"ossl_ssize_t should be the same size as size_t");
#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_STATIC_ARMCAP) && \
- (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
- defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
-// x86, x86_64, and the ARMs need to record the result of a cpuid/getauxval call
-// for the asm to work correctly, unless compiled without asm code.
+ (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
+ defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
+ defined(OPENSSL_PPC64LE))
+// x86, x86_64, the ARMs and ppc64le need to record the result of a
+// cpuid/getauxval call for the asm to work correctly, unless compiled without
+// asm code.
#define NEED_CPUID
#else
@@ -39,7 +41,8 @@ static_assert(sizeof(ossl_ssize_t) == si
#define BORINGSSL_NO_STATIC_INITIALIZER
#endif
-#endif // !NO_ASM && !STATIC_ARMCAP && (X86 || X86_64 || ARM || AARCH64)
+#endif // !NO_ASM && !STATIC_ARMCAP &&
+ // (X86 || X86_64 || ARM || AARCH64 || PPC64LE)
// Our assembly does not use the GOT to reference symbols, which means
@@ -83,6 +86,10 @@ uint32_t OPENSSL_get_ia32cap(int idx) {
return OPENSSL_ia32cap_P[idx];
}
+#elif defined(OPENSSL_PPC64LE)
+
+HIDDEN unsigned long OPENSSL_ppc64le_hwcap2 = 0;
+
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <openssl/arm_arch.h>
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/aes/asm/aesp8-ppc.pl
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/aes/asm/aesp8-ppc.pl
@@ -0,0 +1,3809 @@
+#! /usr/bin/env perl
+# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements support for AES instructions as per PowerISA
+# specification version 2.07, first implemented by POWER8 processor.
+# The module is endian-agnostic in sense that it supports both big-
+# and little-endian cases. Data alignment in parallelizable modes is
+# handled with VSX loads and stores, which implies MSR.VSX flag being
+# set. It should also be noted that ISA specification doesn't prohibit
+# alignment exceptions for these instructions on page boundaries.
+# Initially alignment was handled in pure AltiVec/VMX way [when data
+# is aligned programmatically, which in turn guarantees exception-
+# free execution], but it turned to hamper performance when vcipher
+# instructions are interleaved. It's reckoned that eventual
+# misalignment penalties at page boundaries are in average lower
+# than additional overhead in pure AltiVec approach.
+#
+# May 2016
+#
+# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
+# systems were measured.
+#
+######################################################################
+# Current large-block performance in cycles per byte processed with
+# 128-bit key (less is better).
+#
+# CBC en-/decrypt CTR XTS
+# POWER8[le] 3.96/0.72 0.74 1.1
+# POWER8[be] 3.75/0.65 0.66 1.0
+# POWER9[le] 4.02/0.86 0.84 1.05
+# POWER9[be] 3.99/0.78 0.79 0.97
+
+$flavour = shift;
+$output = shift;
+
+if ($flavour =~ /64/) {
+ $SIZE_T =8;
+ $LRSAVE =2*$SIZE_T;
+ $STU ="stdu";
+ $POP ="ld";
+ $PUSH ="std";
+ $UCMP ="cmpld";
+ $SHL ="sldi";
+} elsif ($flavour =~ /32/) {
+ $SIZE_T =4;
+ $LRSAVE =$SIZE_T;
+ $STU ="stwu";
+ $POP ="lwz";
+ $PUSH ="stw";
+ $UCMP ="cmplw";
+ $SHL ="slwi";
+} else { die "nonsense $flavour"; }
+
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open OUT,"| $^X \"$xlate\" $flavour \"$output\"" || die "can't call $xlate: $!";
+*STDOUT=*OUT;
+
+$FRAME=8*$SIZE_T;
+$prefix="aes_hw";
+
+$sp="r1";
+$vrsave="r12";
+
+#########################################################################
+{{{ # Key setup procedures #
+my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
+my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
+my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
+
+$code.=<<___;
+.machine "any"
+
+.text
+
+.align 7
+Lrcon:
+.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
+.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
+.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
+.long 0,0,0,0 ?asis
+Lconsts:
+ mflr r0
+ bcl 20,31,\$+4
+ mflr $ptr #vvvvv "distance between . and rcon
+ addi $ptr,$ptr,-0x48
+ mtlr r0
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+
+.globl .${prefix}_set_encrypt_key
+.align 5
+.${prefix}_set_encrypt_key:
+Lset_encrypt_key:
+ mflr r11
+ $PUSH r11,$LRSAVE($sp)
+
+ li $ptr,-1
+ ${UCMP}i $inp,0
+ beq- Lenc_key_abort # if ($inp==0) return -1;
+ ${UCMP}i $out,0
+ beq- Lenc_key_abort # if ($out==0) return -1;
+ li $ptr,-2
+ cmpwi $bits,128
+ blt- Lenc_key_abort
+ cmpwi $bits,256
+ bgt- Lenc_key_abort
+ andi. r0,$bits,0x3f
+ bne- Lenc_key_abort
+
+ lis r0,0xfff0
+ mfspr $vrsave,256
+ mtspr 256,r0
+
+ bl Lconsts
+ mtlr r11
+
+ neg r9,$inp
+ lvx $in0,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ lvsr $key,0,r9 # borrow $key
+ li r8,0x20
+ cmpwi $bits,192
+ lvx $in1,0,$inp
+ le?vspltisb $mask,0x0f # borrow $mask
+ lvx $rcon,0,$ptr
+ le?vxor $key,$key,$mask # adjust for byte swap
+ lvx $mask,r8,$ptr
+ addi $ptr,$ptr,0x10
+ vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
+ li $cnt,8
+ vxor $zero,$zero,$zero
+ mtctr $cnt
+
+ ?lvsr $outperm,0,$out
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$zero,$outmask,$outperm
+
+ blt Loop128
+ addi $inp,$inp,8
+ beq L192
+ addi $inp,$inp,8
+ b L256
+
+.align 4
+Loop128:
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+ bdnz Loop128
+
+ lvx $rcon,0,$ptr # last two round keys
+
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vxor $in0,$in0,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,0x50
+
+ li $rounds,10
+ b Ldone
+
+.align 4
+L192:
+ lvx $tmp,0,$inp
+ li $cnt,4
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $out,$out,16
+ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
+ vspltisb $key,8 # borrow $key
+ mtctr $cnt
+ vsububm $mask,$mask,$key # adjust the mask
+
+Loop192:
+ vperm $key,$in1,$in1,$mask # roate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vcipherlast $key,$key,$rcon
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+
+ vsldoi $stage,$zero,$in1,8
+ vspltw $tmp,$in0,3
+ vxor $tmp,$tmp,$in1
+ vsldoi $in1,$zero,$in1,12 # >>32
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in1,$in1,$tmp
+ vxor $in0,$in0,$key
+ vxor $in1,$in1,$key
+ vsldoi $stage,$stage,$in0,8
+
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$stage,$stage,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vsldoi $stage,$in0,$in1,8
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vperm $outtail,$stage,$stage,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vspltw $tmp,$in0,3
+ vxor $tmp,$tmp,$in1
+ vsldoi $in1,$zero,$in1,12 # >>32
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in1,$in1,$tmp
+ vxor $in0,$in0,$key
+ vxor $in1,$in1,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,16
+ bdnz Loop192
+
+ li $rounds,12
+ addi $out,$out,0x20
+ b Ldone
+
+.align 4
+L256:
+ lvx $tmp,0,$inp
+ li $cnt,7
+ li $rounds,14
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $out,$out,16
+ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
+ mtctr $cnt
+
+Loop256:
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in1,$in1,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,16
+ bdz Ldone
+
+ vspltw $key,$in0,3 # just splat
+ vsldoi $tmp,$zero,$in1,12 # >>32
+ vsbox $key,$key
+
+ vxor $in1,$in1,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in1,$in1,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in1,$in1,$tmp
+
+ vxor $in1,$in1,$key
+ b Loop256
+
+.align 4
+Ldone:
+ lvx $in1,0,$inp # redundant in aligned case
+ vsel $in1,$outhead,$in1,$outmask
+ stvx $in1,0,$inp
+ li $ptr,0
+ mtspr 256,$vrsave
+ stw $rounds,0($out)
+
+Lenc_key_abort:
+ mr r3,$ptr
+ blr
+ .long 0
+ .byte 0,12,0x14,1,0,0,3,0
+ .long 0
+.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
+
+.globl .${prefix}_set_decrypt_key
+.align 5
+.${prefix}_set_decrypt_key:
+ $STU $sp,-$FRAME($sp)
+ mflr r10
+ $PUSH r10,`$FRAME+$LRSAVE`($sp)
+ bl Lset_encrypt_key
+ mtlr r10
+
+ cmpwi r3,0
+ bne- Ldec_key_abort
+
+ slwi $cnt,$rounds,4
+ subi $inp,$out,240 # first round key
+ srwi $rounds,$rounds,1
+ add $out,$inp,$cnt # last round key
+ mtctr $rounds
+
+Ldeckey:
+ lwz r0, 0($inp)
+ lwz r6, 4($inp)
+ lwz r7, 8($inp)
+ lwz r8, 12($inp)
+ addi $inp,$inp,16
+ lwz r9, 0($out)
+ lwz r10,4($out)
+ lwz r11,8($out)
+ lwz r12,12($out)
+ stw r0, 0($out)
+ stw r6, 4($out)
+ stw r7, 8($out)
+ stw r8, 12($out)
+ subi $out,$out,16
+ stw r9, -16($inp)
+ stw r10,-12($inp)
+ stw r11,-8($inp)
+ stw r12,-4($inp)
+ bdnz Ldeckey
+
+ xor r3,r3,r3 # return value
+Ldec_key_abort:
+ addi $sp,$sp,$FRAME
+ blr
+ .long 0
+ .byte 0,12,4,1,0x80,0,3,0
+ .long 0
+.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
+___
+}}}
+#########################################################################
+{{{ # Single block en- and decrypt procedures #
+sub gen_block () {
+my $dir = shift;
+my $n = $dir eq "de" ? "n" : "";
+my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
+
+$code.=<<___;
+.globl .${prefix}_${dir}crypt
+.align 5
+.${prefix}_${dir}crypt:
+ lwz $rounds,240($key)
+ lis r0,0xfc00
+ mfspr $vrsave,256
+ li $idx,15 # 15 is not typo
+ mtspr 256,r0
+
+ lvx v0,0,$inp
+ neg r11,$out
+ lvx v1,$idx,$inp
+ lvsl v2,0,$inp # inpperm
+ le?vspltisb v4,0x0f
+ ?lvsl v3,0,r11 # outperm
+ le?vxor v2,v2,v4
+ li $idx,16
+ vperm v0,v0,v1,v2 # align [and byte swap in LE]
+ lvx v1,0,$key
+ ?lvsl v5,0,$key # keyperm
+ srwi $rounds,$rounds,1
+ lvx v2,$idx,$key
+ addi $idx,$idx,16
+ subi $rounds,$rounds,1
+ ?vperm v1,v1,v2,v5 # align round key
+
+ vxor v0,v0,v1
+ lvx v1,$idx,$key
+ addi $idx,$idx,16
+ mtctr $rounds
+
+Loop_${dir}c:
+ ?vperm v2,v2,v1,v5
+ v${n}cipher v0,v0,v2
+ lvx v2,$idx,$key
+ addi $idx,$idx,16
+ ?vperm v1,v1,v2,v5
+ v${n}cipher v0,v0,v1
+ lvx v1,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_${dir}c
+
+ ?vperm v2,v2,v1,v5
+ v${n}cipher v0,v0,v2
+ lvx v2,$idx,$key
+ ?vperm v1,v1,v2,v5
+ v${n}cipherlast v0,v0,v1
+
+ vspltisb v2,-1
+ vxor v1,v1,v1
+ li $idx,15 # 15 is not typo
+ ?vperm v2,v1,v2,v3 # outmask
+ le?vxor v3,v3,v4
+ lvx v1,0,$out # outhead
+ vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
+ vsel v1,v1,v0,v2
+ lvx v4,$idx,$out
+ stvx v1,0,$out
+ vsel v0,v0,v4,v2
+ stvx v0,$idx,$out
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,3,0
+ .long 0
+.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
+___
+}
+&gen_block("en");
+&gen_block("de");
+}}}
+#########################################################################
+{{{ # CBC en- and decrypt procedures #
+my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
+ map("v$_",(4..10));
+$code.=<<___;
+.globl .${prefix}_cbc_encrypt
+.align 5
+.${prefix}_cbc_encrypt:
+ ${UCMP}i $len,16
+ bltlr-
+
+ cmpwi $enc,0 # test direction
+ lis r0,0xffe0
+ mfspr $vrsave,256
+ mtspr 256,r0
+
+ li $idx,15
+ vxor $rndkey0,$rndkey0,$rndkey0
+ le?vspltisb $tmp,0x0f
+
+ lvx $ivec,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $ivec,$ivec,$inptail,$inpperm
+
+ neg r11,$inp
+ ?lvsl $keyperm,0,$key # prepare for unaligned key
+ lwz $rounds,240($key)
+
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inptail,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ ?lvsr $outperm,0,$out # prepare for unaligned store
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
+ le?vxor $outperm,$outperm,$tmp
+
+ srwi $rounds,$rounds,1
+ li $idx,16
+ subi $rounds,$rounds,1
+ beq Lcbc_dec
+
+Lcbc_enc:
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ mtctr $rounds
+ subi $len,$len,16 # len-=16
+
+ lvx $rndkey0,0,$key
+ vperm $inout,$inout,$inptail,$inpperm
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ vxor $inout,$inout,$ivec
+
+Loop_cbc_enc:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_cbc_enc
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipherlast $ivec,$inout,$rndkey0
+ ${UCMP}i $len,16
+
+ vperm $tmp,$ivec,$ivec,$outperm
+ vsel $inout,$outhead,$tmp,$outmask
+ vmr $outhead,$tmp
+ stvx $inout,0,$out
+ addi $out,$out,16
+ bge Lcbc_enc
+
+ b Lcbc_done
+
+.align 4
+Lcbc_dec:
+ ${UCMP}i $len,128
+ bge _aesp8_cbc_decrypt8x
+ vmr $tmp,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ mtctr $rounds
+ subi $len,$len,16 # len-=16
+
+ lvx $rndkey0,0,$key
+ vperm $tmp,$tmp,$inptail,$inpperm
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$tmp,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+
+Loop_cbc_dec:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_cbc_dec
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipherlast $inout,$inout,$rndkey0
+ ${UCMP}i $len,16
+
+ vxor $inout,$inout,$ivec
+ vmr $ivec,$tmp
+ vperm $tmp,$inout,$inout,$outperm
+ vsel $inout,$outhead,$tmp,$outmask
+ vmr $outhead,$tmp
+ stvx $inout,0,$out
+ addi $out,$out,16
+ bge Lcbc_dec
+
+Lcbc_done:
+ addi $out,$out,-1
+ lvx $inout,0,$out # redundant in aligned case
+ vsel $inout,$outhead,$inout,$outmask
+ stvx $inout,0,$out
+
+ neg $enc,$ivp # write [unaligned] iv
+ li $idx,15 # 15 is not typo
+ vxor $rndkey0,$rndkey0,$rndkey0
+ vspltisb $outmask,-1
+ le?vspltisb $tmp,0x0f
+ ?lvsl $outperm,0,$enc
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
+ le?vxor $outperm,$outperm,$tmp
+ lvx $outhead,0,$ivp
+ vperm $ivec,$ivec,$ivec,$outperm
+ vsel $inout,$outhead,$ivec,$outmask
+ lvx $inptail,$idx,$ivp
+ stvx $inout,0,$ivp
+ vsel $inout,$ivec,$inptail,$outmask
+ stvx $inout,$idx,$ivp
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,6,0
+ .long 0
+___
+#########################################################################
+{{ # Optimized CBC decrypt procedure #
+my $key_="r11";
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
+ $x00=0 if ($flavour =~ /osx/);
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
+my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
+ # v26-v31 last 6 round keys
+my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
+
+$code.=<<___;
+.align 5
+_aesp8_cbc_decrypt8x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ li r10,`$FRAME+8*16+15`
+ li r11,`$FRAME+8*16+31`
+ stvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+ subi $len,$len,128 # bias
+
+ lvx $rndkey0,$x00,$key # load key schedule
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ lvx v31,$x00,$key
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,`$FRAME+15`
+ mtctr $rounds
+
+Load_cbc_dec_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_cbc_dec_key
+
+ lvx v26,$x10,$key
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key
+ ?vperm v29,v29,v30,$keyperm
+ lvx $out0,$x70,$key # borrow $out0
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$out0,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ #lvx $inptail,0,$inp # "caller" already did this
+ #addi $inp,$inp,15 # 15 is not typo
+ subi $inp,$inp,15 # undo "caller"
+
+ le?li $idx,8
+ lvx_u $in0,$x00,$inp # load first 8 "words"
+ le?lvsl $inpperm,0,$idx
+ le?vspltisb $tmp,0x0f
+ lvx_u $in1,$x10,$inp
+ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
+ lvx_u $in2,$x20,$inp
+ le?vperm $in0,$in0,$in0,$inpperm
+ lvx_u $in3,$x30,$inp
+ le?vperm $in1,$in1,$in1,$inpperm
+ lvx_u $in4,$x40,$inp
+ le?vperm $in2,$in2,$in2,$inpperm
+ vxor $out0,$in0,$rndkey0
+ lvx_u $in5,$x50,$inp
+ le?vperm $in3,$in3,$in3,$inpperm
+ vxor $out1,$in1,$rndkey0
+ lvx_u $in6,$x60,$inp
+ le?vperm $in4,$in4,$in4,$inpperm
+ vxor $out2,$in2,$rndkey0
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+ le?vperm $in5,$in5,$in5,$inpperm
+ vxor $out3,$in3,$rndkey0
+ le?vperm $in6,$in6,$in6,$inpperm
+ vxor $out4,$in4,$rndkey0
+ le?vperm $in7,$in7,$in7,$inpperm
+ vxor $out5,$in5,$rndkey0
+ vxor $out6,$in6,$rndkey0
+ vxor $out7,$in7,$rndkey0
+
+ mtctr $rounds
+ b Loop_cbc_dec8x
+.align 5
+Loop_cbc_dec8x:
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_cbc_dec8x
+
+ subic $len,$len,128 # $len-=128
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+
+ subfe. r0,r0,r0 # borrow?-1:0
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+
+ and r0,r0,$len
+ vncipher $out0,$out0,v26
+ vncipher $out1,$out1,v26
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vncipher $out4,$out4,v26
+ vncipher $out5,$out5,v26
+ vncipher $out6,$out6,v26
+ vncipher $out7,$out7,v26
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in7 are loaded
+ # with last "words"
+ vncipher $out0,$out0,v27
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vncipher $out4,$out4,v27
+ vncipher $out5,$out5,v27
+ vncipher $out6,$out6,v27
+ vncipher $out7,$out7,v27
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vncipher $out0,$out0,v28
+ vncipher $out1,$out1,v28
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vncipher $out4,$out4,v28
+ vncipher $out5,$out5,v28
+ vncipher $out6,$out6,v28
+ vncipher $out7,$out7,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vncipher $out0,$out0,v29
+ vncipher $out1,$out1,v29
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vncipher $out4,$out4,v29
+ vncipher $out5,$out5,v29
+ vncipher $out6,$out6,v29
+ vncipher $out7,$out7,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+
+ vncipher $out0,$out0,v30
+ vxor $ivec,$ivec,v31 # xor with last round key
+ vncipher $out1,$out1,v30
+ vxor $in0,$in0,v31
+ vncipher $out2,$out2,v30
+ vxor $in1,$in1,v31
+ vncipher $out3,$out3,v30
+ vxor $in2,$in2,v31
+ vncipher $out4,$out4,v30
+ vxor $in3,$in3,v31
+ vncipher $out5,$out5,v30
+ vxor $in4,$in4,v31
+ vncipher $out6,$out6,v30
+ vxor $in5,$in5,v31
+ vncipher $out7,$out7,v30
+ vxor $in6,$in6,v31
+
+ vncipherlast $out0,$out0,$ivec
+ vncipherlast $out1,$out1,$in0
+ lvx_u $in0,$x00,$inp # load next input block
+ vncipherlast $out2,$out2,$in1
+ lvx_u $in1,$x10,$inp
+ vncipherlast $out3,$out3,$in2
+ le?vperm $in0,$in0,$in0,$inpperm
+ lvx_u $in2,$x20,$inp
+ vncipherlast $out4,$out4,$in3
+ le?vperm $in1,$in1,$in1,$inpperm
+ lvx_u $in3,$x30,$inp
+ vncipherlast $out5,$out5,$in4
+ le?vperm $in2,$in2,$in2,$inpperm
+ lvx_u $in4,$x40,$inp
+ vncipherlast $out6,$out6,$in5
+ le?vperm $in3,$in3,$in3,$inpperm
+ lvx_u $in5,$x50,$inp
+ vncipherlast $out7,$out7,$in6
+ le?vperm $in4,$in4,$in4,$inpperm
+ lvx_u $in6,$x60,$inp
+ vmr $ivec,$in7
+ le?vperm $in5,$in5,$in5,$inpperm
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $in6,$in6,$in6,$inpperm
+ vxor $out0,$in0,$rndkey0
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $in7,$in7,$in7,$inpperm
+ vxor $out1,$in1,$rndkey0
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ vxor $out2,$in2,$rndkey0
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ vxor $out3,$in3,$rndkey0
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ vxor $out4,$in4,$rndkey0
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ vxor $out5,$in5,$rndkey0
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x60,$out
+ vxor $out6,$in6,$rndkey0
+ stvx_u $out7,$x70,$out
+ addi $out,$out,0x80
+ vxor $out7,$in7,$rndkey0
+
+ mtctr $rounds
+ beq Loop_cbc_dec8x # did $len-=128 borrow?
+
+ addic. $len,$len,128
+ beq Lcbc_dec8x_done
+ nop
+ nop
+
+Loop_cbc_dec8x_tail: # up to 7 "words" tail...
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_cbc_dec8x_tail
+
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+
+ vncipher $out1,$out1,v26
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vncipher $out4,$out4,v26
+ vncipher $out5,$out5,v26
+ vncipher $out6,$out6,v26
+ vncipher $out7,$out7,v26
+
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vncipher $out4,$out4,v27
+ vncipher $out5,$out5,v27
+ vncipher $out6,$out6,v27
+ vncipher $out7,$out7,v27
+
+ vncipher $out1,$out1,v28
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vncipher $out4,$out4,v28
+ vncipher $out5,$out5,v28
+ vncipher $out6,$out6,v28
+ vncipher $out7,$out7,v28
+
+ vncipher $out1,$out1,v29
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vncipher $out4,$out4,v29
+ vncipher $out5,$out5,v29
+ vncipher $out6,$out6,v29
+ vncipher $out7,$out7,v29
+
+ vncipher $out1,$out1,v30
+ vxor $ivec,$ivec,v31 # last round key
+ vncipher $out2,$out2,v30
+ vxor $in1,$in1,v31
+ vncipher $out3,$out3,v30
+ vxor $in2,$in2,v31
+ vncipher $out4,$out4,v30
+ vxor $in3,$in3,v31
+ vncipher $out5,$out5,v30
+ vxor $in4,$in4,v31
+ vncipher $out6,$out6,v30
+ vxor $in5,$in5,v31
+ vncipher $out7,$out7,v30
+ vxor $in6,$in6,v31
+
+ cmplwi $len,32 # switch($len)
+ blt Lcbc_dec8x_one
+ nop
+ beq Lcbc_dec8x_two
+ cmplwi $len,64
+ blt Lcbc_dec8x_three
+ nop
+ beq Lcbc_dec8x_four
+ cmplwi $len,96
+ blt Lcbc_dec8x_five
+ nop
+ beq Lcbc_dec8x_six
+
+Lcbc_dec8x_seven:
+ vncipherlast $out1,$out1,$ivec
+ vncipherlast $out2,$out2,$in1
+ vncipherlast $out3,$out3,$in2
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out1,$out1,$out1,$inpperm
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x00,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x10,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x20,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x30,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x40,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x50,$out
+ stvx_u $out7,$x60,$out
+ addi $out,$out,0x70
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_six:
+ vncipherlast $out2,$out2,$ivec
+ vncipherlast $out3,$out3,$in2
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out2,$out2,$out2,$inpperm
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x00,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x10,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x20,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x30,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x40,$out
+ stvx_u $out7,$x50,$out
+ addi $out,$out,0x60
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_five:
+ vncipherlast $out3,$out3,$ivec
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out3,$out3,$out3,$inpperm
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x00,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x10,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x20,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x30,$out
+ stvx_u $out7,$x40,$out
+ addi $out,$out,0x50
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_four:
+ vncipherlast $out4,$out4,$ivec
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out4,$out4,$out4,$inpperm
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x00,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x10,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x20,$out
+ stvx_u $out7,$x30,$out
+ addi $out,$out,0x40
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_three:
+ vncipherlast $out5,$out5,$ivec
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out5,$out5,$out5,$inpperm
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x00,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x10,$out
+ stvx_u $out7,$x20,$out
+ addi $out,$out,0x30
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_two:
+ vncipherlast $out6,$out6,$ivec
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out6,$out6,$out6,$inpperm
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x00,$out
+ stvx_u $out7,$x10,$out
+ addi $out,$out,0x20
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_one:
+ vncipherlast $out7,$out7,$ivec
+ vmr $ivec,$in7
+
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out7,0,$out
+ addi $out,$out,0x10
+
+Lcbc_dec8x_done:
+ le?vperm $ivec,$ivec,$ivec,$inpperm
+ stvx_u $ivec,0,$ivp # write [unaligned] iv
+
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $inpperm,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
+___
+}} }}}
+
+#########################################################################
+{{{ # CTR procedure[s] #
+my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
+ map("v$_",(4..11));
+my $dat=$tmp;
+
+$code.=<<___;
+.globl .${prefix}_ctr32_encrypt_blocks
+.align 5
+.${prefix}_ctr32_encrypt_blocks:
+ ${UCMP}i $len,1
+ bltlr-
+
+ lis r0,0xfff0
+ mfspr $vrsave,256
+ mtspr 256,r0
+
+ li $idx,15
+ vxor $rndkey0,$rndkey0,$rndkey0
+ le?vspltisb $tmp,0x0f
+
+ lvx $ivec,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ vspltisb $one,1
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $ivec,$ivec,$inptail,$inpperm
+ vsldoi $one,$rndkey0,$one,1
+
+ neg r11,$inp
+ ?lvsl $keyperm,0,$key # prepare for unaligned key
+ lwz $rounds,240($key)
+
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inptail,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ srwi $rounds,$rounds,1
+ li $idx,16
+ subi $rounds,$rounds,1
+
+ ${UCMP}i $len,8
+ bge _aesp8_ctr32_encrypt8x
+
+ ?lvsr $outperm,0,$out # prepare for unaligned store
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
+ le?vxor $outperm,$outperm,$tmp
+
+ lvx $rndkey0,0,$key
+ mtctr $rounds
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$ivec,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ b Loop_ctr32_enc
+
+.align 5
+Loop_ctr32_enc:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_ctr32_enc
+
+ vadduwm $ivec,$ivec,$one
+ vmr $dat,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ subic. $len,$len,1 # blocks--
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ vperm $dat,$dat,$inptail,$inpperm
+ li $idx,16
+ ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
+ lvx $rndkey0,0,$key
+ vxor $dat,$dat,$rndkey1 # last round key
+ vcipherlast $inout,$inout,$dat
+
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inout,$outperm
+ vsel $dat,$outhead,$inout,$outmask
+ mtctr $rounds
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vmr $outhead,$inout
+ vxor $inout,$ivec,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ stvx $dat,0,$out
+ addi $out,$out,16
+ bne Loop_ctr32_enc
+
+ addi $out,$out,-1
+ lvx $inout,0,$out # redundant in aligned case
+ vsel $inout,$outhead,$inout,$outmask
+ stvx $inout,0,$out
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,6,0
+ .long 0
+___
+#########################################################################
+{{ # Optimized CTR procedure #
+my $key_="r11";
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
+ $x00=0 if ($flavour =~ /osx/);
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
+my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
+ # v26-v31 last 6 round keys
+my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
+my ($two,$three,$four)=($outhead,$outperm,$outmask);
+
+$code.=<<___;
+.align 5
+_aesp8_ctr32_encrypt8x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ li r10,`$FRAME+8*16+15`
+ li r11,`$FRAME+8*16+31`
+ stvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+
+ lvx $rndkey0,$x00,$key # load key schedule
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ lvx v31,$x00,$key
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,`$FRAME+15`
+ mtctr $rounds
+
+Load_ctr32_enc_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_ctr32_enc_key
+
+ lvx v26,$x10,$key
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key
+ ?vperm v29,v29,v30,$keyperm
+ lvx $out0,$x70,$key # borrow $out0
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$out0,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ vadduwm $two,$one,$one
+ subi $inp,$inp,15 # undo "caller"
+ $SHL $len,$len,4
+
+ vadduwm $out1,$ivec,$one # counter values ...
+ vadduwm $out2,$ivec,$two
+ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
+ le?li $idx,8
+ vadduwm $out3,$out1,$two
+ vxor $out1,$out1,$rndkey0
+ le?lvsl $inpperm,0,$idx
+ vadduwm $out4,$out2,$two
+ vxor $out2,$out2,$rndkey0
+ le?vspltisb $tmp,0x0f
+ vadduwm $out5,$out3,$two
+ vxor $out3,$out3,$rndkey0
+ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
+ vadduwm $out6,$out4,$two
+ vxor $out4,$out4,$rndkey0
+ vadduwm $out7,$out5,$two
+ vxor $out5,$out5,$rndkey0
+ vadduwm $ivec,$out6,$two # next counter value
+ vxor $out6,$out6,$rndkey0
+ vxor $out7,$out7,$rndkey0
+
+ mtctr $rounds
+ b Loop_ctr32_enc8x
+.align 5
+Loop_ctr32_enc8x:
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+ vcipher $out6,$out6,v24
+ vcipher $out7,$out7,v24
+Loop_ctr32_enc8x_middle:
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+ vcipher $out6,$out6,v25
+ vcipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_ctr32_enc8x
+
+ subic r11,$len,256 # $len-256, borrow $key_
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+ vcipher $out6,$out6,v24
+ vcipher $out7,$out7,v24
+
+ subfe r0,r0,r0 # borrow?-1:0
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+ vcipher $out6,$out6,v25
+ vcipher $out7,$out7,v25
+
+ and r0,r0,r11
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vcipher $out0,$out0,v26
+ vcipher $out1,$out1,v26
+ vcipher $out2,$out2,v26
+ vcipher $out3,$out3,v26
+ vcipher $out4,$out4,v26
+ vcipher $out5,$out5,v26
+ vcipher $out6,$out6,v26
+ vcipher $out7,$out7,v26
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ subic $len,$len,129 # $len-=129
+ vcipher $out0,$out0,v27
+ addi $len,$len,1 # $len-=128 really
+ vcipher $out1,$out1,v27
+ vcipher $out2,$out2,v27
+ vcipher $out3,$out3,v27
+ vcipher $out4,$out4,v27
+ vcipher $out5,$out5,v27
+ vcipher $out6,$out6,v27
+ vcipher $out7,$out7,v27
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+
+ vcipher $out0,$out0,v28
+ lvx_u $in0,$x00,$inp # load input
+ vcipher $out1,$out1,v28
+ lvx_u $in1,$x10,$inp
+ vcipher $out2,$out2,v28
+ lvx_u $in2,$x20,$inp
+ vcipher $out3,$out3,v28
+ lvx_u $in3,$x30,$inp
+ vcipher $out4,$out4,v28
+ lvx_u $in4,$x40,$inp
+ vcipher $out5,$out5,v28
+ lvx_u $in5,$x50,$inp
+ vcipher $out6,$out6,v28
+ lvx_u $in6,$x60,$inp
+ vcipher $out7,$out7,v28
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+
+ vcipher $out0,$out0,v29
+ le?vperm $in0,$in0,$in0,$inpperm
+ vcipher $out1,$out1,v29
+ le?vperm $in1,$in1,$in1,$inpperm
+ vcipher $out2,$out2,v29
+ le?vperm $in2,$in2,$in2,$inpperm
+ vcipher $out3,$out3,v29
+ le?vperm $in3,$in3,$in3,$inpperm
+ vcipher $out4,$out4,v29
+ le?vperm $in4,$in4,$in4,$inpperm
+ vcipher $out5,$out5,v29
+ le?vperm $in5,$in5,$in5,$inpperm
+ vcipher $out6,$out6,v29
+ le?vperm $in6,$in6,$in6,$inpperm
+ vcipher $out7,$out7,v29
+ le?vperm $in7,$in7,$in7,$inpperm
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in7 are loaded
+ # with last "words"
+ subfe. r0,r0,r0 # borrow?-1:0
+ vcipher $out0,$out0,v30
+ vxor $in0,$in0,v31 # xor with last round key
+ vcipher $out1,$out1,v30
+ vxor $in1,$in1,v31
+ vcipher $out2,$out2,v30
+ vxor $in2,$in2,v31
+ vcipher $out3,$out3,v30
+ vxor $in3,$in3,v31
+ vcipher $out4,$out4,v30
+ vxor $in4,$in4,v31
+ vcipher $out5,$out5,v30
+ vxor $in5,$in5,v31
+ vcipher $out6,$out6,v30
+ vxor $in6,$in6,v31
+ vcipher $out7,$out7,v30
+ vxor $in7,$in7,v31
+
+ bne Lctr32_enc8x_break # did $len-129 borrow?
+
+ vcipherlast $in0,$out0,$in0
+ vcipherlast $in1,$out1,$in1
+ vadduwm $out1,$ivec,$one # counter values ...
+ vcipherlast $in2,$out2,$in2
+ vadduwm $out2,$ivec,$two
+ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
+ vcipherlast $in3,$out3,$in3
+ vadduwm $out3,$out1,$two
+ vxor $out1,$out1,$rndkey0
+ vcipherlast $in4,$out4,$in4
+ vadduwm $out4,$out2,$two
+ vxor $out2,$out2,$rndkey0
+ vcipherlast $in5,$out5,$in5
+ vadduwm $out5,$out3,$two
+ vxor $out3,$out3,$rndkey0
+ vcipherlast $in6,$out6,$in6
+ vadduwm $out6,$out4,$two
+ vxor $out4,$out4,$rndkey0
+ vcipherlast $in7,$out7,$in7
+ vadduwm $out7,$out5,$two
+ vxor $out5,$out5,$rndkey0
+ le?vperm $in0,$in0,$in0,$inpperm
+ vadduwm $ivec,$out6,$two # next counter value
+ vxor $out6,$out6,$rndkey0
+ le?vperm $in1,$in1,$in1,$inpperm
+ vxor $out7,$out7,$rndkey0
+ mtctr $rounds
+
+ vcipher $out0,$out0,v24
+ stvx_u $in0,$x00,$out
+ le?vperm $in2,$in2,$in2,$inpperm
+ vcipher $out1,$out1,v24
+ stvx_u $in1,$x10,$out
+ le?vperm $in3,$in3,$in3,$inpperm
+ vcipher $out2,$out2,v24
+ stvx_u $in2,$x20,$out
+ le?vperm $in4,$in4,$in4,$inpperm
+ vcipher $out3,$out3,v24
+ stvx_u $in3,$x30,$out
+ le?vperm $in5,$in5,$in5,$inpperm
+ vcipher $out4,$out4,v24
+ stvx_u $in4,$x40,$out
+ le?vperm $in6,$in6,$in6,$inpperm
+ vcipher $out5,$out5,v24
+ stvx_u $in5,$x50,$out
+ le?vperm $in7,$in7,$in7,$inpperm
+ vcipher $out6,$out6,v24
+ stvx_u $in6,$x60,$out
+ vcipher $out7,$out7,v24
+ stvx_u $in7,$x70,$out
+ addi $out,$out,0x80
+
+ b Loop_ctr32_enc8x_middle
+
+.align 5
+Lctr32_enc8x_break:
+ cmpwi $len,-0x60
+ blt Lctr32_enc8x_one
+ nop
+ beq Lctr32_enc8x_two
+ cmpwi $len,-0x40
+ blt Lctr32_enc8x_three
+ nop
+ beq Lctr32_enc8x_four
+ cmpwi $len,-0x20
+ blt Lctr32_enc8x_five
+ nop
+ beq Lctr32_enc8x_six
+ cmpwi $len,0x00
+ blt Lctr32_enc8x_seven
+
+Lctr32_enc8x_eight:
+ vcipherlast $out0,$out0,$in0
+ vcipherlast $out1,$out1,$in1
+ vcipherlast $out2,$out2,$in2
+ vcipherlast $out3,$out3,$in3
+ vcipherlast $out4,$out4,$in4
+ vcipherlast $out5,$out5,$in5
+ vcipherlast $out6,$out6,$in6
+ vcipherlast $out7,$out7,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x60,$out
+ stvx_u $out7,$x70,$out
+ addi $out,$out,0x80
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_seven:
+ vcipherlast $out0,$out0,$in1
+ vcipherlast $out1,$out1,$in2
+ vcipherlast $out2,$out2,$in3
+ vcipherlast $out3,$out3,$in4
+ vcipherlast $out4,$out4,$in5
+ vcipherlast $out5,$out5,$in6
+ vcipherlast $out6,$out6,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ stvx_u $out6,$x60,$out
+ addi $out,$out,0x70
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_six:
+ vcipherlast $out0,$out0,$in2
+ vcipherlast $out1,$out1,$in3
+ vcipherlast $out2,$out2,$in4
+ vcipherlast $out3,$out3,$in5
+ vcipherlast $out4,$out4,$in6
+ vcipherlast $out5,$out5,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ stvx_u $out5,$x50,$out
+ addi $out,$out,0x60
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_five:
+ vcipherlast $out0,$out0,$in3
+ vcipherlast $out1,$out1,$in4
+ vcipherlast $out2,$out2,$in5
+ vcipherlast $out3,$out3,$in6
+ vcipherlast $out4,$out4,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ stvx_u $out4,$x40,$out
+ addi $out,$out,0x50
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_four:
+ vcipherlast $out0,$out0,$in4
+ vcipherlast $out1,$out1,$in5
+ vcipherlast $out2,$out2,$in6
+ vcipherlast $out3,$out3,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ stvx_u $out3,$x30,$out
+ addi $out,$out,0x40
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_three:
+ vcipherlast $out0,$out0,$in5
+ vcipherlast $out1,$out1,$in6
+ vcipherlast $out2,$out2,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ stvx_u $out2,$x20,$out
+ addi $out,$out,0x30
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_two:
+ vcipherlast $out0,$out0,$in6
+ vcipherlast $out1,$out1,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ stvx_u $out1,$x10,$out
+ addi $out,$out,0x20
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_one:
+ vcipherlast $out0,$out0,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ stvx_u $out0,0,$out
+ addi $out,$out,0x10
+
+Lctr32_enc8x_done:
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $inpperm,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
+___
+}} }}}
+
+#########################################################################
+{{{ # XTS procedures #
+# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
+# const AES_KEY *key1, const AES_KEY *key2, #
+# [const] unsigned char iv[16]); #
+# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
+# input tweak value is assumed to be encrypted already, and last tweak #
+# value, one suitable for consecutive call on same chunk of data, is #
+# written back to original buffer. In addition, in "tweak chaining" #
+# mode only complete input blocks are processed. #
+
+my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
+my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
+my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
+my $taillen = $key2;
+
+ ($inp,$idx) = ($idx,$inp); # reassign
+
+$code.=<<___;
+.globl .${prefix}_xts_encrypt
+.align 5
+.${prefix}_xts_encrypt:
+ mr $inp,r3 # reassign
+ li r3,-1
+ ${UCMP}i $len,16
+ bltlr-
+
+ lis r0,0xfff0
+ mfspr r12,256 # save vrsave
+ li r11,0
+ mtspr 256,r0
+
+ vspltisb $seven,0x07 # 0x070707..07
+ le?lvsl $leperm,r11,r11
+ le?vspltisb $tmp,0x0f
+ le?vxor $leperm,$leperm,$seven
+
+ li $idx,15
+ lvx $tweak,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $tweak,$tweak,$inptail,$inpperm
+
+ neg r11,$inp
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inout,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ ${UCMP}i $key2,0 # key2==NULL?
+ beq Lxts_enc_no_key2
+
+ ?lvsl $keyperm,0,$key2 # prepare for unaligned key
+ lwz $rounds,240($key2)
+ srwi $rounds,$rounds,1
+ subi $rounds,$rounds,1
+ li $idx,16
+
+ lvx $rndkey0,0,$key2
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $tweak,$tweak,$rndkey0
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ mtctr $rounds
+
+Ltweak_xts_enc:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $tweak,$tweak,$rndkey0
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ bdnz Ltweak_xts_enc
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipherlast $tweak,$tweak,$rndkey0
+
+ li $ivp,0 # don't chain the tweak
+ b Lxts_enc
+
+Lxts_enc_no_key2:
+ li $idx,-16
+ and $len,$len,$idx # in "tweak chaining"
+ # mode only complete
+ # blocks are processed
+Lxts_enc:
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+
+ ?lvsl $keyperm,0,$key1 # prepare for unaligned key
+ lwz $rounds,240($key1)
+ srwi $rounds,$rounds,1
+ subi $rounds,$rounds,1
+ li $idx,16
+
+ vslb $eighty7,$seven,$seven # 0x808080..80
+ vor $eighty7,$eighty7,$seven # 0x878787..87
+ vspltisb $tmp,1 # 0x010101..01
+ vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
+
+ ${UCMP}i $len,96
+ bge _aesp8_xts_encrypt6x
+
+ andi. $taillen,$len,15
+ subic r0,$len,32
+ subi $taillen,$taillen,16
+ subfe r0,r0,r0
+ and r0,r0,$taillen
+ add $inp,$inp,r0
+
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ mtctr $rounds
+ b Loop_xts_enc
+
+.align 5
+Loop_xts_enc:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ bdnz Loop_xts_enc
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $rndkey0,$rndkey0,$tweak
+ vcipherlast $output,$inout,$rndkey0
+
+ le?vperm $tmp,$output,$output,$leperm
+ be?nop
+ le?stvx_u $tmp,0,$out
+ be?stvx_u $output,0,$out
+ addi $out,$out,16
+
+ subic. $len,$len,16
+ beq Lxts_enc_done
+
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+
+ subic r0,$len,32
+ subfe r0,r0,r0
+ and r0,r0,$taillen
+ add $inp,$inp,r0
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak
+ vxor $output,$output,$rndkey0 # just in case $len<16
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+
+ mtctr $rounds
+ ${UCMP}i $len,16
+ bge Loop_xts_enc
+
+ vxor $output,$output,$tweak
+ lvsr $inpperm,0,$len # $inpperm is no longer needed
+ vxor $inptail,$inptail,$inptail # $inptail is no longer needed
+ vspltisb $tmp,-1
+ vperm $inptail,$inptail,$tmp,$inpperm
+ vsel $inout,$inout,$output,$inptail
+
+ subi r11,$out,17
+ subi $out,$out,16
+ mtctr $len
+ li $len,16
+Loop_xts_enc_steal:
+ lbzu r0,1(r11)
+ stb r0,16(r11)
+ bdnz Loop_xts_enc_steal
+
+ mtctr $rounds
+ b Loop_xts_enc # one more time...
+
+Lxts_enc_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_enc_ret
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_enc_ret:
+ mtspr 256,r12 # restore vrsave
+ li r3,0
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
+
+.globl .${prefix}_xts_decrypt
+.align 5
+.${prefix}_xts_decrypt:
+ mr $inp,r3 # reassign
+ li r3,-1
+ ${UCMP}i $len,16
+ bltlr-
+
+ lis r0,0xfff8
+ mfspr r12,256 # save vrsave
+ li r11,0
+ mtspr 256,r0
+
+ andi. r0,$len,15
+ neg r0,r0
+ andi. r0,r0,16
+ sub $len,$len,r0
+
+ vspltisb $seven,0x07 # 0x070707..07
+ le?lvsl $leperm,r11,r11
+ le?vspltisb $tmp,0x0f
+ le?vxor $leperm,$leperm,$seven
+
+ li $idx,15
+ lvx $tweak,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $tweak,$tweak,$inptail,$inpperm
+
+ neg r11,$inp
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inout,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ ${UCMP}i $key2,0 # key2==NULL?
+ beq Lxts_dec_no_key2
+
+ ?lvsl $keyperm,0,$key2 # prepare for unaligned key
+ lwz $rounds,240($key2)
+ srwi $rounds,$rounds,1
+ subi $rounds,$rounds,1
+ li $idx,16
+
+ lvx $rndkey0,0,$key2
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $tweak,$tweak,$rndkey0
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ mtctr $rounds
+
+Ltweak_xts_dec:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $tweak,$tweak,$rndkey0
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ bdnz Ltweak_xts_dec
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipherlast $tweak,$tweak,$rndkey0
+
+ li $ivp,0 # don't chain the tweak
+ b Lxts_dec
+
+Lxts_dec_no_key2:
+ neg $idx,$len
+ andi. $idx,$idx,15
+ add $len,$len,$idx # in "tweak chaining"
+ # mode only complete
+ # blocks are processed
+Lxts_dec:
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+
+ ?lvsl $keyperm,0,$key1 # prepare for unaligned key
+ lwz $rounds,240($key1)
+ srwi $rounds,$rounds,1
+ subi $rounds,$rounds,1
+ li $idx,16
+
+ vslb $eighty7,$seven,$seven # 0x808080..80
+ vor $eighty7,$eighty7,$seven # 0x878787..87
+ vspltisb $tmp,1 # 0x010101..01
+ vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
+
+ ${UCMP}i $len,96
+ bge _aesp8_xts_decrypt6x
+
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ mtctr $rounds
+
+ ${UCMP}i $len,16
+ blt Ltail_xts_dec
+ be?b Loop_xts_dec
+
+.align 5
+Loop_xts_dec:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ bdnz Loop_xts_dec
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $rndkey0,$rndkey0,$tweak
+ vncipherlast $output,$inout,$rndkey0
+
+ le?vperm $tmp,$output,$output,$leperm
+ be?nop
+ le?stvx_u $tmp,0,$out
+ be?stvx_u $output,0,$out
+ addi $out,$out,16
+
+ subic. $len,$len,16
+ beq Lxts_dec_done
+
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+
+ mtctr $rounds
+ ${UCMP}i $len,16
+ bge Loop_xts_dec
+
+Ltail_xts_dec:
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak1,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak1,$tweak1,$tmp
+
+ subi $inp,$inp,16
+ add $inp,$inp,$len
+
+ vxor $inout,$inout,$tweak # :-(
+ vxor $inout,$inout,$tweak1 # :-)
+
+Loop_xts_dec_short:
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ bdnz Loop_xts_dec_short
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $rndkey0,$rndkey0,$tweak1
+ vncipherlast $output,$inout,$rndkey0
+
+ le?vperm $tmp,$output,$output,$leperm
+ be?nop
+ le?stvx_u $tmp,0,$out
+ be?stvx_u $output,0,$out
+
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ #addi $inp,$inp,16
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+
+ lvsr $inpperm,0,$len # $inpperm is no longer needed
+ vxor $inptail,$inptail,$inptail # $inptail is no longer needed
+ vspltisb $tmp,-1
+ vperm $inptail,$inptail,$tmp,$inpperm
+ vsel $inout,$inout,$output,$inptail
+
+ vxor $rndkey0,$rndkey0,$tweak
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+
+ subi r11,$out,1
+ mtctr $len
+ li $len,16
+Loop_xts_dec_steal:
+ lbzu r0,1(r11)
+ stb r0,16(r11)
+ bdnz Loop_xts_dec_steal
+
+ mtctr $rounds
+ b Loop_xts_dec # one more time...
+
+Lxts_dec_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_dec_ret
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_dec_ret:
+ mtspr 256,r12 # restore vrsave
+ li r3,0
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
+___
+#########################################################################
+{{ # Optimized XTS procedures #
+my $key_=$key2;
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
+ $x00=0 if ($flavour =~ /osx/);
+my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
+my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
+my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
+my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
+ # v26-v31 last 6 round keys
+my ($keyperm)=($out0); # aliases with "caller", redundant assignment
+my $taillen=$x70;
+
+$code.=<<___;
+.align 5
+_aesp8_xts_encrypt6x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ mflr r11
+ li r7,`$FRAME+8*16+15`
+ li r3,`$FRAME+8*16+31`
+ $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
+ stvx v20,r7,$sp # ABI says so
+ addi r7,r7,32
+ stvx v21,r3,$sp
+ addi r3,r3,32
+ stvx v22,r7,$sp
+ addi r7,r7,32
+ stvx v23,r3,$sp
+ addi r3,r3,32
+ stvx v24,r7,$sp
+ addi r7,r7,32
+ stvx v25,r3,$sp
+ addi r3,r3,32
+ stvx v26,r7,$sp
+ addi r7,r7,32
+ stvx v27,r3,$sp
+ addi r3,r3,32
+ stvx v28,r7,$sp
+ addi r7,r7,32
+ stvx v29,r3,$sp
+ addi r3,r3,32
+ stvx v30,r7,$sp
+ stvx v31,r3,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+
+ lvx $rndkey0,$x00,$key1 # load key schedule
+ lvx v30,$x10,$key1
+ addi $key1,$key1,0x20
+ lvx v31,$x00,$key1
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,`$FRAME+15`
+ mtctr $rounds
+
+Load_xts_enc_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key1
+ addi $key1,$key1,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key1
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_xts_enc_key
+
+ lvx v26,$x10,$key1
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key1
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key1
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key1
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key1
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key1
+ ?vperm v29,v29,v30,$keyperm
+ lvx $twk5,$x70,$key1 # borrow $twk5
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$twk5,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ vperm $in0,$inout,$inptail,$inpperm
+ subi $inp,$inp,31 # undo "caller"
+ vxor $twk0,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $out0,$in0,$twk0
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in1,$x10,$inp
+ vxor $twk1,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in1,$in1,$in1,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out1,$in1,$twk1
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in2,$x20,$inp
+ andi. $taillen,$len,15
+ vxor $twk2,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in2,$in2,$in2,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out2,$in2,$twk2
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in3,$x30,$inp
+ sub $len,$len,$taillen
+ vxor $twk3,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in3,$in3,$in3,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out3,$in3,$twk3
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in4,$x40,$inp
+ subi $len,$len,0x60
+ vxor $twk4,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in4,$in4,$in4,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out4,$in4,$twk4
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ vxor $twk5,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in5,$in5,$in5,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out5,$in5,$twk5
+ vxor $tweak,$tweak,$tmp
+
+ vxor v31,v31,$rndkey0
+ mtctr $rounds
+ b Loop_xts_enc6x
+
+.align 5
+Loop_xts_enc6x:
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_enc6x
+
+ subic $len,$len,96 # $len-=96
+ vxor $in0,$twk0,v31 # xor with last round key
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk0,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+
+ subfe. r0,r0,r0 # borrow?-1:0
+ vand $tmp,$tmp,$eighty7
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vxor $tweak,$tweak,$tmp
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vxor $in1,$twk1,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk1,$tweak,$rndkey0
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+
+ and r0,r0,$len
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipher $out0,$out0,v26
+ vcipher $out1,$out1,v26
+ vand $tmp,$tmp,$eighty7
+ vcipher $out2,$out2,v26
+ vcipher $out3,$out3,v26
+ vxor $tweak,$tweak,$tmp
+ vcipher $out4,$out4,v26
+ vcipher $out5,$out5,v26
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in5 are loaded
+ # with last "words"
+ vxor $in2,$twk2,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk2,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vcipher $out0,$out0,v27
+ vcipher $out1,$out1,v27
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipher $out2,$out2,v27
+ vcipher $out3,$out3,v27
+ vand $tmp,$tmp,$eighty7
+ vcipher $out4,$out4,v27
+ vcipher $out5,$out5,v27
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vxor $tweak,$tweak,$tmp
+ vcipher $out0,$out0,v28
+ vcipher $out1,$out1,v28
+ vxor $in3,$twk3,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk3,$tweak,$rndkey0
+ vcipher $out2,$out2,v28
+ vcipher $out3,$out3,v28
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipher $out4,$out4,v28
+ vcipher $out5,$out5,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vand $tmp,$tmp,$eighty7
+
+ vcipher $out0,$out0,v29
+ vcipher $out1,$out1,v29
+ vxor $tweak,$tweak,$tmp
+ vcipher $out2,$out2,v29
+ vcipher $out3,$out3,v29
+ vxor $in4,$twk4,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk4,$tweak,$rndkey0
+ vcipher $out4,$out4,v29
+ vcipher $out5,$out5,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+
+ vcipher $out0,$out0,v30
+ vcipher $out1,$out1,v30
+ vand $tmp,$tmp,$eighty7
+ vcipher $out2,$out2,v30
+ vcipher $out3,$out3,v30
+ vxor $tweak,$tweak,$tmp
+ vcipher $out4,$out4,v30
+ vcipher $out5,$out5,v30
+ vxor $in5,$twk5,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk5,$tweak,$rndkey0
+
+ vcipherlast $out0,$out0,$in0
+ lvx_u $in0,$x00,$inp # load next input block
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vcipherlast $out1,$out1,$in1
+ lvx_u $in1,$x10,$inp
+ vcipherlast $out2,$out2,$in2
+ le?vperm $in0,$in0,$in0,$leperm
+ lvx_u $in2,$x20,$inp
+ vand $tmp,$tmp,$eighty7
+ vcipherlast $out3,$out3,$in3
+ le?vperm $in1,$in1,$in1,$leperm
+ lvx_u $in3,$x30,$inp
+ vcipherlast $out4,$out4,$in4
+ le?vperm $in2,$in2,$in2,$leperm
+ lvx_u $in4,$x40,$inp
+ vxor $tweak,$tweak,$tmp
+ vcipherlast $tmp,$out5,$in5 # last block might be needed
+ # in stealing mode
+ le?vperm $in3,$in3,$in3,$leperm
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ le?vperm $in4,$in4,$in4,$leperm
+ le?vperm $in5,$in5,$in5,$leperm
+
+ le?vperm $out0,$out0,$out0,$leperm
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk0
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ vxor $out1,$in1,$twk1
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ vxor $out2,$in2,$twk2
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ vxor $out3,$in3,$twk3
+ le?vperm $out5,$tmp,$tmp,$leperm
+ stvx_u $out4,$x40,$out
+ vxor $out4,$in4,$twk4
+ le?stvx_u $out5,$x50,$out
+ be?stvx_u $tmp, $x50,$out
+ vxor $out5,$in5,$twk5
+ addi $out,$out,0x60
+
+ mtctr $rounds
+ beq Loop_xts_enc6x # did $len-=96 borrow?
+
+ addic. $len,$len,0x60
+ beq Lxts_enc6x_zero
+ cmpwi $len,0x20
+ blt Lxts_enc6x_one
+ nop
+ beq Lxts_enc6x_two
+ cmpwi $len,0x40
+ blt Lxts_enc6x_three
+ nop
+ beq Lxts_enc6x_four
+
+Lxts_enc6x_five:
+ vxor $out0,$in1,$twk0
+ vxor $out1,$in2,$twk1
+ vxor $out2,$in3,$twk2
+ vxor $out3,$in4,$twk3
+ vxor $out4,$in5,$twk4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk5 # unused tweak
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ vxor $tmp,$out4,$twk5 # last block prep for stealing
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ stvx_u $out4,$x40,$out
+ addi $out,$out,0x50
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_four:
+ vxor $out0,$in2,$twk0
+ vxor $out1,$in3,$twk1
+ vxor $out2,$in4,$twk2
+ vxor $out3,$in5,$twk3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk4 # unused tweak
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ vxor $tmp,$out3,$twk4 # last block prep for stealing
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ stvx_u $out3,$x30,$out
+ addi $out,$out,0x40
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_three:
+ vxor $out0,$in3,$twk0
+ vxor $out1,$in4,$twk1
+ vxor $out2,$in5,$twk2
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk3 # unused tweak
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $tmp,$out2,$twk3 # last block prep for stealing
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ stvx_u $out2,$x20,$out
+ addi $out,$out,0x30
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_two:
+ vxor $out0,$in4,$twk0
+ vxor $out1,$in5,$twk1
+ vxor $out2,$out2,$out2
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk2 # unused tweak
+ vxor $tmp,$out1,$twk2 # last block prep for stealing
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ stvx_u $out1,$x10,$out
+ addi $out,$out,0x20
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_one:
+ vxor $out0,$in5,$twk0
+ nop
+Loop_xts_enc1x:
+ vcipher $out0,$out0,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_enc1x
+
+ add $inp,$inp,$taillen
+ cmpwi $taillen,0
+ vcipher $out0,$out0,v24
+
+ subi $inp,$inp,16
+ vcipher $out0,$out0,v25
+
+ lvsr $inpperm,0,$taillen
+ vcipher $out0,$out0,v26
+
+ lvx_u $in0,0,$inp
+ vcipher $out0,$out0,v27
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vcipher $out0,$out0,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vcipher $out0,$out0,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $twk0,$twk0,v31
+
+ le?vperm $in0,$in0,$in0,$leperm
+ vcipher $out0,$out0,v30
+
+ vperm $in0,$in0,$in0,$inpperm
+ vcipherlast $out0,$out0,$twk0
+
+ vmr $twk0,$twk1 # unused tweak
+ vxor $tmp,$out0,$twk1 # last block prep for stealing
+ le?vperm $out0,$out0,$out0,$leperm
+ stvx_u $out0,$x00,$out # store output
+ addi $out,$out,0x10
+ bne Lxts_enc6x_steal
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_zero:
+ cmpwi $taillen,0
+ beq Lxts_enc6x_done
+
+ add $inp,$inp,$taillen
+ subi $inp,$inp,16
+ lvx_u $in0,0,$inp
+ lvsr $inpperm,0,$taillen # $in5 is no more
+ le?vperm $in0,$in0,$in0,$leperm
+ vperm $in0,$in0,$in0,$inpperm
+ vxor $tmp,$tmp,$twk0
+Lxts_enc6x_steal:
+ vxor $in0,$in0,$twk0
+ vxor $out0,$out0,$out0
+ vspltisb $out1,-1
+ vperm $out0,$out0,$out1,$inpperm
+ vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
+
+ subi r30,$out,17
+ subi $out,$out,16
+ mtctr $taillen
+Loop_xts_enc6x_steal:
+ lbzu r0,1(r30)
+ stb r0,16(r30)
+ bdnz Loop_xts_enc6x_steal
+
+ li $taillen,0
+ mtctr $rounds
+ b Loop_xts_enc1x # one more time...
+
+.align 4
+Lxts_enc6x_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_enc6x_ret
+
+ vxor $tweak,$twk0,$rndkey0
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_enc6x_ret:
+ mtlr r11
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $seven,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x04,1,0x80,6,6,0
+ .long 0
+
+.align 5
+_aesp8_xts_enc5x:
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz _aesp8_xts_enc5x
+
+ add $inp,$inp,$taillen
+ cmpwi $taillen,0
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+
+ subi $inp,$inp,16
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vxor $twk0,$twk0,v31
+
+ vcipher $out0,$out0,v26
+ lvsr $inpperm,0,$taillen # $in5 is no more
+ vcipher $out1,$out1,v26
+ vcipher $out2,$out2,v26
+ vcipher $out3,$out3,v26
+ vcipher $out4,$out4,v26
+ vxor $in1,$twk1,v31
+
+ vcipher $out0,$out0,v27
+ lvx_u $in0,0,$inp
+ vcipher $out1,$out1,v27
+ vcipher $out2,$out2,v27
+ vcipher $out3,$out3,v27
+ vcipher $out4,$out4,v27
+ vxor $in2,$twk2,v31
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vcipher $out0,$out0,v28
+ vcipher $out1,$out1,v28
+ vcipher $out2,$out2,v28
+ vcipher $out3,$out3,v28
+ vcipher $out4,$out4,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vxor $in3,$twk3,v31
+
+ vcipher $out0,$out0,v29
+ le?vperm $in0,$in0,$in0,$leperm
+ vcipher $out1,$out1,v29
+ vcipher $out2,$out2,v29
+ vcipher $out3,$out3,v29
+ vcipher $out4,$out4,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $in4,$twk4,v31
+
+ vcipher $out0,$out0,v30
+ vperm $in0,$in0,$in0,$inpperm
+ vcipher $out1,$out1,v30
+ vcipher $out2,$out2,v30
+ vcipher $out3,$out3,v30
+ vcipher $out4,$out4,v30
+
+ vcipherlast $out0,$out0,$twk0
+ vcipherlast $out1,$out1,$in1
+ vcipherlast $out2,$out2,$in2
+ vcipherlast $out3,$out3,$in3
+ vcipherlast $out4,$out4,$in4
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+
+.align 5
+_aesp8_xts_decrypt6x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ mflr r11
+ li r7,`$FRAME+8*16+15`
+ li r3,`$FRAME+8*16+31`
+ $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
+ stvx v20,r7,$sp # ABI says so
+ addi r7,r7,32
+ stvx v21,r3,$sp
+ addi r3,r3,32
+ stvx v22,r7,$sp
+ addi r7,r7,32
+ stvx v23,r3,$sp
+ addi r3,r3,32
+ stvx v24,r7,$sp
+ addi r7,r7,32
+ stvx v25,r3,$sp
+ addi r3,r3,32
+ stvx v26,r7,$sp
+ addi r7,r7,32
+ stvx v27,r3,$sp
+ addi r3,r3,32
+ stvx v28,r7,$sp
+ addi r7,r7,32
+ stvx v29,r3,$sp
+ addi r3,r3,32
+ stvx v30,r7,$sp
+ stvx v31,r3,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+
+ lvx $rndkey0,$x00,$key1 # load key schedule
+ lvx v30,$x10,$key1
+ addi $key1,$key1,0x20
+ lvx v31,$x00,$key1
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,`$FRAME+15`
+ mtctr $rounds
+
+Load_xts_dec_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key1
+ addi $key1,$key1,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key1
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_xts_dec_key
+
+ lvx v26,$x10,$key1
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key1
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key1
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key1
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key1
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key1
+ ?vperm v29,v29,v30,$keyperm
+ lvx $twk5,$x70,$key1 # borrow $twk5
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$twk5,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ vperm $in0,$inout,$inptail,$inpperm
+ subi $inp,$inp,31 # undo "caller"
+ vxor $twk0,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $out0,$in0,$twk0
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in1,$x10,$inp
+ vxor $twk1,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in1,$in1,$in1,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out1,$in1,$twk1
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in2,$x20,$inp
+ andi. $taillen,$len,15
+ vxor $twk2,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in2,$in2,$in2,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out2,$in2,$twk2
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in3,$x30,$inp
+ sub $len,$len,$taillen
+ vxor $twk3,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in3,$in3,$in3,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out3,$in3,$twk3
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in4,$x40,$inp
+ subi $len,$len,0x60
+ vxor $twk4,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in4,$in4,$in4,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out4,$in4,$twk4
+ vxor $tweak,$tweak,$tmp
+
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ vxor $twk5,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ le?vperm $in5,$in5,$in5,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out5,$in5,$twk5
+ vxor $tweak,$tweak,$tmp
+
+ vxor v31,v31,$rndkey0
+ mtctr $rounds
+ b Loop_xts_dec6x
+
+.align 5
+Loop_xts_dec6x:
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_dec6x
+
+ subic $len,$len,96 # $len-=96
+ vxor $in0,$twk0,v31 # xor with last round key
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk0,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vsldoi $tmp,$tmp,$tmp,15
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+
+ subfe. r0,r0,r0 # borrow?-1:0
+ vand $tmp,$tmp,$eighty7
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vxor $tweak,$tweak,$tmp
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vxor $in1,$twk1,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk1,$tweak,$rndkey0
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+
+ and r0,r0,$len
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vncipher $out0,$out0,v26
+ vncipher $out1,$out1,v26
+ vand $tmp,$tmp,$eighty7
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vxor $tweak,$tweak,$tmp
+ vncipher $out4,$out4,v26
+ vncipher $out5,$out5,v26
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in5 are loaded
+ # with last "words"
+ vxor $in2,$twk2,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk2,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vncipher $out0,$out0,v27
+ vncipher $out1,$out1,v27
+ vsldoi $tmp,$tmp,$tmp,15
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vand $tmp,$tmp,$eighty7
+ vncipher $out4,$out4,v27
+ vncipher $out5,$out5,v27
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vxor $tweak,$tweak,$tmp
+ vncipher $out0,$out0,v28
+ vncipher $out1,$out1,v28
+ vxor $in3,$twk3,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk3,$tweak,$rndkey0
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vncipher $out4,$out4,v28
+ vncipher $out5,$out5,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vand $tmp,$tmp,$eighty7
+
+ vncipher $out0,$out0,v29
+ vncipher $out1,$out1,v29
+ vxor $tweak,$tweak,$tmp
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vxor $in4,$twk4,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk4,$tweak,$rndkey0
+ vncipher $out4,$out4,v29
+ vncipher $out5,$out5,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+
+ vncipher $out0,$out0,v30
+ vncipher $out1,$out1,v30
+ vand $tmp,$tmp,$eighty7
+ vncipher $out2,$out2,v30
+ vncipher $out3,$out3,v30
+ vxor $tweak,$tweak,$tmp
+ vncipher $out4,$out4,v30
+ vncipher $out5,$out5,v30
+ vxor $in5,$twk5,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk5,$tweak,$rndkey0
+
+ vncipherlast $out0,$out0,$in0
+ lvx_u $in0,$x00,$inp # load next input block
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vncipherlast $out1,$out1,$in1
+ lvx_u $in1,$x10,$inp
+ vncipherlast $out2,$out2,$in2
+ le?vperm $in0,$in0,$in0,$leperm
+ lvx_u $in2,$x20,$inp
+ vand $tmp,$tmp,$eighty7
+ vncipherlast $out3,$out3,$in3
+ le?vperm $in1,$in1,$in1,$leperm
+ lvx_u $in3,$x30,$inp
+ vncipherlast $out4,$out4,$in4
+ le?vperm $in2,$in2,$in2,$leperm
+ lvx_u $in4,$x40,$inp
+ vxor $tweak,$tweak,$tmp
+ vncipherlast $out5,$out5,$in5
+ le?vperm $in3,$in3,$in3,$leperm
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ le?vperm $in4,$in4,$in4,$leperm
+ le?vperm $in5,$in5,$in5,$leperm
+
+ le?vperm $out0,$out0,$out0,$leperm
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk0
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ vxor $out1,$in1,$twk1
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ vxor $out2,$in2,$twk2
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ vxor $out3,$in3,$twk3
+ le?vperm $out5,$out5,$out5,$leperm
+ stvx_u $out4,$x40,$out
+ vxor $out4,$in4,$twk4
+ stvx_u $out5,$x50,$out
+ vxor $out5,$in5,$twk5
+ addi $out,$out,0x60
+
+ mtctr $rounds
+ beq Loop_xts_dec6x # did $len-=96 borrow?
+
+ addic. $len,$len,0x60
+ beq Lxts_dec6x_zero
+ cmpwi $len,0x20
+ blt Lxts_dec6x_one
+ nop
+ beq Lxts_dec6x_two
+ cmpwi $len,0x40
+ blt Lxts_dec6x_three
+ nop
+ beq Lxts_dec6x_four
+
+Lxts_dec6x_five:
+ vxor $out0,$in1,$twk0
+ vxor $out1,$in2,$twk1
+ vxor $out2,$in3,$twk2
+ vxor $out3,$in4,$twk3
+ vxor $out4,$in5,$twk4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk5 # unused tweak
+ vxor $twk1,$tweak,$rndkey0
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk1
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ stvx_u $out4,$x40,$out
+ addi $out,$out,0x50
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_four:
+ vxor $out0,$in2,$twk0
+ vxor $out1,$in3,$twk1
+ vxor $out2,$in4,$twk2
+ vxor $out3,$in5,$twk3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk4 # unused tweak
+ vmr $twk1,$twk5
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk5
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ stvx_u $out3,$x30,$out
+ addi $out,$out,0x40
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_three:
+ vxor $out0,$in3,$twk0
+ vxor $out1,$in4,$twk1
+ vxor $out2,$in5,$twk2
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk3 # unused tweak
+ vmr $twk1,$twk4
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk4
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ stvx_u $out2,$x20,$out
+ addi $out,$out,0x30
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_two:
+ vxor $out0,$in4,$twk0
+ vxor $out1,$in5,$twk1
+ vxor $out2,$out2,$out2
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk2 # unused tweak
+ vmr $twk1,$twk3
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk3
+ stvx_u $out1,$x10,$out
+ addi $out,$out,0x20
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_one:
+ vxor $out0,$in5,$twk0
+ nop
+Loop_xts_dec1x:
+ vncipher $out0,$out0,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_dec1x
+
+ subi r0,$taillen,1
+ vncipher $out0,$out0,v24
+
+ andi. r0,r0,16
+ cmpwi $taillen,0
+ vncipher $out0,$out0,v25
+
+ sub $inp,$inp,r0
+ vncipher $out0,$out0,v26
+
+ lvx_u $in0,0,$inp
+ vncipher $out0,$out0,v27
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vncipher $out0,$out0,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vncipher $out0,$out0,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $twk0,$twk0,v31
+
+ le?vperm $in0,$in0,$in0,$leperm
+ vncipher $out0,$out0,v30
+
+ mtctr $rounds
+ vncipherlast $out0,$out0,$twk0
+
+ vmr $twk0,$twk1 # unused tweak
+ vmr $twk1,$twk2
+ le?vperm $out0,$out0,$out0,$leperm
+ stvx_u $out0,$x00,$out # store output
+ addi $out,$out,0x10
+ vxor $out0,$in0,$twk2
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_zero:
+ cmpwi $taillen,0
+ beq Lxts_dec6x_done
+
+ lvx_u $in0,0,$inp
+ le?vperm $in0,$in0,$in0,$leperm
+ vxor $out0,$in0,$twk1
+Lxts_dec6x_steal:
+ vncipher $out0,$out0,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Lxts_dec6x_steal
+
+ add $inp,$inp,$taillen
+ vncipher $out0,$out0,v24
+
+ cmpwi $taillen,0
+ vncipher $out0,$out0,v25
+
+ lvx_u $in0,0,$inp
+ vncipher $out0,$out0,v26
+
+ lvsr $inpperm,0,$taillen # $in5 is no more
+ vncipher $out0,$out0,v27
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vncipher $out0,$out0,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vncipher $out0,$out0,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $twk1,$twk1,v31
+
+ le?vperm $in0,$in0,$in0,$leperm
+ vncipher $out0,$out0,v30
+
+ vperm $in0,$in0,$in0,$inpperm
+ vncipherlast $tmp,$out0,$twk1
+
+ le?vperm $out0,$tmp,$tmp,$leperm
+ le?stvx_u $out0,0,$out
+ be?stvx_u $tmp,0,$out
+
+ vxor $out0,$out0,$out0
+ vspltisb $out1,-1
+ vperm $out0,$out0,$out1,$inpperm
+ vsel $out0,$in0,$tmp,$out0
+ vxor $out0,$out0,$twk0
+
+ subi r30,$out,1
+ mtctr $taillen
+Loop_xts_dec6x_steal:
+ lbzu r0,1(r30)
+ stb r0,16(r30)
+ bdnz Loop_xts_dec6x_steal
+
+ li $taillen,0
+ mtctr $rounds
+ b Loop_xts_dec1x # one more time...
+
+.align 4
+Lxts_dec6x_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_dec6x_ret
+
+ vxor $tweak,$twk0,$rndkey0
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_dec6x_ret:
+ mtlr r11
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $seven,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x04,1,0x80,6,6,0
+ .long 0
+
+.align 5
+_aesp8_xts_dec5x:
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz _aesp8_xts_dec5x
+
+ subi r0,$taillen,1
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+
+ andi. r0,r0,16
+ cmpwi $taillen,0
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vxor $twk0,$twk0,v31
+
+ sub $inp,$inp,r0
+ vncipher $out0,$out0,v26
+ vncipher $out1,$out1,v26
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vncipher $out4,$out4,v26
+ vxor $in1,$twk1,v31
+
+ vncipher $out0,$out0,v27
+ lvx_u $in0,0,$inp
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vncipher $out4,$out4,v27
+ vxor $in2,$twk2,v31
+
+ addi $key_,$sp,`$FRAME+15` # rewind $key_
+ vncipher $out0,$out0,v28
+ vncipher $out1,$out1,v28
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vncipher $out4,$out4,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vxor $in3,$twk3,v31
+
+ vncipher $out0,$out0,v29
+ le?vperm $in0,$in0,$in0,$leperm
+ vncipher $out1,$out1,v29
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vncipher $out4,$out4,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $in4,$twk4,v31
+
+ vncipher $out0,$out0,v30
+ vncipher $out1,$out1,v30
+ vncipher $out2,$out2,v30
+ vncipher $out3,$out3,v30
+ vncipher $out4,$out4,v30
+
+ vncipherlast $out0,$out0,$twk0
+ vncipherlast $out1,$out1,$in1
+ vncipherlast $out2,$out2,$in2
+ vncipherlast $out3,$out3,$in3
+ vncipherlast $out4,$out4,$in4
+ mtctr $rounds
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+___
+}} }}}
+
+my $consts=1;
+foreach(split("\n",$code)) {
+ s/\`([^\`]*)\`/eval($1)/geo;
+
+ # constants table endian-specific conversion
+ if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
+ my $conv=$3;
+ my @bytes=();
+
+ # convert to endian-agnostic format
+ if ($1 eq "long") {
+ foreach (split(/,\s*/,$2)) {
+ my $l = /^0/?oct:int;
+ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
+ }
+ } else {
+ @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
+ }
+
+ # little-endian conversion
+ if ($flavour =~ /le$/o) {
+ SWITCH: for($conv) {
+ /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
+ /\?rev/ && do { @bytes=reverse(@bytes); last; };
+ }
+ }
+
+ #emit
+ print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
+ next;
+ }
+ $consts=0 if (m/Lconsts:/o); # end of table
+
+ # instructions prefixed with '?' are endian-specific and need
+ # to be adjusted accordingly...
+ if ($flavour =~ /le$/o) { # little-endian
+ s/le\?//o or
+ s/be\?/#be#/o or
+ s/\?lvsr/lvsl/o or
+ s/\?lvsl/lvsr/o or
+ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
+ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
+ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
+ } else { # big-endian
+ s/le\?/#le#/o or
+ s/be\?//o or
+ s/\?([a-z]+)/$1/o;
+ }
+
+ print $_,"\n";
+}
+
+close STDOUT or die "error closing STDOUT: $!";
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/aes/internal.h
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/aes/internal.h
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/aes/internal.h
@@ -59,6 +59,12 @@ OPENSSL_INLINE int vpaes_capable(void) {
OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_NEON_capable(); }
#endif
+#elif defined(OPENSSL_PPC64LE)
+#define HWAES
+
+OPENSSL_INLINE int hwaes_capable(void) {
+ return CRYPTO_is_PPC64LE_vcrypto_capable();
+}
#endif
#endif // !NO_ASM
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/bcm.c
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/bcm.c
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/bcm.c
@@ -102,6 +102,7 @@
#include "self_check/fips.c"
#include "self_check/self_check.c"
#include "service_indicator/service_indicator.c"
+#include "sha/sha1-altivec.c"
#include "sha/sha1.c"
#include "sha/sha256.c"
#include "sha/sha512.c"
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/bn/bn.c
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/bn/bn.c
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/bn/bn.c
@@ -384,6 +384,23 @@ int bn_expand(BIGNUM *bn, size_t bits) {
}
int bn_resize_words(BIGNUM *bn, size_t words) {
+#if defined(OPENSSL_PPC64LE)
+ // This is a workaround for a miscompilation bug in Clang 7.0.1 on POWER.
+ // The unittests catch the miscompilation, if it occurs, and it manifests
+ // as a crash in |bn_fits_in_words|.
+ //
+ // The bug only triggers if building in FIPS mode and with -O3. Clang 8.0.1
+ // has the same bug but this workaround is not effective there---I've not
+ // been able to find a workaround for 8.0.1.
+ //
+ // At the time of writing (2019-08-08), Clang git does *not* have this bug
+ // and does not need this workaroud. The current git version should go on to
+ // be Clang 10 thus, once we can depend on that, this can be removed.
+ if (value_barrier_w((size_t)bn->width == words)) {
+ return 1;
+ }
+#endif
+
if ((size_t)bn->width <= words) {
if (!bn_wexpand(bn, words)) {
return 0;
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/cipher/e_aes.c
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/cipher/e_aes.c
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/cipher/e_aes.c
@@ -1455,6 +1455,8 @@ int EVP_has_aes_hardware(void) {
return hwaes_capable() && crypto_gcm_clmul_enabled();
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
return hwaes_capable() && CRYPTO_is_ARMv8_PMULL_capable();
+#elif defined(OPENSSL_PPC64LE)
+ return CRYPTO_is_PPC64LE_vcrypto_capable();
#else
return 0;
#endif
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
@@ -0,0 +1,671 @@
+#! /usr/bin/env perl
+# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# GHASH for for PowerISA v2.07.
+#
+# July 2014
+#
+# Accurate performance measurements are problematic, because it's
+# always virtualized setup with possibly throttled processor.
+# Relative comparison is therefore more informative. This initial
+# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
+# faster than "4-bit" integer-only compiler-generated 64-bit code.
+# "Initial version" means that there is room for futher improvement.
+
+# May 2016
+#
+# 2x aggregated reduction improves performance by 50% (resulting
+# performance on POWER8 is 1 cycle per processed byte), and 4x
+# aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
+
+$flavour=shift;
+$output =shift;
+
+if ($flavour =~ /64/) {
+ $SIZE_T=8;
+ $LRSAVE=2*$SIZE_T;
+ $STU="stdu";
+ $POP="ld";
+ $PUSH="std";
+ $UCMP="cmpld";
+ $SHRI="srdi";
+} elsif ($flavour =~ /32/) {
+ $SIZE_T=4;
+ $LRSAVE=$SIZE_T;
+ $STU="stwu";
+ $POP="lwz";
+ $PUSH="stw";
+ $UCMP="cmplw";
+ $SHRI="srwi";
+} else { die "nonsense $flavour"; }
+
+$sp="r1";
+$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open OUT,"| $^X \"$xlate\" $flavour \"$output\"" || die "can't call $xlate: $!";
+*STDOUT=*OUT;
+
+my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
+
+my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
+my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
+my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
+my $vrsave="r12";
+
+$code=<<___;
+.machine "any"
+
+.text
+
+.globl .gcm_init_p8
+.align 5
+.gcm_init_p8:
+ li r0,-4096
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $H,0,r4 # load H
+
+ vspltisb $xC2,-16 # 0xf0
+ vspltisb $t0,1 # one
+ vaddubm $xC2,$xC2,$xC2 # 0xe0
+ vxor $zero,$zero,$zero
+ vor $xC2,$xC2,$t0 # 0xe1
+ vsldoi $xC2,$xC2,$zero,15 # 0xe1...
+ vsldoi $t1,$zero,$t0,1 # ...1
+ vaddubm $xC2,$xC2,$xC2 # 0xc2...
+ vspltisb $t2,7
+ vor $xC2,$xC2,$t1 # 0xc2....01
+ vspltb $t1,$H,0 # most significant byte
+ vsl $H,$H,$t0 # H<<=1
+ vsrab $t1,$t1,$t2 # broadcast carry bit
+ vand $t1,$t1,$xC2
+ vxor $IN,$H,$t1 # twisted H
+
+ vsldoi $H,$IN,$IN,8 # twist even more ...
+ vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
+ vsldoi $Hl,$zero,$H,8 # ... and split
+ vsldoi $Hh,$H,$zero,8
+
+ stvx_u $xC2,0,r3 # save pre-computed table
+ stvx_u $Hl,r8,r3
+ li r8,0x40
+ stvx_u $H, r9,r3
+ li r9,0x50
+ stvx_u $Hh,r10,r3
+ li r10,0x60
+
+ vpmsumd $Xl,$IN,$Hl # H.lo·H.lo
+ vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·H.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $IN1,$Xl,$t1
+
+ vsldoi $H2,$IN1,$IN1,8
+ vsldoi $H2l,$zero,$H2,8
+ vsldoi $H2h,$H2,$zero,8
+
+ stvx_u $H2l,r8,r3 # save H^2
+ li r8,0x70
+ stvx_u $H2,r9,r3
+ li r9,0x80
+ stvx_u $H2h,r10,r3
+ li r10,0x90
+___
+{
+my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
+$code.=<<___;
+ vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo
+ vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo
+ vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi
+ vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi
+ vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi
+ vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+ vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vsldoi $t4,$Xm1,$zero,8
+ vsldoi $t5,$zero,$Xm1,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+ vxor $Xl1,$Xl1,$t4
+ vxor $Xh1,$Xh1,$t5
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vsldoi $Xl1,$Xl1,$Xl1,8
+ vxor $Xl,$Xl,$t2
+ vxor $Xl1,$Xl1,$t6
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vpmsumd $Xl1,$Xl1,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $t5,$t5,$Xh1
+ vxor $Xl,$Xl,$t1
+ vxor $Xl1,$Xl1,$t5
+
+ vsldoi $H,$Xl,$Xl,8
+ vsldoi $H2,$Xl1,$Xl1,8
+ vsldoi $Hl,$zero,$H,8
+ vsldoi $Hh,$H,$zero,8
+ vsldoi $H2l,$zero,$H2,8
+ vsldoi $H2h,$H2,$zero,8
+
+ stvx_u $Hl,r8,r3 # save H^3
+ li r8,0xa0
+ stvx_u $H,r9,r3
+ li r9,0xb0
+ stvx_u $Hh,r10,r3
+ li r10,0xc0
+ stvx_u $H2l,r8,r3 # save H^4
+ stvx_u $H2,r9,r3
+ stvx_u $H2h,r10,r3
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_init_p8,.-.gcm_init_p8
+___
+}
+$code.=<<___;
+.globl .gcm_gmult_p8
+.align 5
+.gcm_gmult_p8:
+ lis r0,0xfff8
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $IN,0,$Xip # load Xi
+
+ lvx_u $Hl,r8,$Htbl # load pre-computed table
+ le?lvsl $lemask,r0,r0
+ lvx_u $H, r9,$Htbl
+ le?vspltisb $t0,0x07
+ lvx_u $Hh,r10,$Htbl
+ le?vxor $lemask,$lemask,$t0
+ lvx_u $xC2,0,$Htbl
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $zero,$zero,$zero
+
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $Xl,$Xl,$t1
+
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_gmult_p8,.-.gcm_gmult_p8
+
+.globl .gcm_ghash_p8
+.align 5
+.gcm_ghash_p8:
+ li r0,-4096
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $Xl,0,$Xip # load Xi
+
+ lvx_u $Hl,r8,$Htbl # load pre-computed table
+ li r8,0x40
+ le?lvsl $lemask,r0,r0
+ lvx_u $H, r9,$Htbl
+ li r9,0x50
+ le?vspltisb $t0,0x07
+ lvx_u $Hh,r10,$Htbl
+ li r10,0x60
+ le?vxor $lemask,$lemask,$t0
+ lvx_u $xC2,0,$Htbl
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ vxor $zero,$zero,$zero
+
+ ${UCMP}i $len,64
+ bge Lgcm_ghash_p8_4x
+
+ lvx_u $IN,0,$inp
+ addi $inp,$inp,16
+ subic. $len,$len,16
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $IN,$IN,$Xl
+ beq Lshort
+
+ lvx_u $H2l,r8,$Htbl # load H^2
+ li r8,16
+ lvx_u $H2, r9,$Htbl
+ add r9,$inp,$len # end of input
+ lvx_u $H2h,r10,$Htbl
+ be?b Loop_2x
+
+.align 5
+Loop_2x:
+ lvx_u $IN1,0,$inp
+ le?vperm $IN1,$IN1,$IN1,$lemask
+
+ subic $len,$len,32
+ vpmsumd $Xl,$IN,$H2l # H^2.lo·Xi.lo
+ vpmsumd $Xl1,$IN1,$Hl # H.lo·Xi+1.lo
+ subfe r0,r0,r0 # borrow?-1:0
+ vpmsumd $Xm,$IN,$H2 # H^2.hi·Xi.lo+H^2.lo·Xi.hi
+ vpmsumd $Xm1,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+1.hi
+ and r0,r0,$len
+ vpmsumd $Xh,$IN,$H2h # H^2.hi·Xi.hi
+ vpmsumd $Xh1,$IN1,$Hh # H.hi·Xi+1.hi
+ add $inp,$inp,r0
+
+ vxor $Xl,$Xl,$Xl1
+ vxor $Xm,$Xm,$Xm1
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xh,$Xh,$Xh1
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+ lvx_u $IN,r8,$inp
+ addi $inp,$inp,32
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $t1,$t1,$Xh
+ vxor $IN,$IN,$t1
+ vxor $IN,$IN,$Xl
+ $UCMP r9,$inp
+ bgt Loop_2x # done yet?
+
+ cmplwi $len,0
+ bne Leven
+
+Lshort:
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+
+Leven:
+ vxor $Xl,$Xl,$t1
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
+___
+{
+my ($Xl3,$Xm2,$IN2,$H3l,$H3,$H3h,
+ $Xh3,$Xm3,$IN3,$H4l,$H4,$H4h) = map("v$_",(20..31));
+my $IN0=$IN;
+my ($H21l,$H21h,$loperm,$hiperm) = ($Hl,$Hh,$H2l,$H2h);
+
+$code.=<<___;
+.align 5
+.gcm_ghash_p8_4x:
+Lgcm_ghash_p8_4x:
+ $STU $sp,-$FRAME($sp)
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
+ stvx v20,r10,$sp
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ li r10,0x60
+ stvx v31,r11,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME-4`($sp) # save vrsave
+ mtspr 256,r0 # preserve all AltiVec registers
+
+ lvsl $t0,0,r8 # 0x0001..0e0f
+ #lvx_u $H2l,r8,$Htbl # load H^2
+ li r8,0x70
+ lvx_u $H2, r9,$Htbl
+ li r9,0x80
+ vspltisb $t1,8 # 0x0808..0808
+ #lvx_u $H2h,r10,$Htbl
+ li r10,0x90
+ lvx_u $H3l,r8,$Htbl # load H^3
+ li r8,0xa0
+ lvx_u $H3, r9,$Htbl
+ li r9,0xb0
+ lvx_u $H3h,r10,$Htbl
+ li r10,0xc0
+ lvx_u $H4l,r8,$Htbl # load H^4
+ li r8,0x10
+ lvx_u $H4, r9,$Htbl
+ li r9,0x20
+ lvx_u $H4h,r10,$Htbl
+ li r10,0x30
+
+ vsldoi $t2,$zero,$t1,8 # 0x0000..0808
+ vaddubm $hiperm,$t0,$t2 # 0x0001..1617
+ vaddubm $loperm,$t1,$hiperm # 0x0809..1e1f
+
+ $SHRI $len,$len,4 # this allows to use sign bit
+ # as carry
+ lvx_u $IN0,0,$inp # load input
+ lvx_u $IN1,r8,$inp
+ subic. $len,$len,8
+ lvx_u $IN2,r9,$inp
+ lvx_u $IN3,r10,$inp
+ addi $inp,$inp,0x40
+ le?vperm $IN0,$IN0,$IN0,$lemask
+ le?vperm $IN1,$IN1,$IN1,$lemask
+ le?vperm $IN2,$IN2,$IN2,$lemask
+ le?vperm $IN3,$IN3,$IN3,$lemask
+
+ vxor $Xh,$IN0,$Xl
+
+ vpmsumd $Xl1,$IN1,$H3l
+ vpmsumd $Xm1,$IN1,$H3
+ vpmsumd $Xh1,$IN1,$H3h
+
+ vperm $H21l,$H2,$H,$hiperm
+ vperm $t0,$IN2,$IN3,$loperm
+ vperm $H21h,$H2,$H,$loperm
+ vperm $t1,$IN2,$IN3,$hiperm
+ vpmsumd $Xm2,$IN2,$H2 # H^2.lo·Xi+2.hi+H^2.hi·Xi+2.lo
+ vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+2.lo+H.lo·Xi+3.lo
+ vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi
+ vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+2.hi+H.hi·Xi+3.hi
+
+ vxor $Xm2,$Xm2,$Xm1
+ vxor $Xl3,$Xl3,$Xl1
+ vxor $Xm3,$Xm3,$Xm2
+ vxor $Xh3,$Xh3,$Xh1
+
+ blt Ltail_4x
+
+Loop_4x:
+ lvx_u $IN0,0,$inp
+ lvx_u $IN1,r8,$inp
+ subic. $len,$len,4
+ lvx_u $IN2,r9,$inp
+ lvx_u $IN3,r10,$inp
+ addi $inp,$inp,0x40
+ le?vperm $IN1,$IN1,$IN1,$lemask
+ le?vperm $IN2,$IN2,$IN2,$lemask
+ le?vperm $IN3,$IN3,$IN3,$lemask
+ le?vperm $IN0,$IN0,$IN0,$lemask
+
+ vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo
+ vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi
+ vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi
+ vpmsumd $Xl1,$IN1,$H3l
+ vpmsumd $Xm1,$IN1,$H3
+ vpmsumd $Xh1,$IN1,$H3h
+
+ vxor $Xl,$Xl,$Xl3
+ vxor $Xm,$Xm,$Xm3
+ vxor $Xh,$Xh,$Xh3
+ vperm $t0,$IN2,$IN3,$loperm
+ vperm $t1,$IN2,$IN3,$hiperm
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+ vpmsumd $Xl3,$t0,$H21l # H.lo·Xi+3.lo +H^2.lo·Xi+2.lo
+ vpmsumd $Xh3,$t1,$H21h # H.hi·Xi+3.hi +H^2.hi·Xi+2.hi
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xm2,$IN2,$H2 # H^2.hi·Xi+2.lo+H^2.lo·Xi+2.hi
+ vpmsumd $Xm3,$IN3,$H # H.hi·Xi+3.lo +H.lo·Xi+3.hi
+ vpmsumd $Xl,$Xl,$xC2
+
+ vxor $Xl3,$Xl3,$Xl1
+ vxor $Xh3,$Xh3,$Xh1
+ vxor $Xh,$Xh,$IN0
+ vxor $Xm2,$Xm2,$Xm1
+ vxor $Xh,$Xh,$t1
+ vxor $Xm3,$Xm3,$Xm2
+ vxor $Xh,$Xh,$Xl
+ bge Loop_4x
+
+Ltail_4x:
+ vpmsumd $Xl,$Xh,$H4l # H^4.lo·Xi.lo
+ vpmsumd $Xm,$Xh,$H4 # H^4.hi·Xi.lo+H^4.lo·Xi.hi
+ vpmsumd $Xh,$Xh,$H4h # H^4.hi·Xi.hi
+
+ vxor $Xl,$Xl,$Xl3
+ vxor $Xm,$Xm,$Xm3
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xh,$Xh,$Xh3
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $Xl,$Xl,$t1
+
+ addic. $len,$len,4
+ beq Ldone_4x
+
+ lvx_u $IN0,0,$inp
+ ${UCMP}i $len,2
+ li $len,-4
+ blt Lone
+ lvx_u $IN1,r8,$inp
+ beq Ltwo
+
+Lthree:
+ lvx_u $IN2,r9,$inp
+ le?vperm $IN0,$IN0,$IN0,$lemask
+ le?vperm $IN1,$IN1,$IN1,$lemask
+ le?vperm $IN2,$IN2,$IN2,$lemask
+
+ vxor $Xh,$IN0,$Xl
+ vmr $H4l,$H3l
+ vmr $H4, $H3
+ vmr $H4h,$H3h
+
+ vperm $t0,$IN1,$IN2,$loperm
+ vperm $t1,$IN1,$IN2,$hiperm
+ vpmsumd $Xm2,$IN1,$H2 # H^2.lo·Xi+1.hi+H^2.hi·Xi+1.lo
+ vpmsumd $Xm3,$IN2,$H # H.hi·Xi+2.lo +H.lo·Xi+2.hi
+ vpmsumd $Xl3,$t0,$H21l # H^2.lo·Xi+1.lo+H.lo·Xi+2.lo
+ vpmsumd $Xh3,$t1,$H21h # H^2.hi·Xi+1.hi+H.hi·Xi+2.hi
+
+ vxor $Xm3,$Xm3,$Xm2
+ b Ltail_4x
+
+.align 4
+Ltwo:
+ le?vperm $IN0,$IN0,$IN0,$lemask
+ le?vperm $IN1,$IN1,$IN1,$lemask
+
+ vxor $Xh,$IN0,$Xl
+ vperm $t0,$zero,$IN1,$loperm
+ vperm $t1,$zero,$IN1,$hiperm
+
+ vsldoi $H4l,$zero,$H2,8
+ vmr $H4, $H2
+ vsldoi $H4h,$H2,$zero,8
+
+ vpmsumd $Xl3,$t0, $H21l # H.lo·Xi+1.lo
+ vpmsumd $Xm3,$IN1,$H # H.hi·Xi+1.lo+H.lo·Xi+2.hi
+ vpmsumd $Xh3,$t1, $H21h # H.hi·Xi+1.hi
+
+ b Ltail_4x
+
+.align 4
+Lone:
+ le?vperm $IN0,$IN0,$IN0,$lemask
+
+ vsldoi $H4l,$zero,$H,8
+ vmr $H4, $H
+ vsldoi $H4h,$H,$zero,8
+
+ vxor $Xh,$IN0,$Xl
+ vxor $Xl3,$Xl3,$Xl3
+ vxor $Xm3,$Xm3,$Xm3
+ vxor $Xh3,$Xh3,$Xh3
+
+ b Ltail_4x
+
+Ldone_4x:
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ li r10,`15+6*$SIZE_T`
+ li r11,`31+6*$SIZE_T`
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ addi $sp,$sp,$FRAME
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,0,4,0
+ .long 0
+___
+}
+$code.=<<___;
+.size .gcm_ghash_p8,.-.gcm_ghash_p8
+
+.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+___
+
+foreach (split("\n",$code)) {
+ s/\`([^\`]*)\`/eval $1/geo;
+
+ if ($flavour =~ /le$/o) { # little-endian
+ s/le\?//o or
+ s/be\?/#be#/o;
+ } else {
+ s/le\?/#le#/o or
+ s/be\?//o;
+ }
+ print $_,"\n";
+}
+
+close STDOUT or die "error closing STDOUT: $!"; # enforce flush
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/gcm.c
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/modes/gcm.c
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/gcm.c
@@ -228,6 +228,13 @@ void CRYPTO_ghash_init(gmult_func *out_m
*out_hash = gcm_ghash_neon;
return;
}
+#elif defined(GHASH_ASM_PPC64LE)
+ if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
+ gcm_init_p8(out_table, H);
+ *out_mult = gcm_gmult_p8;
+ *out_hash = gcm_ghash_p8;
+ return;
+ }
#endif
gcm_init_nohw(out_table, H);
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/gcm_test.cc
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/modes/gcm_test.cc
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/gcm_test.cc
@@ -215,5 +215,15 @@ TEST(GCMTest, ABI) {
}
}
#endif
+
+#if defined(GHASH_ASM_PPC64LE)
+ if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
+ CHECK_ABI(gcm_init_p8, Htable, kH);
+ CHECK_ABI(gcm_gmult_p8, X, Htable);
+ for (size_t blocks : kBlockCounts) {
+ CHECK_ABI(gcm_ghash_p8, X, Htable, buf, 16 * blocks);
+ }
+ }
+#endif // GHASH_ASM_PPC64LE
}
#endif // SUPPORTS_ABI_TEST && !OPENSSL_NO_ASM
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/internal.h
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/modes/internal.h
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/internal.h
@@ -325,6 +325,13 @@ void aes_gcm_dec_kernel(const uint8_t *i
const u128 Htable[16]);
#endif
+#elif defined(OPENSSL_PPC64LE)
+#define GHASH_ASM_PPC64LE
+#define GCM_FUNCREF
+void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_p8(uint8_t Xi[16], const u128 Htable[16]);
+void gcm_ghash_p8(uint8_t Xi[16], const u128 Htable[16], const uint8_t *inp,
+ size_t len);
#endif
#endif // OPENSSL_NO_ASM
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/rand/getrandom_fillin.h
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/rand/getrandom_fillin.h
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/rand/getrandom_fillin.h
@@ -30,6 +30,8 @@
#define EXPECTED_NR_getrandom 278
#elif defined(OPENSSL_ARM)
#define EXPECTED_NR_getrandom 384
+#elif defined(OPENSSL_PPC64LE)
+#define EXPECTED_NR_getrandom 359
#elif defined(OPENSSL_RISCV64)
#define EXPECTED_NR_getrandom 278
#endif
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/rand/rand.c
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/rand/rand.c
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/rand/rand.c
@@ -431,6 +431,11 @@ void RAND_bytes_with_additional_data(uin
// Take a read lock around accesses to |state->drbg|. This is needed to
// avoid returning bad entropy if we race with
// |rand_thread_state_clear_all|.
+ //
+ // This lock must be taken after any calls to |CRYPTO_sysrand| to avoid a
+ // bug on ppc64le. glibc may implement pthread locks by wrapping user code
+ // in a hardware transaction, but, on some older versions of glibc and the
+ // kernel, syscalls made with |syscall| did not abort the transaction.
CRYPTO_MUTEX_lock_read(&state->clear_drbg_lock);
#endif
if (!CTR_DRBG_reseed(&state->drbg, seed, reseed_additional_data,
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/sha/internal.h
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/sha/internal.h
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/sha/internal.h
@@ -23,6 +23,16 @@
extern "C" {
#endif
8 months ago
+#if defined(OPENSSL_PPC64LE)
+// POWER has an intrinsics-based implementation of SHA-1 and thus the functions
+// normally defined in assembly are available even with |OPENSSL_NO_ASM| in
+// this case.
+#define SHA1_ASM_PPC64
+void sha1_block_data_order_ppc64(uint32_t *state, const uint8_t *in,
+ size_t num_blocks);
+#endif
+
+
// Define SHA{n}[_{variant}]_ASM if sha{n}_block_data_order[_{variant}] is
// defined in assembly.
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/sha/sha1-altivec.c
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/sha/sha1-altivec.c
@@ -0,0 +1,361 @@
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.] */
+
+// Altivec-optimized SHA1 in C. This is tested on ppc64le only.
+//
+// References:
+// https://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
+// http://arctic.org/~dean/crypto/sha1.html
+//
+// This code used the generic SHA-1 from OpenSSL as a basis and AltiVec
+// optimisations were added on top.
+
+#include <openssl/sha.h>
+
+#if defined(OPENSSL_PPC64LE)
+
+#include <altivec.h>
+
+void sha1_block_data_order_ppc64(uint32_t *state, const uint8_t *data, size_t num);
+
+static uint32_t rotate(uint32_t a, int n) { return (a << n) | (a >> (32 - n)); }
+
+typedef vector unsigned int vec_uint32_t;
+typedef vector unsigned char vec_uint8_t;
+
+// Vector constants
+static const vec_uint8_t k_swap_endianness = {3, 2, 1, 0, 7, 6, 5, 4,
+ 11, 10, 9, 8, 15, 14, 13, 12};
+
+// Shift amounts for byte and bit shifts and rotations
+static const vec_uint8_t k_4_bytes = {32, 32, 32, 32, 32, 32, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32};
+static const vec_uint8_t k_12_bytes = {96, 96, 96, 96, 96, 96, 96, 96,
+ 96, 96, 96, 96, 96, 96, 96, 96};
+
+#define K_00_19 0x5a827999UL
+#define K_20_39 0x6ed9eba1UL
+#define K_40_59 0x8f1bbcdcUL
+#define K_60_79 0xca62c1d6UL
+
+// Vector versions of the above.
+static const vec_uint32_t K_00_19_x_4 = {K_00_19, K_00_19, K_00_19, K_00_19};
+static const vec_uint32_t K_20_39_x_4 = {K_20_39, K_20_39, K_20_39, K_20_39};
+static const vec_uint32_t K_40_59_x_4 = {K_40_59, K_40_59, K_40_59, K_40_59};
+static const vec_uint32_t K_60_79_x_4 = {K_60_79, K_60_79, K_60_79, K_60_79};
+
+// vector message scheduling: compute message schedule for round i..i+3 where i
+// is divisible by 4. We return the schedule w[i..i+3] as a vector. In
+// addition, we also precompute sum w[i..+3] and an additive constant K. This
+// is done to offload some computation of f() in the integer execution units.
+//
+// Byte shifting code below may not be correct for big-endian systems.
+static vec_uint32_t sched_00_15(vec_uint32_t *pre_added, const void *data,
+ vec_uint32_t k) {
+ const vector unsigned char unaligned_data =
+ vec_vsx_ld(0, (const unsigned char*) data);
+ const vec_uint32_t v = (vec_uint32_t) unaligned_data;
+ const vec_uint32_t w = vec_perm(v, v, k_swap_endianness);
+ vec_st(w + k, 0, pre_added);
+ return w;
+}
+
+// Compute w[i..i+3] using these steps for i in [16, 20, 24, 28]
+//
+// w'[i ] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) <<< 1
+// w'[i+1] = (w[i-2] ^ w[i-7] ^ w[i-13] ^ w[i-15]) <<< 1
+// w'[i+2] = (w[i-1] ^ w[i-6] ^ w[i-12] ^ w[i-14]) <<< 1
+// w'[i+3] = ( 0 ^ w[i-5] ^ w[i-11] ^ w[i-13]) <<< 1
+//
+// w[ i] = w'[ i]
+// w[i+1] = w'[i+1]
+// w[i+2] = w'[i+2]
+// w[i+3] = w'[i+3] ^ (w'[i] <<< 1)
+static vec_uint32_t sched_16_31(vec_uint32_t *pre_added, vec_uint32_t minus_4,
+ vec_uint32_t minus_8, vec_uint32_t minus_12,
+ vec_uint32_t minus_16, vec_uint32_t k) {
+ const vec_uint32_t minus_3 = vec_sro(minus_4, k_4_bytes);
+ const vec_uint32_t minus_14 = vec_sld((minus_12), (minus_16), 8);
+ const vec_uint32_t k_1_bit = vec_splat_u32(1);
+ const vec_uint32_t w_prime =
+ vec_rl(minus_3 ^ minus_8 ^ minus_14 ^ minus_16, k_1_bit);
+ const vec_uint32_t w =
+ w_prime ^ vec_rl(vec_slo(w_prime, k_12_bytes), k_1_bit);
+ vec_st(w + k, 0, pre_added);
+ return w;
+}
+
+// Compute w[i..i+3] using this relation for i in [32, 36, 40 ... 76]
+// w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]), 2) <<< 2
+static vec_uint32_t sched_32_79(vec_uint32_t *pre_added, vec_uint32_t minus_4,
+ vec_uint32_t minus_8, vec_uint32_t minus_16,
+ vec_uint32_t minus_28, vec_uint32_t minus_32,
+ vec_uint32_t k) {
+ const vec_uint32_t minus_6 = vec_sld(minus_4, minus_8, 8);
+ const vec_uint32_t k_2_bits = vec_splat_u32(2);
+ const vec_uint32_t w =
+ vec_rl(minus_6 ^ minus_16 ^ minus_28 ^ minus_32, k_2_bits);
+ vec_st(w + k, 0, pre_added);
+ return w;
+}
+
+// As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be simplified
+// to the code in F_00_19. Wei attributes these optimisations to Peter
+// Gutmann's SHS code, and he attributes it to Rich Schroeppel. #define
+// F(x,y,z) (((x) & (y)) | ((~(x)) & (z))) I've just become aware of another
+// tweak to be made, again from Wei Dai, in F_40_59, (x&a)|(y&a) -> (x|y)&a
+#define F_00_19(b, c, d) ((((c) ^ (d)) & (b)) ^ (d))
+#define F_20_39(b, c, d) ((b) ^ (c) ^ (d))
+#define F_40_59(b, c, d) (((b) & (c)) | (((b) | (c)) & (d)))
+#define F_60_79(b, c, d) F_20_39(b, c, d)
+
+// We pre-added the K constants during message scheduling.
+#define BODY_00_19(i, a, b, c, d, e, f) \
+ do { \
+ (f) = w[i] + (e) + rotate((a), 5) + F_00_19((b), (c), (d)); \
+ (b) = rotate((b), 30); \
+ } while (0)
+
+#define BODY_20_39(i, a, b, c, d, e, f) \
+ do { \
+ (f) = w[i] + (e) + rotate((a), 5) + F_20_39((b), (c), (d)); \
+ (b) = rotate((b), 30); \
+ } while (0)
+
+#define BODY_40_59(i, a, b, c, d, e, f) \
+ do { \
+ (f) = w[i] + (e) + rotate((a), 5) + F_40_59((b), (c), (d)); \
+ (b) = rotate((b), 30); \
+ } while (0)
+
+#define BODY_60_79(i, a, b, c, d, e, f) \
+ do { \
+ (f) = w[i] + (e) + rotate((a), 5) + F_60_79((b), (c), (d)); \
+ (b) = rotate((b), 30); \
+ } while (0)
+
+void sha1_block_data_order_ppc64(uint32_t *state, const uint8_t *data, size_t num) {
+ uint32_t A, B, C, D, E, T;
+
+ A = state[0];
+ B = state[1];
+ C = state[2];
+ D = state[3];
+ E = state[4];
+
+ for (;;) {
+ vec_uint32_t vw[20];
+ const uint32_t *w = (const uint32_t *)&vw;
+
+ vec_uint32_t k = K_00_19_x_4;
+ const vec_uint32_t w0 = sched_00_15(vw + 0, data + 0, k);
+ BODY_00_19(0, A, B, C, D, E, T);
+ BODY_00_19(1, T, A, B, C, D, E);
+ BODY_00_19(2, E, T, A, B, C, D);
+ BODY_00_19(3, D, E, T, A, B, C);
+
+ const vec_uint32_t w4 = sched_00_15(vw + 1, data + 16, k);
+ BODY_00_19(4, C, D, E, T, A, B);
+ BODY_00_19(5, B, C, D, E, T, A);
+ BODY_00_19(6, A, B, C, D, E, T);
+ BODY_00_19(7, T, A, B, C, D, E);
+
+ const vec_uint32_t w8 = sched_00_15(vw + 2, data + 32, k);
+ BODY_00_19(8, E, T, A, B, C, D);
+ BODY_00_19(9, D, E, T, A, B, C);
+ BODY_00_19(10, C, D, E, T, A, B);
+ BODY_00_19(11, B, C, D, E, T, A);
+
+ const vec_uint32_t w12 = sched_00_15(vw + 3, data + 48, k);
+ BODY_00_19(12, A, B, C, D, E, T);
+ BODY_00_19(13, T, A, B, C, D, E);
+ BODY_00_19(14, E, T, A, B, C, D);
+ BODY_00_19(15, D, E, T, A, B, C);
+
+ const vec_uint32_t w16 = sched_16_31(vw + 4, w12, w8, w4, w0, k);
+ BODY_00_19(16, C, D, E, T, A, B);
+ BODY_00_19(17, B, C, D, E, T, A);
+ BODY_00_19(18, A, B, C, D, E, T);
+ BODY_00_19(19, T, A, B, C, D, E);
+
+ k = K_20_39_x_4;
+ const vec_uint32_t w20 = sched_16_31(vw + 5, w16, w12, w8, w4, k);
+ BODY_20_39(20, E, T, A, B, C, D);
+ BODY_20_39(21, D, E, T, A, B, C);
+ BODY_20_39(22, C, D, E, T, A, B);
+ BODY_20_39(23, B, C, D, E, T, A);
+
+ const vec_uint32_t w24 = sched_16_31(vw + 6, w20, w16, w12, w8, k);
+ BODY_20_39(24, A, B, C, D, E, T);
+ BODY_20_39(25, T, A, B, C, D, E);
+ BODY_20_39(26, E, T, A, B, C, D);
+ BODY_20_39(27, D, E, T, A, B, C);
+
+ const vec_uint32_t w28 = sched_16_31(vw + 7, w24, w20, w16, w12, k);
+ BODY_20_39(28, C, D, E, T, A, B);
+ BODY_20_39(29, B, C, D, E, T, A);
+ BODY_20_39(30, A, B, C, D, E, T);
+ BODY_20_39(31, T, A, B, C, D, E);
+
+ const vec_uint32_t w32 = sched_32_79(vw + 8, w28, w24, w16, w4, w0, k);
+ BODY_20_39(32, E, T, A, B, C, D);
+ BODY_20_39(33, D, E, T, A, B, C);
+ BODY_20_39(34, C, D, E, T, A, B);
+ BODY_20_39(35, B, C, D, E, T, A);
+
+ const vec_uint32_t w36 = sched_32_79(vw + 9, w32, w28, w20, w8, w4, k);
+ BODY_20_39(36, A, B, C, D, E, T);
+ BODY_20_39(37, T, A, B, C, D, E);
+ BODY_20_39(38, E, T, A, B, C, D);
+ BODY_20_39(39, D, E, T, A, B, C);
+
+ k = K_40_59_x_4;
+ const vec_uint32_t w40 = sched_32_79(vw + 10, w36, w32, w24, w12, w8, k);
+ BODY_40_59(40, C, D, E, T, A, B);
+ BODY_40_59(41, B, C, D, E, T, A);
+ BODY_40_59(42, A, B, C, D, E, T);
+ BODY_40_59(43, T, A, B, C, D, E);
+
+ const vec_uint32_t w44 = sched_32_79(vw + 11, w40, w36, w28, w16, w12, k);
+ BODY_40_59(44, E, T, A, B, C, D);
+ BODY_40_59(45, D, E, T, A, B, C);
+ BODY_40_59(46, C, D, E, T, A, B);
+ BODY_40_59(47, B, C, D, E, T, A);
+
+ const vec_uint32_t w48 = sched_32_79(vw + 12, w44, w40, w32, w20, w16, k);
+ BODY_40_59(48, A, B, C, D, E, T);
+ BODY_40_59(49, T, A, B, C, D, E);
+ BODY_40_59(50, E, T, A, B, C, D);
+ BODY_40_59(51, D, E, T, A, B, C);
+
+ const vec_uint32_t w52 = sched_32_79(vw + 13, w48, w44, w36, w24, w20, k);
+ BODY_40_59(52, C, D, E, T, A, B);
+ BODY_40_59(53, B, C, D, E, T, A);
+ BODY_40_59(54, A, B, C, D, E, T);
+ BODY_40_59(55, T, A, B, C, D, E);
+
+ const vec_uint32_t w56 = sched_32_79(vw + 14, w52, w48, w40, w28, w24, k);
+ BODY_40_59(56, E, T, A, B, C, D);
+ BODY_40_59(57, D, E, T, A, B, C);
+ BODY_40_59(58, C, D, E, T, A, B);
+ BODY_40_59(59, B, C, D, E, T, A);
+
+ k = K_60_79_x_4;
+ const vec_uint32_t w60 = sched_32_79(vw + 15, w56, w52, w44, w32, w28, k);
+ BODY_60_79(60, A, B, C, D, E, T);
+ BODY_60_79(61, T, A, B, C, D, E);
+ BODY_60_79(62, E, T, A, B, C, D);
+ BODY_60_79(63, D, E, T, A, B, C);
+
+ const vec_uint32_t w64 = sched_32_79(vw + 16, w60, w56, w48, w36, w32, k);
+ BODY_60_79(64, C, D, E, T, A, B);
+ BODY_60_79(65, B, C, D, E, T, A);
+ BODY_60_79(66, A, B, C, D, E, T);
+ BODY_60_79(67, T, A, B, C, D, E);
+
+ const vec_uint32_t w68 = sched_32_79(vw + 17, w64, w60, w52, w40, w36, k);
+ BODY_60_79(68, E, T, A, B, C, D);
+ BODY_60_79(69, D, E, T, A, B, C);
+ BODY_60_79(70, C, D, E, T, A, B);
+ BODY_60_79(71, B, C, D, E, T, A);
+
+ const vec_uint32_t w72 = sched_32_79(vw + 18, w68, w64, w56, w44, w40, k);
+ BODY_60_79(72, A, B, C, D, E, T);
+ BODY_60_79(73, T, A, B, C, D, E);
+ BODY_60_79(74, E, T, A, B, C, D);
+ BODY_60_79(75, D, E, T, A, B, C);
+
+ // We don't use the last value
+ (void)sched_32_79(vw + 19, w72, w68, w60, w48, w44, k);
+ BODY_60_79(76, C, D, E, T, A, B);
+ BODY_60_79(77, B, C, D, E, T, A);
+ BODY_60_79(78, A, B, C, D, E, T);
+ BODY_60_79(79, T, A, B, C, D, E);
+
+ const uint32_t mask = 0xffffffffUL;
+ state[0] = (state[0] + E) & mask;
+ state[1] = (state[1] + T) & mask;
+ state[2] = (state[2] + A) & mask;
+ state[3] = (state[3] + B) & mask;
+ state[4] = (state[4] + C) & mask;
+
+ data += 64;
+ if (--num == 0) {
+ break;
+ }
+
+ A = state[0];
+ B = state[1];
+ C = state[2];
+ D = state[3];
+ E = state[4];
+ }
+}
+
+#endif // OPENSSL_PPC64LE
+
+#undef K_00_19
+#undef K_20_39
+#undef K_40_59
+#undef K_60_79
+#undef F_00_19
+#undef F_20_39
+#undef F_40_59
+#undef F_60_79
+#undef BODY_00_19
+#undef BODY_20_39
+#undef BODY_40_59
+#undef BODY_60_79
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/internal.h
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/internal.h
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/internal.h
@@ -181,7 +181,7 @@ extern "C" {
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM) || \
- defined(OPENSSL_AARCH64)
+ defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
// OPENSSL_cpuid_setup initializes the platform-specific feature cache.
void OPENSSL_cpuid_setup(void);
#endif
8 months ago
@@ -1638,6 +1638,16 @@ OPENSSL_INLINE int CRYPTO_is_ARMv8_SHA51
#endif // OPENSSL_ARM || OPENSSL_AARCH64
+#if defined(OPENSSL_PPC64LE)
+
+// CRYPTO_is_PPC64LE_vcrypto_capable returns true iff the current CPU supports
+// the Vector.AES category of instructions.
+int CRYPTO_is_PPC64LE_vcrypto_capable(void);
+
+extern unsigned long OPENSSL_ppc64le_hwcap2;
+
+#endif // OPENSSL_PPC64LE
+
#if defined(BORINGSSL_DISPATCH_TEST)
// Runtime CPU dispatch testing support
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/perlasm/ppc-xlate.pl
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/perlasm/ppc-xlate.pl
@@ -0,0 +1,320 @@
+#! /usr/bin/env perl
+# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+my $flavour = shift;
+my $output = shift;
+open STDOUT,">$output" || die "can't open $output: $!";
+
+my %GLOBALS;
+my %TYPES;
+my $dotinlocallabels=($flavour=~/linux/)?1:0;
+
+################################################################
+# directives which need special treatment on different platforms
+################################################################
+my $type = sub {
+ my ($dir,$name,$type) = @_;
+
+ $TYPES{$name} = $type;
+ if ($flavour =~ /linux/) {
+ $name =~ s|^\.||;
+ ".type $name,$type";
+ } else {
+ "";
+ }
+};
+my $globl = sub {
+ my $junk = shift;
+ my $name = shift;
+ my $global = \$GLOBALS{$name};
+ my $type = \$TYPES{$name};
+ my $ret;
+
+ $name =~ s|^\.||;
+
+ SWITCH: for ($flavour) {
+ /aix/ && do { if (!$$type) {
+ $$type = "\@function";
+ }
+ if ($$type =~ /function/) {
+ $name = ".$name";
+ }
+ last;
+ };
+ /osx/ && do { $name = "_$name";
+ last;
+ };
+ /linux.*(32|64le)/
+ && do { $ret .= ".globl $name";
+ if (!$$type) {
+ $ret .= "\n.type $name,\@function";
+ $$type = "\@function";
+ }
+ last;
+ };
+ /linux.*64/ && do { $ret .= ".globl $name";
+ if (!$$type) {
+ $ret .= "\n.type $name,\@function";
+ $$type = "\@function";
+ }
+ if ($$type =~ /function/) {
+ $ret .= "\n.section \".opd\",\"aw\"";
+ $ret .= "\n.align 3";
+ $ret .= "\n$name:";
+ $ret .= "\n.quad .$name,.TOC.\@tocbase,0";
+ $ret .= "\n.previous";
+ $name = ".$name";
+ }
+ last;
+ };
+ }
+
+ $ret = ".globl $name" if (!$ret);
+ $$global = $name;
+ $ret;
+};
+my $text = sub {
+ my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
+ $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
+ $ret;
+};
+my $machine = sub {
+ my $junk = shift;
+ my $arch = shift;
+ if ($flavour =~ /osx/)
+ { $arch =~ s/\"//g;
+ $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
+ }
+ ".machine $arch";
+};
+my $size = sub {
+ if ($flavour =~ /linux/)
+ { shift;
+ my $name = shift;
+ my $real = $GLOBALS{$name} ? \$GLOBALS{$name} : \$name;
+ my $ret = ".size $$real,.-$$real";
+ $name =~ s|^\.||;
+ if ($$real ne $name) {
+ $ret .= "\n.size $name,.-$$real";
+ }
+ $ret;
+ }
+ else
+ { ""; }
+};
+my $asciz = sub {
+ shift;
+ my $line = join(",",@_);
+ if ($line =~ /^"(.*)"$/)
+ { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
+ else
+ { ""; }
+};
+my $quad = sub {
+ shift;
+ my @ret;
+ my ($hi,$lo);
+ for (@_) {
+ if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
+ { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
+ elsif (/^([0-9]+)$/o)
+ { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
+ else
+ { $hi=undef; $lo=$_; }
+
+ if (defined($hi))
+ { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
+ else
+ { push(@ret,".quad $lo"); }
+ }
+ join("\n",@ret);
+};
+
+################################################################
+# simplified mnemonics not handled by at least one assembler
+################################################################
+my $cmplw = sub {
+ my $f = shift;
+ my $cr = 0; $cr = shift if ($#_>1);
+ # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
+ ($flavour =~ /linux.*32/) ?
+ " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
+ " cmplw ".join(',',$cr,@_);
+};
+my $bdnz = sub {
+ my $f = shift;
+ my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
+ " bc $bo,0,".shift;
+} if ($flavour!~/linux/);
+my $bltlr = sub {
+ my $f = shift;
+ my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
+ " bclr $bo,0";
+};
+my $bnelr = sub {
+ my $f = shift;
+ my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
+ " bclr $bo,2";
+};
+my $beqlr = sub {
+ my $f = shift;
+ my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
+ " bclr $bo,2";
+};
+# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
+# arguments is 64, with "operand out of range" error.
+my $extrdi = sub {
+ my ($f,$ra,$rs,$n,$b) = @_;
+ $b = ($b+$n)&63; $n = 64-$n;
+ " rldicl $ra,$rs,$b,$n";
+};
+my $vmr = sub {
+ my ($f,$vx,$vy) = @_;
+ " vor $vx,$vy,$vy";
+};
+
+# Some ABIs specify vrsave, special-purpose register #256, as reserved
+# for system use.
+my $no_vrsave = ($flavour =~ /aix|linux64le/);
+my $mtspr = sub {
+ my ($f,$idx,$ra) = @_;
+ if ($idx == 256 && $no_vrsave) {
+ " or $ra,$ra,$ra";
+ } else {
+ " mtspr $idx,$ra";
+ }
+};
+my $mfspr = sub {
+ my ($f,$rd,$idx) = @_;
+ if ($idx == 256 && $no_vrsave) {
+ " li $rd,-1";
+ } else {
+ " mfspr $rd,$idx";
+ }
+};
+
+# PowerISA 2.06 stuff
+sub vsxmem_op {
+ my ($f, $vrt, $ra, $rb, $op) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
+}
+# made-up unaligned memory reference AltiVec/VMX instructions
+my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
+my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
+my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
+my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
+my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
+my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
+
+# PowerISA 2.07 stuff
+sub vcrypto_op {
+ my ($f, $vrt, $vra, $vrb, $op) = @_;
+ " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
+}
+my $vcipher = sub { vcrypto_op(@_, 1288); };
+my $vcipherlast = sub { vcrypto_op(@_, 1289); };
+my $vncipher = sub { vcrypto_op(@_, 1352); };
+my $vncipherlast= sub { vcrypto_op(@_, 1353); };
+my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
+my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
+my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
+my $vpmsumb = sub { vcrypto_op(@_, 1032); };
+my $vpmsumd = sub { vcrypto_op(@_, 1224); };
+my $vpmsubh = sub { vcrypto_op(@_, 1096); };
+my $vpmsumw = sub { vcrypto_op(@_, 1160); };
+my $vaddudm = sub { vcrypto_op(@_, 192); };
+
+my $mtsle = sub {
+ my ($f, $arg) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
+};
+
+# PowerISA 3.0 stuff
+my $maddhdu = sub {
+ my ($f, $rt, $ra, $rb, $rc) = @_;
+ " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|49;
+};
+my $maddld = sub {
+ my ($f, $rt, $ra, $rb, $rc) = @_;
+ " .long ".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|51;
+};
+
+my $darn = sub {
+ my ($f, $rt, $l) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1);
+};
+
+print <<___;
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+#endif
+
+#if !defined(OPENSSL_NO_ASM) && defined(__powerpc64__) && defined(__ELF__)
+___
+
+while($line=<>) {
+
+ $line =~ s|[#!;].*$||; # get rid of asm-style comments...
+ $line =~ s|/\*.*\*/||; # ... and C-style comments...
+ $line =~ s|^\s+||; # ... and skip white spaces in beginning...
+ $line =~ s|\s+$||; # ... and at the end
+
+ {
+ $line =~ s|\.L(\w+)|L$1|g; # common denominator for Locallabel
+ $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
+ }
+
+ {
+ $line =~ s|(^[\.\w]+)\:\s*||;
+ my $label = $1;
+ if ($label) {
+ my $xlated = ($GLOBALS{$label} or $label);
+ print "$xlated:";
+ if ($flavour =~ /linux.*64le/) {
+ if ($TYPES{$label} =~ /function/) {
+ printf "\n.localentry %s,0\n",$xlated;
+ }
+ }
+ }
+ }
+
+ {
+ $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
+ my $c = $1; $c = "\t" if ($c eq "");
+ my $mnemonic = $2;
+ my $f = $3;
+ my $opcode = eval("\$$mnemonic");
+ $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
+ if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
+ elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
+ }
+
+ print $line if ($line);
+ print "\n";
+}
+
+print <<___;
+#endif // !OPENSSL_NO_ASM && __powerpc64__ && __ELF__
+#if defined(__ELF__)
+// See https://www.airs.com/blog/archives/518.
+.section .note.GNU-stack,"",\%progbits
+#endif
+___
+
+close STDOUT or die "error closing STDOUT: $!";
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/test/abi_test.h
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/test/abi_test.h
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/test/abi_test.h
@@ -179,7 +179,78 @@ struct alignas(16) Reg128 {
CALLER_STATE_REGISTER(uint64_t, x28) \
CALLER_STATE_REGISTER(uint64_t, x29)
-#endif // X86_64 || X86 || ARM || AARCH64
+#elif defined(OPENSSL_PPC64LE)
+
+// CRReg only compares the CR2-CR4 bits of a CR register.
+struct CRReg {
+ uint32_t masked() const { return value & 0x00fff000; }
+ bool operator==(CRReg r) const { return masked() == r.masked(); }
+ bool operator!=(CRReg r) const { return masked() != r.masked(); }
+ uint32_t value;
+};
+
+// References:
+// ELFv2: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
+//
+// Note vector and floating-point registers on POWER have two different names.
+// Originally, there were 32 floating-point registers and 32 vector registers,
+// labelled f0-f31 and v0-v31 respectively. Later, VSX (Vector Scalar Extension)
+// unified them into 64 registers vs0-vs63. f0-f31 map to the lower halves of
+// vs0-vs31. v0-v31 map to vs32-vs63. The ABI was defined in terms of pre-VSX
+// names, so we use those names here. In particular, f14-f31 are
+// callee-saved, but the upper halves of vs14-vs31 are not.
+#define LOOP_CALLER_STATE_REGISTERS() \
+ CALLER_STATE_REGISTER(Reg128, v20) \
+ CALLER_STATE_REGISTER(Reg128, v21) \
+ CALLER_STATE_REGISTER(Reg128, v22) \
+ CALLER_STATE_REGISTER(Reg128, v23) \
+ CALLER_STATE_REGISTER(Reg128, v24) \
+ CALLER_STATE_REGISTER(Reg128, v25) \
+ CALLER_STATE_REGISTER(Reg128, v26) \
+ CALLER_STATE_REGISTER(Reg128, v27) \
+ CALLER_STATE_REGISTER(Reg128, v28) \
+ CALLER_STATE_REGISTER(Reg128, v29) \
+ CALLER_STATE_REGISTER(Reg128, v30) \
+ CALLER_STATE_REGISTER(Reg128, v31) \
+ CALLER_STATE_REGISTER(uint64_t, r14) \
+ CALLER_STATE_REGISTER(uint64_t, r15) \
+ CALLER_STATE_REGISTER(uint64_t, r16) \
+ CALLER_STATE_REGISTER(uint64_t, r17) \
+ CALLER_STATE_REGISTER(uint64_t, r18) \
+ CALLER_STATE_REGISTER(uint64_t, r19) \
+ CALLER_STATE_REGISTER(uint64_t, r20) \
+ CALLER_STATE_REGISTER(uint64_t, r21) \
+ CALLER_STATE_REGISTER(uint64_t, r22) \
+ CALLER_STATE_REGISTER(uint64_t, r23) \
+ CALLER_STATE_REGISTER(uint64_t, r24) \
+ CALLER_STATE_REGISTER(uint64_t, r25) \
+ CALLER_STATE_REGISTER(uint64_t, r26) \
+ CALLER_STATE_REGISTER(uint64_t, r27) \
+ CALLER_STATE_REGISTER(uint64_t, r28) \
+ CALLER_STATE_REGISTER(uint64_t, r29) \
+ CALLER_STATE_REGISTER(uint64_t, r30) \
+ CALLER_STATE_REGISTER(uint64_t, r31) \
+ CALLER_STATE_REGISTER(uint64_t, f14) \
+ CALLER_STATE_REGISTER(uint64_t, f15) \
+ CALLER_STATE_REGISTER(uint64_t, f16) \
+ CALLER_STATE_REGISTER(uint64_t, f17) \
+ CALLER_STATE_REGISTER(uint64_t, f18) \
+ CALLER_STATE_REGISTER(uint64_t, f19) \
+ CALLER_STATE_REGISTER(uint64_t, f20) \
+ CALLER_STATE_REGISTER(uint64_t, f21) \
+ CALLER_STATE_REGISTER(uint64_t, f22) \
+ CALLER_STATE_REGISTER(uint64_t, f23) \
+ CALLER_STATE_REGISTER(uint64_t, f24) \
+ CALLER_STATE_REGISTER(uint64_t, f25) \
+ CALLER_STATE_REGISTER(uint64_t, f26) \
+ CALLER_STATE_REGISTER(uint64_t, f27) \
+ CALLER_STATE_REGISTER(uint64_t, f28) \
+ CALLER_STATE_REGISTER(uint64_t, f29) \
+ CALLER_STATE_REGISTER(uint64_t, f30) \
+ CALLER_STATE_REGISTER(uint64_t, f31) \
+ CALLER_STATE_REGISTER(CRReg, cr)
+
+#endif // X86_64 || X86 || ARM || AARCH64 || PPC64LE
// Enable ABI testing if all of the following are true.
//
@@ -231,6 +302,12 @@ inline crypto_word_t ToWord(T t) {
// on 32-bit architectures for simplicity.
static_assert(sizeof(T) == 4, "parameter types must be word-sized");
return (crypto_word_t)t;
+#elif defined(OPENSSL_PPC64LE)
+ // ELFv2, section 2.2.2.3 says the parameter save area sign- or zero-extends
+ // parameters passed in memory. Section 2.2.3 is unclear on how to handle
+ // register parameters, but section 2.2.2.3 additionally says that the memory
+ // copy of a parameter is identical to the register one.
+ return (crypto_word_t)t;
#elif defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)
// AAPCS64, section 5.4.2, clauses C.7 and C.14 says any remaining bits in
// aarch are unspecified. iOS64 contradicts this and says the callee extends
@@ -285,9 +362,9 @@ inline crypto_word_t ToWord(T t) {
template <typename R, typename... Args>
inline crypto_word_t CheckImpl(Result *out, bool unwind, R (*func)(Args...),
typename DeductionGuard<Args>::Type... args) {
- // We only support up to 8 arguments, so all arguments on aarch64 are passed
- // in registers. This is simpler and avoids the iOS discrepancy around packing
- // small arguments on the stack. (See the iOS64 reference.)
+ // We only support up to 8 arguments, so all arguments on aarch64 and ppc64le
+ // are passed in registers. This is simpler and avoids the iOS discrepancy
+ // around packing small arguments on the stack. (See the iOS64 reference.)
static_assert(sizeof...(args) <= 8,
"too many arguments for abi_test_trampoline");
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/test/asm/trampoline-ppc.pl
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/test/asm/trampoline-ppc.pl
@@ -0,0 +1,262 @@
+#!/usr/bin/env perl
+# Copyright (c) 2019, Google Inc.
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# This file defines helper functions for crypto/test/abi_test.h on ppc64le. See
+# that header for details on how to use this.
+#
+# For convenience, this file is linked into libcrypto, where consuming builds
+# already support architecture-specific sources. The static linker should drop
+# this code in non-test binaries. This includes a shared library build of
+# libcrypto, provided --gc-sections or equivalent is used.
+#
+# References:
+#
+# ELFv2: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
+
+use strict;
+
+my $flavour = shift;
+my $output = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/;
+my $dir = $1;
+my $xlate;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\"";
+*STDOUT = *OUT;
+
+unless ($flavour =~ /linux.*64le/) {
+ die "This file only supports the ELFv2 ABI, used by ppc64le";
+}
+
+my $code = "";
+
+sub load_or_store_regs {
+ # $op is "l" or "st".
+ my ($op, $base_reg, $base_offset) = @_;
+ # Vector registers.
+ foreach (20..31) {
+ my $offset = $base_offset + ($_ - 20) * 16;
+ # Vector registers only support indexed register addressing.
+ $code .= "\tli\tr11, $offset\n";
+ $code .= "\t${op}vx\tv$_, r11, $base_reg\n";
+ }
+ # Save general registers.
+ foreach (14..31) {
+ my $offset = $base_offset + 192 + ($_ - 14) * 8;
+ $code .= "\t${op}d\tr$_, $offset($base_reg)\n";
+ }
+ # Save floating point registers.
+ foreach (14..31) {
+ my $offset = $base_offset + 336 + ($_ - 14) * 8;
+ $code .= "\t${op}fd\tf$_, $offset($base_reg)\n";
+ }
+}
+
+sub load_regs {
+ my ($base_reg, $base_offset) = @_;
+ load_or_store_regs("l", $base_reg, $base_offset);
+}
+
+sub store_regs {
+ my ($base_reg, $base_offset) = @_;
+ load_or_store_regs("st", $base_reg, $base_offset);
+}
+
+my ($func, $state, $argv, $argc) = ("r3", "r4", "r5", "r6");
+$code .= <<____;
+.machine "any"
+.text
+
+# abi_test_trampoline loads callee-saved registers from |state|, calls |func|
+# with |argv|, then saves the callee-saved registers into |state|. It returns
+# the result of |func|. The |unwind| argument is unused.
+# uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
+# const uint64_t *argv, size_t argc,
+# uint64_t unwind);
+.globl abi_test_trampoline
+.align 5
+abi_test_trampoline:
+ # LR is saved into the caller's stack frame.
+ mflr r0
+ std r0, 16(r1)
+
+ # Allocate 66*8 = 528 bytes of stack frame. From the top of the stack
+ # to the bottom, the stack frame is:
+ #
+ # 0(r1) - Back chain pointer
+ # 8(r1) - CR save area
+ # 16(r1) - LR save area (for |func|)
+ # 24(r1) - TOC pointer save area
+ # 32(r1) - Saved copy of |state|
+ # 40(r1) - Padding
+ # 48(r1) - Vector register save area (v20-v31, 12 registers)
+ # 240(r1) - General register save area (r14-r31, 18 registers)
+ # 384(r1) - Floating point register save area (f14-f31, 18 registers)
+ #
+ # Note the layouts of the register save areas and CallerState match.
+ #
+ # In the ELFv2 ABI, the parameter save area is optional if the function
+ # is non-variadic and all parameters fit in registers. We only support
+ # such functions, so we omit it to test that |func| does not rely on it.
+ stdu r1, -528(r1)
+
+ mfcr r0
+ std r0, 8(r1) # Save CR
+ std r2, 24(r1) # Save TOC
+ std $state, 32(r1) # Save |state|
+____
+# Save registers to the stack.
+store_regs("r1", 48);
+# Load registers from the caller.
+load_regs($state, 0);
+$code .= <<____;
+ # Load CR from |state|.
+ ld r0, 480($state)
+ mtcr r0
+
+ # Move parameters into temporary registers so they are not clobbered.
+ addi r11, $argv, -8 # Adjust for ldu below
+ mr r12, $func
+
+ # Load parameters into registers.
+ cmpdi $argc, 0
+ beq .Largs_done
+ mtctr $argc
+ ldu r3, 8(r11)
+ bdz .Largs_done
+ ldu r4, 8(r11)
+ bdz .Largs_done
+ ldu r5, 8(r11)
+ bdz .Largs_done
+ ldu r6, 8(r11)
+ bdz .Largs_done
+ ldu r7, 8(r11)
+ bdz .Largs_done
+ ldu r8, 8(r11)
+ bdz .Largs_done
+ ldu r9, 8(r11)
+ bdz .Largs_done
+ ldu r10, 8(r11)
+
+.Largs_done:
+ li r2, 0 # Clear TOC to test |func|'s global entry point
+ mtctr r12
+ bctrl
+ ld r2, 24(r1) # Restore TOC
+
+ ld $state, 32(r1) # Reload |state|
+____
+# Output resulting registers to the caller.
+store_regs($state, 0);
+# Restore registers from the stack.
+load_regs("r1", 48);
+$code .= <<____;
+ mfcr r0
+ std r0, 480($state) # Output CR to caller
+ ld r0, 8(r1)
+ mtcrf 0b00111000, r0 # Restore CR2-CR4
+ addi r1, r1, 528
+ ld r0, 16(r1) # Restore LR
+ mtlr r0
+ blr
+.size abi_test_trampoline,.-abi_test_trampoline
+____
+
+# abi_test_clobber_* clobbers the corresponding register. These are used to test
+# the ABI-testing framework.
+foreach (0..31) {
+ # r1 is the stack pointer. r13 is the thread pointer.
+ next if ($_ == 1 || $_ == 13);
+ $code .= <<____;
+.globl abi_test_clobber_r$_
+.align 5
+abi_test_clobber_r$_:
+ li r$_, 0
+ blr
+.size abi_test_clobber_r$_,.-abi_test_clobber_r$_
+____
+}
+
+foreach (0..31) {
+ $code .= <<____;
+.globl abi_test_clobber_f$_
+.align 4
+abi_test_clobber_f$_:
+ li r0, 0
+ # Use the red zone.
+ std r0, -8(r1)
+ lfd f$_, -8(r1)
+ blr
+.size abi_test_clobber_f$_,.-abi_test_clobber_f$_
+____
+}
+
+foreach (0..31) {
+ $code .= <<____;
+.globl abi_test_clobber_v$_
+.align 4
+abi_test_clobber_v$_:
+ vxor v$_, v$_, v$_
+ blr
+.size abi_test_clobber_v$_,.-abi_test_clobber_v$_
+____
+}
+
+foreach (0..7) {
+ # PPC orders CR fields in big-endian, so the mask is reversed from what one
+ # would expect.
+ my $mask = 1 << (7 - $_);
+ $code .= <<____;
+.globl abi_test_clobber_cr$_
+.align 4
+abi_test_clobber_cr$_:
+ # Flip the bits on cr$_ rather than setting to zero. With a four-bit
+ # register, zeroing it will do nothing 1 in 16 times.
+ mfcr r0
+ not r0, r0
+ mtcrf $mask, r0
+ blr
+.size abi_test_clobber_cr$_,.-abi_test_clobber_cr$_
+____
+}
+
+$code .= <<____;
+.globl abi_test_clobber_ctr
+.align 4
+abi_test_clobber_ctr:
+ li r0, 0
+ mtctr r0
+ blr
+.size abi_test_clobber_ctr,.-abi_test_clobber_ctr
+
+.globl abi_test_clobber_lr
+.align 4
+abi_test_clobber_lr:
+ mflr r0
+ mtctr r0
+ li r0, 0
+ mtlr r0
+ bctr
+.size abi_test_clobber_lr,.-abi_test_clobber_lr
+
+____
+
+print $code;
+close STDOUT or die "error closing STDOUT: $!";
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/include/openssl/target.h
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/include/openssl/target.h
+++ chromium-125.0.6422.41/third_party/boringssl/src/include/openssl/target.h
@@ -34,6 +34,9 @@
#elif defined(__ARMEL__) || defined(_M_ARM)
#define OPENSSL_32_BIT
#define OPENSSL_ARM
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && defined(_LITTLE_ENDIAN)
+#define OPENSSL_64_BIT
+#define OPENSSL_PPC64LE
#elif defined(__MIPSEL__) && !defined(__LP64__)
#define OPENSSL_32_BIT
#define OPENSSL_MIPS
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/acvp/modulewrapper/main.cc
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/util/fipstools/acvp/modulewrapper/main.cc
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/acvp/modulewrapper/main.cc
@@ -37,6 +37,8 @@ int main(int argc, char **argv) {
puts("ARM (32-bit)");
#elif defined(OPENSSL_AARCH64)
puts("aarch64 (64-bit)");
+#elif defined(OPENSSL_PPC64LE)
+ puts("PPC64LE (64-bit)");
#else
#error "FIPS build not supported on this architecture"
#endif
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/delocate.go
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/util/fipstools/delocate/delocate.go
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/delocate.go
@@ -54,7 +54,8 @@ type stringWriter interface {
type processorType int
const (
- x86_64 processorType = iota + 1
+ ppc64le processorType = iota + 1
+ x86_64
aarch64
)
@@ -67,6 +68,8 @@ type delocation struct {
// symbols is the set of symbols defined in the module.
symbols map[string]struct{}
+ // localEntrySymbols is the set of symbols with .localentry directives.
+ localEntrySymbols map[string]struct{}
// redirectors maps from out-call symbol name to the name of a
// redirector function for that symbol. E.g. “memcpy” ->
// “bcm_redirector_memcpy”.
@@ -75,6 +78,9 @@ type delocation struct {
// should be used to reference it. E.g. “P384_data_storage” ->
// “P384_data_storage”.
bssAccessorsNeeded map[string]string
+ // tocLoaders is a set of symbol names for which TOC helper functions
+ // are required. (ppc64le only.)
+ tocLoaders map[string]struct{}
// gotExternalsNeeded is a set of symbol names for which we need
// “delta” symbols: symbols that contain the offset from their location
// to the memory in question.
@@ -151,6 +157,8 @@ func (d *delocation) processInput(input
switch d.processor {
case x86_64:
statement, err = d.processIntelInstruction(statement, node.up)
+ case ppc64le:
+ statement, err = d.processPPCInstruction(statement, node.up)
case aarch64:
statement, err = d.processAarch64Instruction(statement, node.up)
default:
@@ -247,7 +255,7 @@ func (d *delocation) processDirective(st
d.writeNode(statement)
break
- case ".debug", ".note":
+ case ".debug", ".note", ".toc":
d.writeNode(statement)
break
@@ -336,6 +344,10 @@ func (d *delocation) processLabelContain
d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n")
}
+ if name == ".localentry" {
+ d.output.WriteString(localEntryName(args[0]) + ":\n")
+ }
+
return statement, nil
}
@@ -646,6 +658,191 @@ func (d *delocation) processAarch64Instr
return statement, nil
}
+/* ppc64le
+
+[PABI]: “64-Bit ELF V2 ABI Specification. Power Architecture.” March 21st,
+ 2017
+
+(Also useful is “Power ISA Version 2.07 B”. Note that version three of that
+document is /not/ good as that's POWER9 specific.)
+
+ppc64le doesn't have IP-relative addressing and does a lot to work around this.
+Rather than reference a PLT and GOT direction, it has a single structure called
+the TOC (Table Of Contents). Within the TOC is the contents of .rodata, .data,
+.got, .plt, .bss, etc sections [PABI;3.3].
+
+A pointer to the TOC is maintained in r2 and the following pattern is used to
+load the address of an element into a register:
+
+ addis <address register>, 2, foo@toc@ha
+ addi <address register>, <address register>, foo@toc@l
+
+The “addis” instruction shifts a signed constant left 16 bits and adds the
+result to its second argument, saving the result in the first argument. The
+“addi” instruction does the same, but without shifting. Thus the “@toc@ha"
+suffix on a symbol means “the top 16 bits of the TOC offset” and “@toc@l” means
+“the bottom 16 bits of the offset”. However, note that both values are signed,
+thus offsets in the top half of a 64KB chunk will have an @ha value that's one
+greater than expected and a negative @l value.
+
+The TOC is specific to a “module” (basically an executable or shared object).
+This means that there's not a single TOC in a process and that r2 needs to
+change as control moves between modules. Thus functions have two entry points:
+the “global” entry point and the “local” entry point. Jumps from within the
+same module can use the local entry while jumps from other modules must use the
+global entry. The global entry establishes the correct value of r2 before
+running the function and the local entry skips that code.
+
+The global entry point for a function is defined by its label. The local entry
+is a power-of-two number of bytes from the global entry, set by the
+“.localentry” directive. (ppc64le instructions are always 32 bits, so an offset
+of 1 or 2 bytes is treated as an offset of zero.)
+
+In order to help the global entry code set r2 to point to the local TOC, r12 is
+set to the address of the global entry point when called [PABI;2.2.1.1]. Thus
+the global entry will typically use an addis+addi pair to add a known offset to
+r12 and store it in r2. For example:
+
+foo:
+ addis 2, 12, .TOC. - foo@ha
+ addi 2, 2, .TOC. - foo@l
+
+(It's worth noting that the '@' operator binds very loosely, so the 3rd
+arguments parse as (.TOC. - foo)@ha and (.TOC. - foo)@l.)
+
+When calling a function, the compiler doesn't know whether that function is in
+the same module or not. Thus it doesn't know whether r12 needs to be set nor
+whether r2 will be clobbered on return. Rather than always assume the worst,
+the linker fixes stuff up once it knows that a call is going out of module:
+
+Firstly, calling, say, memcpy (which we assume to be in a different module)
+won't actually jump directly to memcpy, or even a PLT resolution function.
+It'll call a synthesised function that:
+ a) saves r2 in the caller's stack frame
+ b) loads the address of memcpy@PLT into r12
+ c) jumps to r12.
+
+As this synthesised function loads memcpy@PLT, a call to memcpy from the
+compiled code just references “memcpy” directly, not “memcpy@PLT”.
+
+Since it jumps directly to memcpy@PLT, it can't restore r2 on return. Thus
+calls must be followed by a nop. If the call ends up going out-of-module, the
+linker will rewrite that nop to load r2 from the stack.
+
+Speaking of the stack, the stack pointer is kept in r1 and there's a 288-byte
+red-zone. The format of the stack frame is defined [PABI;2.2.2] and must be
+followed as called functions will write into their parent's stack frame. For
+example, the synthesised out-of-module trampolines will save r2 24 bytes into
+the caller's frame and all non-leaf functions save the return address 16 bytes
+into the caller's frame.
+
+A final point worth noting: some RISC ISAs have r0 wired to zero: all reads
+result in zero and all writes are discarded. POWER does something a little like
+that, but r0 is only special in certain argument positions for certain
+instructions. You just have to read the manual to know which they are.
+
+
+Delocation is easier than Intel because there's just TOC references, but it's
+also harder because there's no IP-relative addressing.
+
+Jumps are IP-relative however, and have a 24-bit immediate value. So we can
+jump to functions that set a register to the needed value. (r3 is the
+return-value register and so that's what is generally used here.) */
+
+// isPPC64LEAPair recognises an addis+addi pair that's adding the offset of
+// source to relative and writing the result to target.
+func (d *delocation) isPPC64LEAPair(statement *node32) (target, source, relative string, ok bool) {
+ instruction := skipWS(statement.up).up
+ assertNodeType(instruction, ruleInstructionName)
+ name1 := d.contents(instruction)
+ args1 := instructionArgs(instruction.next)
+
+ statement = statement.next
+ instruction = skipWS(statement.up).up
+ assertNodeType(instruction, ruleInstructionName)
+ name2 := d.contents(instruction)
+ args2 := instructionArgs(instruction.next)
+
+ if name1 != "addis" ||
+ len(args1) != 3 ||
+ name2 != "addi" ||
+ len(args2) != 3 {
+ return "", "", "", false
+ }
+
+ target = d.contents(args1[0])
+ relative = d.contents(args1[1])
+ source1 := d.contents(args1[2])
+ source2 := d.contents(args2[2])
+
+ if !strings.HasSuffix(source1, "@ha") ||
+ !strings.HasSuffix(source2, "@l") ||
+ source1[:len(source1)-3] != source2[:len(source2)-2] ||
+ d.contents(args2[0]) != target ||
+ d.contents(args2[1]) != target {
+ return "", "", "", false
+ }
+
+ source = source1[:len(source1)-3]
+ ok = true
+ return
+}
+
+// establishTOC writes the global entry prelude for a function. The standard
+// prelude involves relocations so this version moves the relocation outside
+// the integrity-checked area.
+func establishTOC(w stringWriter) {
+ w.WriteString("999:\n")
+ w.WriteString("\taddis 2, 12, .LBORINGSSL_external_toc-999b@ha\n")
+ w.WriteString("\taddi 2, 2, .LBORINGSSL_external_toc-999b@l\n")
+ w.WriteString("\tld 12, 0(2)\n")
+ w.WriteString("\tadd 2, 2, 12\n")
+}
+
+// loadTOCFuncName returns the name of a synthesized function that sets r3 to
+// the value of “symbol+offset”.
+func loadTOCFuncName(symbol, offset string) string {
+ symbol = strings.Replace(symbol, ".", "_dot_", -1)
+ ret := ".Lbcm_loadtoc_" + symbol
+ if len(offset) != 0 {
+ offset = strings.Replace(offset, "+", "_plus_", -1)
+ offset = strings.Replace(offset, "-", "_minus_", -1)
+ ret += "_" + offset
+ }
+ return ret
+}
+
+func (d *delocation) loadFromTOC(w stringWriter, symbol, offset, dest string) wrapperFunc {
+ d.tocLoaders[symbol+"\x00"+offset] = struct{}{}
+
+ return func(k func()) {
+ w.WriteString("\taddi 1, 1, -288\n") // Clear the red zone.
+ w.WriteString("\tmflr " + dest + "\n") // Stash the link register.
+ w.WriteString("\tstd " + dest + ", -8(1)\n")
+ // The TOC loader will use r3, so stash it if necessary.
+ if dest != "3" {
+ w.WriteString("\tstd 3, -16(1)\n")
+ }
+
+ // Because loadTOCFuncName returns a “.L” name, we don't need a
+ // nop after this call.
+ w.WriteString("\tbl " + loadTOCFuncName(symbol, offset) + "\n")
+
+ // Cycle registers around. We need r3 -> destReg, -8(1) ->
+ // lr and, optionally, -16(1) -> r3.
+ w.WriteString("\tstd 3, -24(1)\n")
+ w.WriteString("\tld 3, -8(1)\n")
+ w.WriteString("\tmtlr 3\n")
+ w.WriteString("\tld " + dest + ", -24(1)\n")
+ if dest != "3" {
+ w.WriteString("\tld 3, -16(1)\n")
+ }
+ w.WriteString("\taddi 1, 1, 288\n")
+
+ k()
+ }
+}
+
func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
for symRef != nil && symRef.pegRule == ruleOffset {
offset := d.contents(symRef)
@@ -700,6 +897,215 @@ func (d *delocation) parseMemRef(memRef
return
}
+func (d *delocation) processPPCInstruction(statement, instruction *node32) (*node32, error) {
+ assertNodeType(instruction, ruleInstructionName)
+ instructionName := d.contents(instruction)
+ isBranch := instructionName[0] == 'b'
+
+ argNodes := instructionArgs(instruction.next)
+
+ var wrappers wrapperStack
+ var args []string
+ changed := false
+
+Args:
+ for i, arg := range argNodes {
+ fullArg := arg
+ isIndirect := false
+
+ if arg.pegRule == ruleIndirectionIndicator {
+ arg = arg.next
+ isIndirect = true
+ }
+
+ switch arg.pegRule {
+ case ruleRegisterOrConstant, ruleLocalLabelRef:
+ args = append(args, d.contents(fullArg))
+
+ case ruleTOCRefLow:
+ return nil, errors.New("Found low TOC reference outside preamble pattern")
+
+ case ruleTOCRefHigh:
+ target, _, relative, ok := d.isPPC64LEAPair(statement)
+ if !ok {
+ return nil, errors.New("Found high TOC reference outside preamble pattern")
+ }
+
+ if relative != "12" {
+ return nil, fmt.Errorf("preamble is relative to %q, not r12", relative)
+ }
+
+ if target != "2" {
+ return nil, fmt.Errorf("preamble is setting %q, not r2", target)
+ }
+
+ statement = statement.next
+ establishTOC(d.output)
+ instructionName = ""
+ changed = true
+ break Args
+
+ case ruleMemoryRef:
+ symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
+ changed = didChange
+
+ if len(symbol) > 0 {
+ if _, localEntrySymbol := d.localEntrySymbols[symbol]; localEntrySymbol && isBranch {
+ symbol = localEntryName(symbol)
+ changed = true
+ } else if _, knownSymbol := d.symbols[symbol]; knownSymbol {
+ symbol = localTargetName(symbol)
+ changed = true
+ } else if !symbolIsLocal && !isSynthesized(symbol) && len(section) == 0 {
+ changed = true
+ d.redirectors[symbol] = redirectorName(symbol)
+ symbol = redirectorName(symbol)
+ // TODO(davidben): This should sanity-check the next
+ // instruction is a nop and ideally remove it.
+ wrappers = append(wrappers, func(k func()) {
+ k()
+ // Like the linker's PLT stubs, redirector functions
+ // expect callers to restore r2.
+ d.output.WriteString("\tld 2, 24(1)\n")
+ })
+ }
+ }
+
+ switch section {
+ case "":
+
+ case "tls":
+ // This section identifier just tells the
+ // assembler to use r13, the pointer to the
+ // thread-local data [PABI;3.7.3.3].
+
+ case "toc@ha":
+ // Delete toc@ha instructions. Per
+ // [PABI;3.6.3], the linker is allowed to erase
+ // toc@ha instructions. We take advantage of
+ // this by unconditionally erasing the toc@ha
+ // instructions and doing the full lookup when
+ // processing toc@l.
+ //
+ // Note that any offset here applies before @ha
+ // and @l. That is, 42+foo@toc@ha is
+ // #ha(42+foo-.TOC.), not 42+#ha(foo-.TOC.). Any
+ // corresponding toc@l references are required
+ // by the ABI to have the same offset. The
+ // offset will be incorporated in full when
+ // those are processed.
+ if instructionName != "addis" || len(argNodes) != 3 || i != 2 || args[1] != "2" {
+ return nil, errors.New("can't process toc@ha reference")
+ }
+ changed = true
+ instructionName = ""
+ break Args
+
+ case "toc@l":
+ // Per [PAB;3.6.3], this instruction must take
+ // as input a register which was the output of
+ // a toc@ha computation and compute the actual
+ // address of some symbol. The toc@ha
+ // computation was elided, so we ignore that
+ // input register and compute the address
+ // directly.
+ changed = true
+
+ // For all supported toc@l instructions, the
+ // destination register is the first argument.
+ destReg := args[0]
+
+ wrappers = append(wrappers, d.loadFromTOC(d.output, symbol, offset, destReg))
+ switch instructionName {
+ case "addi":
+ // The original instruction was:
+ // addi destReg, tocHaReg, offset+symbol@toc@l
+ instructionName = ""
+
+ case "ld", "lhz", "lwz":
+ // The original instruction was:
+ // l?? destReg, offset+symbol@toc@l(tocHaReg)
+ //
+ // We transform that into the
+ // equivalent dereference of destReg:
+ // l?? destReg, 0(destReg)
+ origInstructionName := instructionName
+ instructionName = ""
+
+ assertNodeType(memRef, ruleBaseIndexScale)
+ assertNodeType(memRef.up, ruleRegisterOrConstant)
+ if memRef.next != nil || memRef.up.next != nil {
+ return nil, errors.New("expected single register in BaseIndexScale for ld argument")
+ }
+
+ baseReg := destReg
+ if baseReg == "0" {
+ // Register zero is special as the base register for a load.
+ // Avoid it by spilling and using r3 instead.
+ baseReg = "3"
+ wrappers = append(wrappers, func(k func()) {
+ d.output.WriteString("\taddi 1, 1, -288\n") // Clear the red zone.
+ d.output.WriteString("\tstd " + baseReg + ", -8(1)\n")
+ d.output.WriteString("\tmr " + baseReg + ", " + destReg + "\n")
+ k()
+ d.output.WriteString("\tld " + baseReg + ", -8(1)\n")
+ d.output.WriteString("\taddi 1, 1, 288\n") // Clear the red zone.
+ })
+ }
+
+ wrappers = append(wrappers, func(k func()) {
+ d.output.WriteString("\t" + origInstructionName + " " + destReg + ", 0(" + baseReg + ")\n")
+ })
+ default:
+ return nil, fmt.Errorf("can't process TOC argument to %q", instructionName)
+ }
+
+ default:
+ return nil, fmt.Errorf("Unknown section type %q", section)
+ }
+
+ argStr := ""
+ if isIndirect {
+ argStr += "*"
+ }
+ argStr += symbol
+ if len(offset) > 0 {
+ argStr += offset
+ }
+ if len(section) > 0 {
+ argStr += "@"
+ argStr += section
+ }
+
+ for ; memRef != nil; memRef = memRef.next {
+ argStr += d.contents(memRef)
+ }
+
+ args = append(args, argStr)
+
+ default:
+ panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
+ }
+ }
+
+ if changed {
+ d.writeCommentedNode(statement)
+
+ var replacement string
+ if len(instructionName) > 0 {
+ replacement = "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
+ }
+
+ wrappers.do(func() {
+ d.output.WriteString(replacement)
+ })
+ } else {
+ d.writeNode(statement)
+ }
+
+ return statement, nil
+}
+
/* Intel */
type instructionType int
@@ -1323,6 +1729,8 @@ func writeAarch64Function(w stringWriter
func transform(w stringWriter, inputs []inputFile) error {
// symbols contains all defined symbols.
symbols := make(map[string]struct{})
+ // localEntrySymbols contains all symbols with a .localentry directive.
+ localEntrySymbols := make(map[string]struct{})
// fileNumbers is the set of IDs seen in .file directives.
fileNumbers := make(map[int]struct{})
// maxObservedFileNumber contains the largest seen file number in a
@@ -1346,6 +1754,25 @@ func transform(w stringWriter, inputs []
}, ruleStatement, ruleLabel, ruleSymbolName)
forEachPath(input.ast.up, func(node *node32) {
+ node = node.up
+ assertNodeType(node, ruleLabelContainingDirectiveName)
+ directive := input.contents[node.begin:node.end]
+ if directive != ".localentry" {
+ return
+ }
+ // Extract the first argument.
+ node = skipWS(node.next)
+ assertNodeType(node, ruleSymbolArgs)
+ node = node.up
+ assertNodeType(node, ruleSymbolArg)
+ symbol := input.contents[node.begin:node.end]
+ if _, ok := localEntrySymbols[symbol]; ok {
+ panic(fmt.Sprintf("Duplicate .localentry directive found: %q in %q", symbol, input.path))
+ }
+ localEntrySymbols[symbol] = struct{}{}
+ }, ruleStatement, ruleLabelContainingDirective)
+
+ forEachPath(input.ast.up, func(node *node32) {
assertNodeType(node, ruleLocationDirective)
directive := input.contents[node.begin:node.end]
if !strings.HasPrefix(directive, ".file") {
@@ -1393,11 +1820,13 @@ func transform(w stringWriter, inputs []
d := &delocation{
symbols: symbols,
+ localEntrySymbols: localEntrySymbols,
processor: processor,
commentIndicator: commentIndicator,
output: w,
redirectors: make(map[string]string),
bssAccessorsNeeded: make(map[string]string),
+ tocLoaders: make(map[string]struct{}),
gotExternalsNeeded: make(map[string]struct{}),
gotOffsetsNeeded: make(map[string]struct{}),
gotOffOffsetsNeeded: make(map[string]struct{}),
@@ -1432,6 +1861,22 @@ func transform(w stringWriter, inputs []
for _, name := range redirectorNames {
redirector := d.redirectors[name]
switch d.processor {
+ case ppc64le:
+ w.WriteString(".section \".toc\", \"aw\"\n")
+ w.WriteString(".Lredirector_toc_" + name + ":\n")
+ w.WriteString(".quad " + name + "\n")
+ w.WriteString(".text\n")
+ w.WriteString(".type " + redirector + ", @function\n")
+ w.WriteString(redirector + ":\n")
+ // |name| will clobber r2, so save it. This is matched by a restore in
+ // redirector calls.
+ w.WriteString("\tstd 2, 24(1)\n")
+ // Load and call |name|'s global entry point.
+ w.WriteString("\taddis 12, 2, .Lredirector_toc_" + name + "@toc@ha\n")
+ w.WriteString("\tld 12, .Lredirector_toc_" + name + "@toc@l(12)\n")
+ w.WriteString("\tmtctr 12\n")
+ w.WriteString("\tbctr\n")
+
case aarch64:
writeAarch64Function(w, redirector, func(w stringWriter) {
w.WriteString("\tb " + name + "\n")
@@ -1456,6 +1901,13 @@ func transform(w stringWriter, inputs []
target := d.bssAccessorsNeeded[name]
switch d.processor {
+ case ppc64le:
+ w.WriteString(".type " + funcName + ", @function\n")
+ w.WriteString(funcName + ":\n")
+ w.WriteString("\taddis 3, 2, " + target + "@toc@ha\n")
+ w.WriteString("\taddi 3, 3, " + target + "@toc@l\n")
+ w.WriteString("\tblr\n")
+
case x86_64:
w.WriteString(".type " + funcName + ", @function\n")
w.WriteString(funcName + ":\n")
@@ -1471,6 +1923,26 @@ func transform(w stringWriter, inputs []
}
switch d.processor {
+ case ppc64le:
+ loadTOCNames := sortedSet(d.tocLoaders)
+ for _, symbolAndOffset := range loadTOCNames {
+ parts := strings.SplitN(symbolAndOffset, "\x00", 2)
+ symbol, offset := parts[0], parts[1]
+
+ funcName := loadTOCFuncName(symbol, offset)
+ ref := symbol + offset
+
+ w.WriteString(".type " + funcName[2:] + ", @function\n")
+ w.WriteString(funcName[2:] + ":\n")
+ w.WriteString(funcName + ":\n")
+ w.WriteString("\taddis 3, 2, " + ref + "@toc@ha\n")
+ w.WriteString("\taddi 3, 3, " + ref + "@toc@l\n")
+ w.WriteString("\tblr\n")
+ }
+
+ w.WriteString(".LBORINGSSL_external_toc:\n")
+ w.WriteString(".quad .TOC.-.LBORINGSSL_external_toc\n")
+
case aarch64:
externalNames := sortedSet(d.gotExternalsNeeded)
for _, symbol := range externalNames {
@@ -1781,6 +2253,10 @@ func localTargetName(name string) string
return ".L" + name + "_local_target"
}
+func localEntryName(name string) string {
+ return ".L" + name + "_local_entry"
+}
+
func isSynthesized(symbol string) bool {
return strings.HasSuffix(symbol, "_bss_get") ||
symbol == "OPENSSL_ia32cap_get" ||
@@ -1836,6 +2312,8 @@ func detectProcessor(input inputFile) pr
switch instructionName {
case "movq", "call", "leaq":
return x86_64
+ case "addis", "addi", "mflr":
+ return ppc64le
case "str", "bl", "ldr", "st1":
return aarch64
}
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/delocate.peg
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/util/fipstools/delocate/delocate.peg
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/delocate.peg
@@ -12,7 +12,7 @@
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-# This is a rough parser for x86-64 and aarch64 assembly designed to work with
+# This is a rough parser for x86-64 and ppc64le assembly designed to work with
# https://github.com/pointlander/peg. delocate.go has a go:generate line for
# rebuilding delocate.peg.go from this file.
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/delocate_test.go
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/util/fipstools/delocate/delocate_test.go
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/delocate_test.go
@@ -39,6 +39,11 @@ func (test *delocateTest) Path(file stri
var delocateTests = []delocateTest{
{"generic-FileDirectives", []string{"in.s"}, "out.s"},
+ {"ppc64le-GlobalEntry", []string{"in.s"}, "out.s"},
+ {"ppc64le-LoadToR0", []string{"in.s"}, "out.s"},
+ {"ppc64le-Sample2", []string{"in.s"}, "out.s"},
+ {"ppc64le-Sample", []string{"in.s"}, "out.s"},
+ {"ppc64le-TOCWithOffset", []string{"in.s"}, "out.s"},
{"x86_64-Basic", []string{"in.s"}, "out.s"},
{"x86_64-BSS", []string{"in.s"}, "out.s"},
{"x86_64-GOTRewrite", []string{"in.s"}, "out.s"},
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-GlobalEntry/in.s
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-GlobalEntry/in.s
@@ -0,0 +1,9 @@
+ .text
+foo:
+.LCF0:
+0:
+ addis 2,12,.TOC.-.LCF0@ha
+ addi 2,2,.TOC.-.LCF0@l
+ .localentry foo,.-foo
+.LVL0:
+ bl
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-GlobalEntry/out.s
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-GlobalEntry/out.s
@@ -0,0 +1,62 @@
+.text
+.file 1 "inserted_by_delocate.c"
+.loc 1 1 0
+BORINGSSL_bcm_text_start:
+ .text
+.Lfoo_local_target:
+foo:
+.LCF0:
+
+0:
+
+999:
+ addis 2, 12, .LBORINGSSL_external_toc-999b@ha
+ addi 2, 2, .LBORINGSSL_external_toc-999b@l
+ ld 12, 0(2)
+ add 2, 2, 12
+# WAS addi 2,2,.TOC.-.LCF0@l
+ .localentry foo,.-foo
+.Lfoo_local_entry:
+.LVL0:
+
+ bl
+.text
+.loc 1 2 0
+BORINGSSL_bcm_text_end:
+.LBORINGSSL_external_toc:
+.quad .TOC.-.LBORINGSSL_external_toc
+.type BORINGSSL_bcm_text_hash, @object
+.size BORINGSSL_bcm_text_hash, 32
+BORINGSSL_bcm_text_hash:
+.byte 0xae
+.byte 0x2c
+.byte 0xea
+.byte 0x2a
+.byte 0xbd
+.byte 0xa6
+.byte 0xf3
+.byte 0xec
+.byte 0x97
+.byte 0x7f
+.byte 0x9b
+.byte 0xf6
+.byte 0x94
+.byte 0x9a
+.byte 0xfc
+.byte 0x83
+.byte 0x68
+.byte 0x27
+.byte 0xcb
+.byte 0xa0
+.byte 0xa0
+.byte 0x9f
+.byte 0x6b
+.byte 0x6f
+.byte 0xde
+.byte 0x52
+.byte 0xcd
+.byte 0xe2
+.byte 0xcd
+.byte 0xff
+.byte 0x31
+.byte 0x80
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-LoadToR0/in.s
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-LoadToR0/in.s
@@ -0,0 +1,4 @@
+ .text
+foo:
+ addis 22,2,bar@toc@ha
+ ld 0,bar@toc@l(22)
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-LoadToR0/out.s
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-LoadToR0/out.s
@@ -0,0 +1,72 @@
+.text
+.file 1 "inserted_by_delocate.c"
+.loc 1 1 0
+BORINGSSL_bcm_text_start:
+ .text
+.Lfoo_local_target:
+foo:
+# WAS addis 22,2,bar@toc@ha
+# WAS ld 0,bar@toc@l(22)
+ addi 1, 1, -288
+ mflr 0
+ std 0, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc_bar
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 0, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ addi 1, 1, -288
+ std 3, -8(1)
+ mr 3, 0
+ ld 0, 0(3)
+ ld 3, -8(1)
+ addi 1, 1, 288
+.text
+.loc 1 2 0
+BORINGSSL_bcm_text_end:
+.type bcm_loadtoc_bar, @function
+bcm_loadtoc_bar:
+.Lbcm_loadtoc_bar:
+ addis 3, 2, bar@toc@ha
+ addi 3, 3, bar@toc@l
+ blr
+.LBORINGSSL_external_toc:
+.quad .TOC.-.LBORINGSSL_external_toc
+.type BORINGSSL_bcm_text_hash, @object
+.size BORINGSSL_bcm_text_hash, 32
+BORINGSSL_bcm_text_hash:
+.byte 0xae
+.byte 0x2c
+.byte 0xea
+.byte 0x2a
+.byte 0xbd
+.byte 0xa6
+.byte 0xf3
+.byte 0xec
+.byte 0x97
+.byte 0x7f
+.byte 0x9b
+.byte 0xf6
+.byte 0x94
+.byte 0x9a
+.byte 0xfc
+.byte 0x83
+.byte 0x68
+.byte 0x27
+.byte 0xcb
+.byte 0xa0
+.byte 0xa0
+.byte 0x9f
+.byte 0x6b
+.byte 0x6f
+.byte 0xde
+.byte 0x52
+.byte 0xcd
+.byte 0xe2
+.byte 0xcd
+.byte 0xff
+.byte 0x31
+.byte 0x80
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample/in.s
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample/in.s
@@ -0,0 +1,161 @@
+ .file "foo.c"
+ .abiversion 2
+ .section ".toc","aw"
+ .section ".text"
+ .section .rodata
+ .align 3
+ .type kString, @object
+ .size kString, 12
+kString:
+ .string "hello world"
+ .globl kExportedString
+ .align 3
+ .type kExportedString, @object
+ .size kExportedString, 26
+kExportedString:
+ .string "hello world, more visibly"
+ .align 2
+ .type kGiantArray, @object
+ .size kGiantArray, 400000
+kGiantArray:
+ .long 1
+ .long 0
+ .zero 399992
+ .lcomm bss,20,4
+ .type bss, @object
+ .align 3
+.LC1:
+ .string "kString is %p\n"
+ .align 3
+.LC2:
+ .string "kExportedString is %p\n"
+ .align 3
+.LC4:
+ .string "function is %p\n"
+ .align 3
+.LC5:
+ .string "exported_function is %p\n"
+ .align 3
+.LC7:
+ .string "&kString[5] is %p\n"
+ .align 3
+.LC9:
+ .string "&kGiantArray[0x12345] is %p\n"
+ .section ".toc","aw"
+.LC0:
+ .quad stderr
+.LC3:
+ .quad kExportedString
+.LC6:
+ .quad exported_function
+.LC8:
+ .quad kString+5
+.LC10:
+ .quad kGiantArray+298260
+ .section ".text"
+ .align 2
+ .type function, @function
+function:
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry function,.-function
+ mflr 0
+ std 0,16(1)
+ std 31,-8(1)
+ stdu 1,-112(1)
+ mr 31,1
+ addis 10,2,.LC0@toc@ha
+ ld 9,.LC0@toc@l(10)
+ ld 9,0(9)
+ mr 3,9
+ addis 4,2,.LC1@toc@ha
+ addi 4,4,.LC1@toc@l
+ addis 5,2,kString@toc@ha
+ addi 5,5,kString@toc@l
+ bl fprintf
+ nop
+ addis 10,2,.LC0@toc@ha
+ ld 9,.LC0@toc@l(10)
+ ld 9,0(9)
+ mr 3,9
+ addis 4,2,.LC2@toc@ha
+ addi 4,4,.LC2@toc@l
+ addis 9,2,.LC3@toc@ha
+ ld 5,.LC3@toc@l(9)
+ bl fprintf
+ nop
+ addis 10,2,.LC0@toc@ha
+ ld 9,.LC0@toc@l(10)
+ ld 9,0(9)
+ mr 3,9
+ addis 4,2,.LC4@toc@ha
+ addi 4,4,.LC4@toc@l
+ addis 5,2,function@toc@ha
+ addi 5,5,function@toc@l
+ bl fprintf
+ nop
+ addis 10,2,.LC0@toc@ha
+ ld 9,.LC0@toc@l(10)
+ ld 9,0(9)
+ mr 3,9
+ addis 4,2,.LC5@toc@ha
+ addi 4,4,.LC5@toc@l
+ addis 9,2,.LC6@toc@ha
+ ld 5,.LC6@toc@l(9)
+ bl fprintf
+ nop
+ addis 10,2,.LC0@toc@ha
+ ld 9,.LC0@toc@l(10)
+ ld 9,0(9)
+ mr 3,9
+ addis 4,2,.LC7@toc@ha
+ addi 4,4,.LC7@toc@l
+ addis 9,2,.LC8@toc@ha
+ ld 5,.LC8@toc@l(9)
+ bl fprintf
+ nop
+ addis 10,2,.LC0@toc@ha
+ ld 9,.LC0@toc@l(10)
+ ld 9,0(9)
+ mr 3,9
+ addis 4,2,.LC9@toc@ha
+ addi 4,4,.LC9@toc@l
+ addis 9,2,.LC10@toc@ha
+ ld 5,.LC10@toc@l(9)
+ bl fprintf
+ nop
+ bl exported_function
+ nop
+ mr 3,9
+ addi 1,31,112
+ ld 0,16(1)
+ mtlr 0
+ ld 31,-8(1)
+ blr
+ .long 0
+ .byte 0,0,0,1,128,1,0,1
+ .size function,.-function
+ .align 2
+ .globl exported_function
+ .type exported_function, @function
+exported_function:
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry exported_function,.-exported_function
+ mflr 0
+ std 0,16(1)
+ std 31,-8(1)
+ stdu 1,-48(1)
+ mr 31,1
+ bl function
+ mr 3,9
+ addi 1,31,48
+ ld 0,16(1)
+ mtlr 0
+ ld 31,-8(1)
+ blr
+ .long 0
+ .byte 0,0,0,1,128,1,0,1
+ .size exported_function,.-exported_function
+ .ident "GCC: (Ubuntu 4.9.2-10ubuntu13) 4.9.2"
+ .section .note.GNU-stack,"",@progbits
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample/out.s
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample/out.s
@@ -0,0 +1,552 @@
+.text
+.file 1 "inserted_by_delocate.c"
+.loc 1 1 0
+BORINGSSL_bcm_text_start:
+ .file "foo.c"
+ .abiversion 2
+ .section ".toc","aw"
+# WAS .section ".text"
+.text
+# WAS .section .rodata
+.text
+ .align 3
+ .type kString, @object
+ .size kString, 12
+.LkString_local_target:
+kString:
+ .string "hello world"
+ .globl kExportedString
+ .align 3
+ .type kExportedString, @object
+ .size kExportedString, 26
+.LkExportedString_local_target:
+kExportedString:
+ .string "hello world, more visibly"
+ .align 2
+ .type kGiantArray, @object
+ .size kGiantArray, 400000
+.LkGiantArray_local_target:
+kGiantArray:
+ .long 1
+ .long 0
+ .zero 399992
+ .lcomm bss,20,4
+ .type bss, @object
+ .align 3
+.LC1:
+
+ .string "kString is %p\n"
+ .align 3
+.LC2:
+
+ .string "kExportedString is %p\n"
+ .align 3
+.LC4:
+
+ .string "function is %p\n"
+ .align 3
+.LC5:
+
+ .string "exported_function is %p\n"
+ .align 3
+.LC7:
+
+ .string "&kString[5] is %p\n"
+ .align 3
+.LC9:
+
+ .string "&kGiantArray[0x12345] is %p\n"
+ .section ".toc","aw"
+.LC0:
+
+ .quad stderr
+.LC3:
+
+ .quad kExportedString
+.LC6:
+
+ .quad exported_function
+.LC8:
+
+ .quad kString+5
+.LC10:
+
+ .quad kGiantArray+298260
+# WAS .section ".text"
+.text
+ .align 2
+ .type function, @function
+.Lfunction_local_target:
+function:
+0:
+999:
+ addis 2, 12, .LBORINGSSL_external_toc-999b@ha
+ addi 2, 2, .LBORINGSSL_external_toc-999b@l
+ ld 12, 0(2)
+ add 2, 2, 12
+# WAS addi 2,2,.TOC.-0b@l
+ .localentry function,.-function
+.Lfunction_local_entry:
+ mflr 0
+ std 0,16(1)
+ std 31,-8(1)
+ stdu 1,-112(1)
+ mr 31,1
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+ addi 1, 1, -288
+ mflr 9
+ std 9, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 9, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 9, 0(9)
+ ld 9,0(9)
+ mr 3,9
+# WAS addis 4,2,.LC1@toc@ha
+# WAS addi 4,4,.LC1@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC1
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addis 5,2,kString@toc@ha
+# WAS addi 5,5,kString@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LkString_local_target
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS bl fprintf
+ bl bcm_redirector_fprintf
+ ld 2, 24(1)
+ nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+ addi 1, 1, -288
+ mflr 9
+ std 9, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 9, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 9, 0(9)
+ ld 9,0(9)
+ mr 3,9
+# WAS addis 4,2,.LC2@toc@ha
+# WAS addi 4,4,.LC2@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC2
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addis 9,2,.LC3@toc@ha
+# WAS ld 5,.LC3@toc@l(9)
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC3
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 5, 0(5)
+# WAS bl fprintf
+ bl bcm_redirector_fprintf
+ ld 2, 24(1)
+ nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+ addi 1, 1, -288
+ mflr 9
+ std 9, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 9, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 9, 0(9)
+ ld 9,0(9)
+ mr 3,9
+# WAS addis 4,2,.LC4@toc@ha
+# WAS addi 4,4,.LC4@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC4
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addis 5,2,function@toc@ha
+# WAS addi 5,5,function@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfunction_local_target
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS bl fprintf
+ bl bcm_redirector_fprintf
+ ld 2, 24(1)
+ nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+ addi 1, 1, -288
+ mflr 9
+ std 9, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 9, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 9, 0(9)
+ ld 9,0(9)
+ mr 3,9
+# WAS addis 4,2,.LC5@toc@ha
+# WAS addi 4,4,.LC5@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC5
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addis 9,2,.LC6@toc@ha
+# WAS ld 5,.LC6@toc@l(9)
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC6
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 5, 0(5)
+# WAS bl fprintf
+ bl bcm_redirector_fprintf
+ ld 2, 24(1)
+ nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+ addi 1, 1, -288
+ mflr 9
+ std 9, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 9, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 9, 0(9)
+ ld 9,0(9)
+ mr 3,9
+# WAS addis 4,2,.LC7@toc@ha
+# WAS addi 4,4,.LC7@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC7
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addis 9,2,.LC8@toc@ha
+# WAS ld 5,.LC8@toc@l(9)
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC8
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 5, 0(5)
+# WAS bl fprintf
+ bl bcm_redirector_fprintf
+ ld 2, 24(1)
+ nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+ addi 1, 1, -288
+ mflr 9
+ std 9, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 9, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 9, 0(9)
+ ld 9,0(9)
+ mr 3,9
+# WAS addis 4,2,.LC9@toc@ha
+# WAS addi 4,4,.LC9@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC9
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addis 9,2,.LC10@toc@ha
+# WAS ld 5,.LC10@toc@l(9)
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC10
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 5, 0(5)
+# WAS bl fprintf
+ bl bcm_redirector_fprintf
+ ld 2, 24(1)
+ nop
+# WAS bl exported_function
+ bl .Lexported_function_local_entry
+ nop
+ mr 3,9
+ addi 1,31,112
+ ld 0,16(1)
+ mtlr 0
+ ld 31,-8(1)
+ blr
+ .long 0
+ .byte 0,0,0,1,128,1,0,1
+ .size function,.-function
+ .align 2
+ .globl exported_function
+ .type exported_function, @function
+.Lexported_function_local_target:
+exported_function:
+0:
+999:
+ addis 2, 12, .LBORINGSSL_external_toc-999b@ha
+ addi 2, 2, .LBORINGSSL_external_toc-999b@l
+ ld 12, 0(2)
+ add 2, 2, 12
+# WAS addi 2,2,.TOC.-0b@l
+ .localentry exported_function,.-exported_function
+.Lexported_function_local_entry:
+ mflr 0
+ std 0,16(1)
+ std 31,-8(1)
+ stdu 1,-48(1)
+ mr 31,1
+# WAS bl function
+ bl .Lfunction_local_entry
+ mr 3,9
+ addi 1,31,48
+ ld 0,16(1)
+ mtlr 0
+ ld 31,-8(1)
+ blr
+ .long 0
+ .byte 0,0,0,1,128,1,0,1
+ .size exported_function,.-exported_function
+ .ident "GCC: (Ubuntu 4.9.2-10ubuntu13) 4.9.2"
+ .section .note.GNU-stack,"",@progbits
+.text
+.loc 1 2 0
+BORINGSSL_bcm_text_end:
+.section ".toc", "aw"
+.Lredirector_toc_fprintf:
+.quad fprintf
+.text
+.type bcm_redirector_fprintf, @function
+bcm_redirector_fprintf:
+ std 2, 24(1)
+ addis 12, 2, .Lredirector_toc_fprintf@toc@ha
+ ld 12, .Lredirector_toc_fprintf@toc@l(12)
+ mtctr 12
+ bctr
+.type bss_bss_get, @function
+bss_bss_get:
+ addis 3, 2, bss@toc@ha
+ addi 3, 3, bss@toc@l
+ blr
+.type bcm_loadtoc__dot_LC0, @function
+bcm_loadtoc__dot_LC0:
+.Lbcm_loadtoc__dot_LC0:
+ addis 3, 2, .LC0@toc@ha
+ addi 3, 3, .LC0@toc@l
+ blr
+.type bcm_loadtoc__dot_LC1, @function
+bcm_loadtoc__dot_LC1:
+.Lbcm_loadtoc__dot_LC1:
+ addis 3, 2, .LC1@toc@ha
+ addi 3, 3, .LC1@toc@l
+ blr
+.type bcm_loadtoc__dot_LC10, @function
+bcm_loadtoc__dot_LC10:
+.Lbcm_loadtoc__dot_LC10:
+ addis 3, 2, .LC10@toc@ha
+ addi 3, 3, .LC10@toc@l
+ blr
+.type bcm_loadtoc__dot_LC2, @function
+bcm_loadtoc__dot_LC2:
+.Lbcm_loadtoc__dot_LC2:
+ addis 3, 2, .LC2@toc@ha
+ addi 3, 3, .LC2@toc@l
+ blr
+.type bcm_loadtoc__dot_LC3, @function
+bcm_loadtoc__dot_LC3:
+.Lbcm_loadtoc__dot_LC3:
+ addis 3, 2, .LC3@toc@ha
+ addi 3, 3, .LC3@toc@l
+ blr
+.type bcm_loadtoc__dot_LC4, @function
+bcm_loadtoc__dot_LC4:
+.Lbcm_loadtoc__dot_LC4:
+ addis 3, 2, .LC4@toc@ha
+ addi 3, 3, .LC4@toc@l
+ blr
+.type bcm_loadtoc__dot_LC5, @function
+bcm_loadtoc__dot_LC5:
+.Lbcm_loadtoc__dot_LC5:
+ addis 3, 2, .LC5@toc@ha
+ addi 3, 3, .LC5@toc@l
+ blr
+.type bcm_loadtoc__dot_LC6, @function
+bcm_loadtoc__dot_LC6:
+.Lbcm_loadtoc__dot_LC6:
+ addis 3, 2, .LC6@toc@ha
+ addi 3, 3, .LC6@toc@l
+ blr
+.type bcm_loadtoc__dot_LC7, @function
+bcm_loadtoc__dot_LC7:
+.Lbcm_loadtoc__dot_LC7:
+ addis 3, 2, .LC7@toc@ha
+ addi 3, 3, .LC7@toc@l
+ blr
+.type bcm_loadtoc__dot_LC8, @function
+bcm_loadtoc__dot_LC8:
+.Lbcm_loadtoc__dot_LC8:
+ addis 3, 2, .LC8@toc@ha
+ addi 3, 3, .LC8@toc@l
+ blr
+.type bcm_loadtoc__dot_LC9, @function
+bcm_loadtoc__dot_LC9:
+.Lbcm_loadtoc__dot_LC9:
+ addis 3, 2, .LC9@toc@ha
+ addi 3, 3, .LC9@toc@l
+ blr
+.type bcm_loadtoc__dot_Lfunction_local_target, @function
+bcm_loadtoc__dot_Lfunction_local_target:
+.Lbcm_loadtoc__dot_Lfunction_local_target:
+ addis 3, 2, .Lfunction_local_target@toc@ha
+ addi 3, 3, .Lfunction_local_target@toc@l
+ blr
+.type bcm_loadtoc__dot_LkString_local_target, @function
+bcm_loadtoc__dot_LkString_local_target:
+.Lbcm_loadtoc__dot_LkString_local_target:
+ addis 3, 2, .LkString_local_target@toc@ha
+ addi 3, 3, .LkString_local_target@toc@l
+ blr
+.LBORINGSSL_external_toc:
+.quad .TOC.-.LBORINGSSL_external_toc
+.type BORINGSSL_bcm_text_hash, @object
+.size BORINGSSL_bcm_text_hash, 32
+BORINGSSL_bcm_text_hash:
+.byte 0xae
+.byte 0x2c
+.byte 0xea
+.byte 0x2a
+.byte 0xbd
+.byte 0xa6
+.byte 0xf3
+.byte 0xec
+.byte 0x97
+.byte 0x7f
+.byte 0x9b
+.byte 0xf6
+.byte 0x94
+.byte 0x9a
+.byte 0xfc
+.byte 0x83
+.byte 0x68
+.byte 0x27
+.byte 0xcb
+.byte 0xa0
+.byte 0xa0
+.byte 0x9f
+.byte 0x6b
+.byte 0x6f
+.byte 0xde
+.byte 0x52
+.byte 0xcd
+.byte 0xe2
+.byte 0xcd
+.byte 0xff
+.byte 0x31
+.byte 0x80
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample2/in.s
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample2/in.s
@@ -0,0 +1,226 @@
+ .file "foo.c"
+ .abiversion 2
+ .section ".toc","aw"
+ .section ".text"
+ .section ".toc","aw"
+.LC0:
+ .quad stderr
+.LC3:
+ .quad kExportedString
+.LC6:
+ .quad exported_function
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .globl exported_function
+ .type exported_function, @function
+exported_function:
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry exported_function,.-exported_function
+ mflr 0
+ std 19,-104(1)
+ std 20,-96(1)
+ std 21,-88(1)
+ std 22,-80(1)
+ addis 21,2,.LC1@toc@ha
+ addis 22,2,.LC2@toc@ha
+ std 23,-72(1)
+ std 24,-64(1)
+ addis 23,2,.LC4@toc@ha
+ addis 24,2,function@toc@ha
+ std 25,-56(1)
+ std 26,-48(1)
+ addis 25,2,.LC5@toc@ha
+ addis 26,2,.LC7@toc@ha
+ std 27,-40(1)
+ std 28,-32(1)
+ addis 28,2,.LC8@toc@ha
+ addi 21,21,.LC1@toc@l
+ std 29,-24(1)
+ std 30,-16(1)
+ addis 29,2,.LANCHOR0@toc@ha
+ addi 22,22,.LC2@toc@l
+ std 31,-8(1)
+ std 0,16(1)
+ addi 29,29,.LANCHOR0@toc@l
+ addi 23,23,.LC4@toc@l
+ stdu 1,-208(1)
+ addis 31,2,.LC0@toc@ha # gpr load fusion, type long
+ ld 31,.LC0@toc@l(31)
+ addis 19,2,.LC3@toc@ha # gpr load fusion, type long
+ ld 19,.LC3@toc@l(19)
+ addis 30,29,0x5
+ addi 24,24,function@toc@l
+ addis 20,2,.LC6@toc@ha # gpr load fusion, type long
+ ld 20,.LC6@toc@l(20)
+ addi 25,25,.LC5@toc@l
+ addi 26,26,.LC7@toc@l
+ addi 27,29,5
+ addi 28,28,.LC8@toc@l
+ addi 30,30,-29404
+ .p2align 4,,15
+.L2:
+ ld 3,0(31)
+ mr 5,21
+ mr 6,29
+ li 4,1
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ mr 5,22
+ mr 6,19
+ li 4,1
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ mr 5,23
+ mr 6,24
+ li 4,1
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ mr 5,25
+ mr 6,20
+ li 4,1
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ mr 5,26
+ mr 6,27
+ li 4,1
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ li 4,1
+ mr 5,28
+ mr 6,30
+ bl __fprintf_chk
+ nop
+ b .L2
+ .long 0
+ .byte 0,0,0,1,128,13,0,0
+ .size exported_function,.-exported_function
+ .section ".toc","aw"
+ .set .LC11,.LC0
+ .set .LC12,.LC3
+ .set .LC13,.LC6
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .type function, @function
+function:
+0: addis 2,12,.TOC.-0b@ha
+ addi 2,2,.TOC.-0b@l
+ .localentry function,.-function
+ mflr 0
+ std 31,-8(1)
+ addis 31,2,.LC11@toc@ha # gpr load fusion, type long
+ ld 31,.LC11@toc@l(31)
+ addis 5,2,.LC1@toc@ha
+ std 30,-16(1)
+ addis 30,2,.LANCHOR0@toc@ha
+ addi 5,5,.LC1@toc@l
+ addi 30,30,.LANCHOR0@toc@l
+ li 4,1
+ mr 6,30
+ std 0,16(1)
+ stdu 1,-112(1)
+ ld 3,0(31)
+ bl __fprintf_chk
+ nop
+ addis 6,2,.LC12@toc@ha # gpr load fusion, type long
+ ld 6,.LC12@toc@l(6)
+ ld 3,0(31)
+ addis 5,2,.LC2@toc@ha
+ li 4,1
+ addi 5,5,.LC2@toc@l
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ addis 5,2,.LC4@toc@ha
+ addis 6,2,function@toc@ha
+ addi 5,5,.LC4@toc@l
+ addi 6,6,function@toc@l
+ li 4,1
+ bl __fprintf_chk
+ nop
+ addis 6,2,.LC13@toc@ha # gpr load fusion, type long
+ ld 6,.LC13@toc@l(6)
+ ld 3,0(31)
+ addis 5,2,.LC5@toc@ha
+ li 4,1
+ addi 5,5,.LC5@toc@l
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ addis 5,2,.LC7@toc@ha
+ addi 6,30,5
+ addi 5,5,.LC7@toc@l
+ li 4,1
+ bl __fprintf_chk
+ nop
+ ld 3,0(31)
+ addis 6,30,0x5
+ addis 5,2,.LC8@toc@ha
+ li 4,1
+ addi 5,5,.LC8@toc@l
+ addi 6,6,-29404
+ bl __fprintf_chk
+ nop
+ bl exported_function
+ nop
+ addi 1,1,112
+ ld 0,16(1)
+ ld 30,-16(1)
+ ld 31,-8(1)
+ mtlr 0
+ blr
+ .long 0
+ .byte 0,0,0,1,128,2,0,0
+ .size function,.-function
+ .globl kExportedString
+ .section .rodata
+ .align 4
+ .set .LANCHOR0,. + 0
+ .type kString, @object
+ .size kString, 12
+kString:
+ .string "hello world"
+ .zero 4
+ .type kGiantArray, @object
+ .size kGiantArray, 400000
+kGiantArray:
+ .long 1
+ .long 0
+ .zero 399992
+ .type kExportedString, @object
+ .size kExportedString, 26
+kExportedString:
+ .string "hello world, more visibly"
+ .section .rodata.str1.8,"aMS",@progbits,1
+ .align 3
+.LC1:
+ .string "kString is %p\n"
+ .zero 1
+.LC2:
+ .string "kExportedString is %p\n"
+ .zero 1
+.LC4:
+ .string "function is %p\n"
+.LC5:
+ .string "exported_function is %p\n"
+ .zero 7
+.LC7:
+ .string "&kString[5] is %p\n"
+ .zero 5
+.LC8:
+ .string "&kGiantArray[0x12345] is %p\n"
+ .section ".bss"
+ .align 2
+ .type bss, @object
+ .size bss, 20
+bss:
+ .zero 20
+ .ident "GCC: (Ubuntu 4.9.2-10ubuntu13) 4.9.2"
+ .section .note.GNU-stack,"",@progbits
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample2/out.s
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample2/out.s
@@ -0,0 +1,677 @@
+.text
+.file 1 "inserted_by_delocate.c"
+.loc 1 1 0
+BORINGSSL_bcm_text_start:
+ .file "foo.c"
+ .abiversion 2
+ .section ".toc","aw"
+# WAS .section ".text"
+.text
+ .section ".toc","aw"
+.LC0:
+
+ .quad stderr
+.LC3:
+
+ .quad kExportedString
+.LC6:
+
+ .quad exported_function
+# WAS .section ".text"
+.text
+ .align 2
+ .p2align 4,,15
+ .globl exported_function
+ .type exported_function, @function
+.Lexported_function_local_target:
+exported_function:
+0:
+999:
+ addis 2, 12, .LBORINGSSL_external_toc-999b@ha
+ addi 2, 2, .LBORINGSSL_external_toc-999b@l
+ ld 12, 0(2)
+ add 2, 2, 12
+# WAS addi 2,2,.TOC.-0b@l
+ .localentry exported_function,.-exported_function
+.Lexported_function_local_entry:
+ mflr 0
+ std 19,-104(1)
+ std 20,-96(1)
+ std 21,-88(1)
+ std 22,-80(1)
+# WAS addis 21,2,.LC1@toc@ha
+# WAS addis 22,2,.LC2@toc@ha
+ std 23,-72(1)
+ std 24,-64(1)
+# WAS addis 23,2,.LC4@toc@ha
+# WAS addis 24,2,function@toc@ha
+ std 25,-56(1)
+ std 26,-48(1)
+# WAS addis 25,2,.LC5@toc@ha
+# WAS addis 26,2,.LC7@toc@ha
+ std 27,-40(1)
+ std 28,-32(1)
+# WAS addis 28,2,.LC8@toc@ha
+# WAS addi 21,21,.LC1@toc@l
+ addi 1, 1, -288
+ mflr 21
+ std 21, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC1
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 21, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ std 29,-24(1)
+ std 30,-16(1)
+# WAS addis 29,2,.LANCHOR0@toc@ha
+# WAS addi 22,22,.LC2@toc@l
+ addi 1, 1, -288
+ mflr 22
+ std 22, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC2
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 22, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ std 31,-8(1)
+ std 0,16(1)
+# WAS addi 29,29,.LANCHOR0@toc@l
+ addi 1, 1, -288
+ mflr 29
+ std 29, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LANCHOR0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 29, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addi 23,23,.LC4@toc@l
+ addi 1, 1, -288
+ mflr 23
+ std 23, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC4
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 23, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ stdu 1,-208(1)
+# WAS addis 31,2,.LC0@toc@ha # gpr load fusion, type long
+# WAS ld 31,.LC0@toc@l(31)
+ addi 1, 1, -288
+ mflr 31
+ std 31, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 31, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 31, 0(31)
+# WAS addis 19,2,.LC3@toc@ha # gpr load fusion, type long
+# WAS ld 19,.LC3@toc@l(19)
+ addi 1, 1, -288
+ mflr 19
+ std 19, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC3
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 19, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 19, 0(19)
+ addis 30,29,0x5
+# WAS addi 24,24,function@toc@l
+ addi 1, 1, -288
+ mflr 24
+ std 24, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfunction_local_target
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 24, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addis 20,2,.LC6@toc@ha # gpr load fusion, type long
+# WAS ld 20,.LC6@toc@l(20)
+ addi 1, 1, -288
+ mflr 20
+ std 20, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC6
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 20, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 20, 0(20)
+# WAS addi 25,25,.LC5@toc@l
+ addi 1, 1, -288
+ mflr 25
+ std 25, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC5
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 25, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addi 26,26,.LC7@toc@l
+ addi 1, 1, -288
+ mflr 26
+ std 26, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC7
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 26, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ addi 27,29,5
+# WAS addi 28,28,.LC8@toc@l
+ addi 1, 1, -288
+ mflr 28
+ std 28, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC8
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 28, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ addi 30,30,-29404
+ .p2align 4,,15
+.L2:
+
+ ld 3,0(31)
+ mr 5,21
+ mr 6,29
+ li 4,1
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+ mr 5,22
+ mr 6,19
+ li 4,1
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+ mr 5,23
+ mr 6,24
+ li 4,1
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+ mr 5,25
+ mr 6,20
+ li 4,1
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+ mr 5,26
+ mr 6,27
+ li 4,1
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+ li 4,1
+ mr 5,28
+ mr 6,30
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ b .L2
+ .long 0
+ .byte 0,0,0,1,128,13,0,0
+ .size exported_function,.-exported_function
+ .section ".toc","aw"
+ .set .LC11,.LC0
+ .set .LC12,.LC3
+ .set .LC13,.LC6
+# WAS .section ".text"
+.text
+ .align 2
+ .p2align 4,,15
+ .type function, @function
+.Lfunction_local_target:
+function:
+0:
+999:
+ addis 2, 12, .LBORINGSSL_external_toc-999b@ha
+ addi 2, 2, .LBORINGSSL_external_toc-999b@l
+ ld 12, 0(2)
+ add 2, 2, 12
+# WAS addi 2,2,.TOC.-0b@l
+ .localentry function,.-function
+.Lfunction_local_entry:
+ mflr 0
+ std 31,-8(1)
+# WAS addis 31,2,.LC11@toc@ha # gpr load fusion, type long
+# WAS ld 31,.LC11@toc@l(31)
+ addi 1, 1, -288
+ mflr 31
+ std 31, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC11
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 31, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 31, 0(31)
+# WAS addis 5,2,.LC1@toc@ha
+ std 30,-16(1)
+# WAS addis 30,2,.LANCHOR0@toc@ha
+# WAS addi 5,5,.LC1@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC1
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addi 30,30,.LANCHOR0@toc@l
+ addi 1, 1, -288
+ mflr 30
+ std 30, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LANCHOR0
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 30, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ li 4,1
+ mr 6,30
+ std 0,16(1)
+ stdu 1,-112(1)
+ ld 3,0(31)
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+# WAS addis 6,2,.LC12@toc@ha # gpr load fusion, type long
+# WAS ld 6,.LC12@toc@l(6)
+ addi 1, 1, -288
+ mflr 6
+ std 6, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC12
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 6, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 6, 0(6)
+ ld 3,0(31)
+# WAS addis 5,2,.LC2@toc@ha
+ li 4,1
+# WAS addi 5,5,.LC2@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC2
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+# WAS addis 5,2,.LC4@toc@ha
+# WAS addis 6,2,function@toc@ha
+# WAS addi 5,5,.LC4@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC4
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS addi 6,6,function@toc@l
+ addi 1, 1, -288
+ mflr 6
+ std 6, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfunction_local_target
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 6, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ li 4,1
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+# WAS addis 6,2,.LC13@toc@ha # gpr load fusion, type long
+# WAS ld 6,.LC13@toc@l(6)
+ addi 1, 1, -288
+ mflr 6
+ std 6, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC13
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 6, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 6, 0(6)
+ ld 3,0(31)
+# WAS addis 5,2,.LC5@toc@ha
+ li 4,1
+# WAS addi 5,5,.LC5@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC5
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+# WAS addis 5,2,.LC7@toc@ha
+ addi 6,30,5
+# WAS addi 5,5,.LC7@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC7
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ li 4,1
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+ ld 3,0(31)
+ addis 6,30,0x5
+# WAS addis 5,2,.LC8@toc@ha
+ li 4,1
+# WAS addi 5,5,.LC8@toc@l
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_LC8
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ addi 6,6,-29404
+# WAS bl __fprintf_chk
+ bl bcm_redirector___fprintf_chk
+ ld 2, 24(1)
+ nop
+# WAS bl exported_function
+ bl .Lexported_function_local_entry
+ nop
+ addi 1,1,112
+ ld 0,16(1)
+ ld 30,-16(1)
+ ld 31,-8(1)
+ mtlr 0
+ blr
+ .long 0
+ .byte 0,0,0,1,128,2,0,0
+ .size function,.-function
+ .globl kExportedString
+# WAS .section .rodata
+.text
+ .align 4
+ .set .LANCHOR0,. + 0
+ .type kString, @object
+ .size kString, 12
+.LkString_local_target:
+kString:
+ .string "hello world"
+ .zero 4
+ .type kGiantArray, @object
+ .size kGiantArray, 400000
+.LkGiantArray_local_target:
+kGiantArray:
+ .long 1
+ .long 0
+ .zero 399992
+ .type kExportedString, @object
+ .size kExportedString, 26
+.LkExportedString_local_target:
+kExportedString:
+ .string "hello world, more visibly"
+# WAS .section .rodata.str1.8,"aMS",@progbits,1
+.text
+ .align 3
+.LC1:
+
+ .string "kString is %p\n"
+ .zero 1
+.LC2:
+
+ .string "kExportedString is %p\n"
+ .zero 1
+.LC4:
+
+ .string "function is %p\n"
+.LC5:
+
+ .string "exported_function is %p\n"
+ .zero 7
+.LC7:
+
+ .string "&kString[5] is %p\n"
+ .zero 5
+.LC8:
+
+ .string "&kGiantArray[0x12345] is %p\n"
+ .section ".bss"
+ .align 2
+ .type bss, @object
+ .size bss, 20
+bss:
+.Lbss_local_target:
+
+ .zero 20
+ .ident "GCC: (Ubuntu 4.9.2-10ubuntu13) 4.9.2"
+ .section .note.GNU-stack,"",@progbits
+.text
+.loc 1 2 0
+BORINGSSL_bcm_text_end:
+.section ".toc", "aw"
+.Lredirector_toc___fprintf_chk:
+.quad __fprintf_chk
+.text
+.type bcm_redirector___fprintf_chk, @function
+bcm_redirector___fprintf_chk:
+ std 2, 24(1)
+ addis 12, 2, .Lredirector_toc___fprintf_chk@toc@ha
+ ld 12, .Lredirector_toc___fprintf_chk@toc@l(12)
+ mtctr 12
+ bctr
+.type bss_bss_get, @function
+bss_bss_get:
+ addis 3, 2, .Lbss_local_target@toc@ha
+ addi 3, 3, .Lbss_local_target@toc@l
+ blr
+.type bcm_loadtoc__dot_LANCHOR0, @function
+bcm_loadtoc__dot_LANCHOR0:
+.Lbcm_loadtoc__dot_LANCHOR0:
+ addis 3, 2, .LANCHOR0@toc@ha
+ addi 3, 3, .LANCHOR0@toc@l
+ blr
+.type bcm_loadtoc__dot_LC0, @function
+bcm_loadtoc__dot_LC0:
+.Lbcm_loadtoc__dot_LC0:
+ addis 3, 2, .LC0@toc@ha
+ addi 3, 3, .LC0@toc@l
+ blr
+.type bcm_loadtoc__dot_LC1, @function
+bcm_loadtoc__dot_LC1:
+.Lbcm_loadtoc__dot_LC1:
+ addis 3, 2, .LC1@toc@ha
+ addi 3, 3, .LC1@toc@l
+ blr
+.type bcm_loadtoc__dot_LC11, @function
+bcm_loadtoc__dot_LC11:
+.Lbcm_loadtoc__dot_LC11:
+ addis 3, 2, .LC11@toc@ha
+ addi 3, 3, .LC11@toc@l
+ blr
+.type bcm_loadtoc__dot_LC12, @function
+bcm_loadtoc__dot_LC12:
+.Lbcm_loadtoc__dot_LC12:
+ addis 3, 2, .LC12@toc@ha
+ addi 3, 3, .LC12@toc@l
+ blr
+.type bcm_loadtoc__dot_LC13, @function
+bcm_loadtoc__dot_LC13:
+.Lbcm_loadtoc__dot_LC13:
+ addis 3, 2, .LC13@toc@ha
+ addi 3, 3, .LC13@toc@l
+ blr
+.type bcm_loadtoc__dot_LC2, @function
+bcm_loadtoc__dot_LC2:
+.Lbcm_loadtoc__dot_LC2:
+ addis 3, 2, .LC2@toc@ha
+ addi 3, 3, .LC2@toc@l
+ blr
+.type bcm_loadtoc__dot_LC3, @function
+bcm_loadtoc__dot_LC3:
+.Lbcm_loadtoc__dot_LC3:
+ addis 3, 2, .LC3@toc@ha
+ addi 3, 3, .LC3@toc@l
+ blr
+.type bcm_loadtoc__dot_LC4, @function
+bcm_loadtoc__dot_LC4:
+.Lbcm_loadtoc__dot_LC4:
+ addis 3, 2, .LC4@toc@ha
+ addi 3, 3, .LC4@toc@l
+ blr
+.type bcm_loadtoc__dot_LC5, @function
+bcm_loadtoc__dot_LC5:
+.Lbcm_loadtoc__dot_LC5:
+ addis 3, 2, .LC5@toc@ha
+ addi 3, 3, .LC5@toc@l
+ blr
+.type bcm_loadtoc__dot_LC6, @function
+bcm_loadtoc__dot_LC6:
+.Lbcm_loadtoc__dot_LC6:
+ addis 3, 2, .LC6@toc@ha
+ addi 3, 3, .LC6@toc@l
+ blr
+.type bcm_loadtoc__dot_LC7, @function
+bcm_loadtoc__dot_LC7:
+.Lbcm_loadtoc__dot_LC7:
+ addis 3, 2, .LC7@toc@ha
+ addi 3, 3, .LC7@toc@l
+ blr
+.type bcm_loadtoc__dot_LC8, @function
+bcm_loadtoc__dot_LC8:
+.Lbcm_loadtoc__dot_LC8:
+ addis 3, 2, .LC8@toc@ha
+ addi 3, 3, .LC8@toc@l
+ blr
+.type bcm_loadtoc__dot_Lfunction_local_target, @function
+bcm_loadtoc__dot_Lfunction_local_target:
+.Lbcm_loadtoc__dot_Lfunction_local_target:
+ addis 3, 2, .Lfunction_local_target@toc@ha
+ addi 3, 3, .Lfunction_local_target@toc@l
+ blr
+.LBORINGSSL_external_toc:
+.quad .TOC.-.LBORINGSSL_external_toc
+.type BORINGSSL_bcm_text_hash, @object
+.size BORINGSSL_bcm_text_hash, 32
+BORINGSSL_bcm_text_hash:
+.byte 0xae
+.byte 0x2c
+.byte 0xea
+.byte 0x2a
+.byte 0xbd
+.byte 0xa6
+.byte 0xf3
+.byte 0xec
+.byte 0x97
+.byte 0x7f
+.byte 0x9b
+.byte 0xf6
+.byte 0x94
+.byte 0x9a
+.byte 0xfc
+.byte 0x83
+.byte 0x68
+.byte 0x27
+.byte 0xcb
+.byte 0xa0
+.byte 0xa0
+.byte 0x9f
+.byte 0x6b
+.byte 0x6f
+.byte 0xde
+.byte 0x52
+.byte 0xcd
+.byte 0xe2
+.byte 0xcd
+.byte 0xff
+.byte 0x31
+.byte 0x80
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-TOCWithOffset/in.s
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-TOCWithOffset/in.s
@@ -0,0 +1,23 @@
+ .text
+foo:
+ # TOC references may have offsets.
+ addis 3, 2, 5+foo@toc@ha
+ addi 3, 3, 10+foo@toc@l
+
+ addis 3, 2, 15+foo@toc@ha
+ addi 3, 3, 20+foo@toc@l
+
+ addis 4, 2, foo@toc@ha
+ addi 4, 4, foo@toc@l
+
+ addis 5, 2, 5+foo@toc@ha
+ ld 5, 10+foo@toc@l(5)
+
+ addis 4, 2, foo-10@toc@ha
+ addi 4, 4, foo-10@toc@l
+
+ addis 4, 2, foo@toc@ha+25
+ addi 4, 4, foo@toc@l+25
+
+ addis 4, 2, 1+foo-2@toc@ha+3
+ addi 4, 4, 1+foo-2@toc@l+3
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-TOCWithOffset/out.s
===================================================================
--- /dev/null
8 months ago
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-TOCWithOffset/out.s
@@ -0,0 +1,178 @@
+.text
+.file 1 "inserted_by_delocate.c"
+.loc 1 1 0
+BORINGSSL_bcm_text_start:
+ .text
+.Lfoo_local_target:
+foo:
+ # TOC references may have offsets.
+# WAS addis 3, 2, 5+foo@toc@ha
+# WAS addi 3, 3, 10+foo@toc@l
+ addi 1, 1, -288
+ mflr 3
+ std 3, -8(1)
+ bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_10
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 3, -24(1)
+ addi 1, 1, 288
+
+# WAS addis 3, 2, 15+foo@toc@ha
+# WAS addi 3, 3, 20+foo@toc@l
+ addi 1, 1, -288
+ mflr 3
+ std 3, -8(1)
+ bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_20
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 3, -24(1)
+ addi 1, 1, 288
+
+# WAS addis 4, 2, foo@toc@ha
+# WAS addi 4, 4, foo@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfoo_local_target
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+
+# WAS addis 5, 2, 5+foo@toc@ha
+# WAS ld 5, 10+foo@toc@l(5)
+ addi 1, 1, -288
+ mflr 5
+ std 5, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_10
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 5, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+ ld 5, 0(5)
+
+# WAS addis 4, 2, foo-10@toc@ha
+# WAS addi 4, 4, foo-10@toc@l
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfoo_local_target__minus_10
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+
+# WAS addis 4, 2, foo@toc@ha+25
+# WAS addi 4, 4, foo@toc@l+25
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_25
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+
+# WAS addis 4, 2, 1+foo-2@toc@ha+3
+# WAS addi 4, 4, 1+foo-2@toc@l+3
+ addi 1, 1, -288
+ mflr 4
+ std 4, -8(1)
+ std 3, -16(1)
+ bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_1_minus_2_plus_3
+ std 3, -24(1)
+ ld 3, -8(1)
+ mtlr 3
+ ld 4, -24(1)
+ ld 3, -16(1)
+ addi 1, 1, 288
+.text
+.loc 1 2 0
+BORINGSSL_bcm_text_end:
+.type bcm_loadtoc__dot_Lfoo_local_target, @function
+bcm_loadtoc__dot_Lfoo_local_target:
+.Lbcm_loadtoc__dot_Lfoo_local_target:
+ addis 3, 2, .Lfoo_local_target@toc@ha
+ addi 3, 3, .Lfoo_local_target@toc@l
+ blr
+.type bcm_loadtoc__dot_Lfoo_local_target__plus_1_minus_2_plus_3, @function
+bcm_loadtoc__dot_Lfoo_local_target__plus_1_minus_2_plus_3:
+.Lbcm_loadtoc__dot_Lfoo_local_target__plus_1_minus_2_plus_3:
+ addis 3, 2, .Lfoo_local_target+1-2+3@toc@ha
+ addi 3, 3, .Lfoo_local_target+1-2+3@toc@l
+ blr
+.type bcm_loadtoc__dot_Lfoo_local_target__plus_10, @function
+bcm_loadtoc__dot_Lfoo_local_target__plus_10:
+.Lbcm_loadtoc__dot_Lfoo_local_target__plus_10:
+ addis 3, 2, .Lfoo_local_target+10@toc@ha
+ addi 3, 3, .Lfoo_local_target+10@toc@l
+ blr
+.type bcm_loadtoc__dot_Lfoo_local_target__plus_20, @function
+bcm_loadtoc__dot_Lfoo_local_target__plus_20:
+.Lbcm_loadtoc__dot_Lfoo_local_target__plus_20:
+ addis 3, 2, .Lfoo_local_target+20@toc@ha
+ addi 3, 3, .Lfoo_local_target+20@toc@l
+ blr
+.type bcm_loadtoc__dot_Lfoo_local_target__plus_25, @function
+bcm_loadtoc__dot_Lfoo_local_target__plus_25:
+.Lbcm_loadtoc__dot_Lfoo_local_target__plus_25:
+ addis 3, 2, .Lfoo_local_target+25@toc@ha
+ addi 3, 3, .Lfoo_local_target+25@toc@l
+ blr
+.type bcm_loadtoc__dot_Lfoo_local_target__minus_10, @function
+bcm_loadtoc__dot_Lfoo_local_target__minus_10:
+.Lbcm_loadtoc__dot_Lfoo_local_target__minus_10:
+ addis 3, 2, .Lfoo_local_target-10@toc@ha
+ addi 3, 3, .Lfoo_local_target-10@toc@l
+ blr
+.LBORINGSSL_external_toc:
+.quad .TOC.-.LBORINGSSL_external_toc
+.type BORINGSSL_bcm_text_hash, @object
+.size BORINGSSL_bcm_text_hash, 32
+BORINGSSL_bcm_text_hash:
+.byte 0xae
+.byte 0x2c
+.byte 0xea
+.byte 0x2a
+.byte 0xbd
+.byte 0xa6
+.byte 0xf3
+.byte 0xec
+.byte 0x97
+.byte 0x7f
+.byte 0x9b
+.byte 0xf6
+.byte 0x94
+.byte 0x9a
+.byte 0xfc
+.byte 0x83
+.byte 0x68
+.byte 0x27
+.byte 0xcb
+.byte 0xa0
+.byte 0xa0
+.byte 0x9f
+.byte 0x6b
+.byte 0x6f
+.byte 0xde
+.byte 0x52
+.byte 0xcd
+.byte 0xe2
+.byte 0xcd
+.byte 0xff
+.byte 0x31
+.byte 0x80
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/sha/sha1.c
===================================================================
8 months ago
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/sha/sha1.c
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/sha/sha1.c
@@ -416,6 +416,10 @@ static void sha1_block_data_order(uint32
return;
}
#endif
+#if defined(SHA1_ASM_PPC64)
+ sha1_block_data_order_ppc64(state, data, num);
+ return;
+#endif
sha1_block_data_order_nohw(state, data, num);
}
8 months ago
Index: chromium-125.0.6422.41/third_party/boringssl/src/build.json
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/build.json
+++ chromium-125.0.6422.41/third_party/boringssl/src/build.json
@@ -118,6 +118,10 @@
{"src": "crypto/fipsmodule/sha/asm/sha512-armv4.pl"},
{"src": "crypto/fipsmodule/aes/asm/vpaes-armv7.pl"}
],
+ "perlasm_ppc64le": [
+ {"src": "crypto/fipsmodule/aes/asm/aesp8-ppc.pl"},
+ {"src": "crypto/fipsmodule/modes/asm/ghashp8-ppc.pl"}
+ ],
"perlasm_x86": [
{"src": "crypto/fipsmodule/aes/asm/aesni-x86.pl"},
{"src": "crypto/fipsmodule/bn/asm/bn-586.pl"},
@@ -221,6 +225,7 @@
"crypto/cpu_arm_freebsd.c",
"crypto/cpu_arm_linux.c",
"crypto/cpu_intel.c",
+ "crypto/cpu_ppc64le.c",
"crypto/crypto.c",
"crypto/curve25519/curve25519.c",
"crypto/curve25519/curve25519_64_adx.c",
@@ -765,6 +770,9 @@
"perlasm_arm": [
{"src": "crypto/test/asm/trampoline-armv4.pl"}
],
+ "perlasm_ppc64le": [
+ {"src": "crypto/test/asm/trampoline-ppc.pl"}
+ ],
"perlasm_x86": [
{"src": "crypto/test/asm/trampoline-x86.pl"}
],
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/pregenerate/build.go
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/util/pregenerate/build.go
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/pregenerate/build.go
@@ -38,6 +38,7 @@ type InputTarget struct {
// architecture.
PerlasmAarch64 []PerlasmSource `json:"perlasm_aarch64,omitempty"`
PerlasmArm []PerlasmSource `json:"perlasm_arm,omitempty"`
+ PerlasmPPC64LE []PerlasmSource `json:"perlasm_ppc64le,omitempty"`
PerlasmX86 []PerlasmSource `json:"perlasm_x86,omitempty"`
PerlasmX86_64 []PerlasmSource `json:"perlasm_x86_64,omitempty"`
}
@@ -116,6 +117,9 @@ func (in *InputTarget) Pregenerate(name
for _, p := range in.PerlasmArm {
addPerlasmTask(&out.Asm, &p, "-linux.S", []string{"linux32"})
}
+ for _, p := range in.PerlasmPPC64LE {
+ addPerlasmTask(&out.Asm, &p, "-linux.S", []string{"linux64le"})
+ }
for _, p := range in.PerlasmX86 {
addPerlasmTask(&out.Asm, &p, "-apple.S", []string{"macosx", "-fPIC", "-DOPENSSL_IA32_SSE2"})
addPerlasmTask(&out.Asm, &p, "-linux.S", []string{"elf", "-fPIC", "-DOPENSSL_IA32_SSE2"})
Index: chromium-125.0.6422.41/third_party/boringssl/BUILD.generated.gni
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/BUILD.generated.gni
+++ chromium-125.0.6422.41/third_party/boringssl/BUILD.generated.gni
@@ -93,6 +93,7 @@ crypto_sources = [
"src/crypto/cpu_arm_linux.c",
"src/crypto/cpu_arm_linux.h",
"src/crypto/cpu_intel.c",
+ "src/crypto/cpu_ppc64le.c",
"src/crypto/crypto.c",
"src/crypto/curve25519/curve25519.c",
"src/crypto/curve25519/curve25519_64_adx.c",
@@ -334,6 +335,7 @@ crypto_sources_asm = [
"src/gen/bcm/aesv8-gcm-armv8-apple.S",
"src/gen/bcm/aesv8-gcm-armv8-linux.S",
"src/gen/bcm/aesv8-gcm-armv8-win.S",
+ "src/gen/bcm/aesp8-ppc-linux.S",
"src/gen/bcm/armv4-mont-linux.S",
"src/gen/bcm/armv8-mont-apple.S",
"src/gen/bcm/armv8-mont-linux.S",
@@ -350,6 +352,7 @@ crypto_sources_asm = [
"src/gen/bcm/ghash-neon-armv8-apple.S",
"src/gen/bcm/ghash-neon-armv8-linux.S",
"src/gen/bcm/ghash-neon-armv8-win.S",
+ "src/gen/bcm/ghashp8-ppc-linux.S",
"src/gen/bcm/ghash-ssse3-x86-apple.S",
"src/gen/bcm/ghash-ssse3-x86-linux.S",
"src/gen/bcm/ghash-ssse3-x86_64-apple.S",
Index: chromium-125.0.6422.41/third_party/boringssl/README.ppc64le
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/README.ppc64le
@@ -0,0 +1,8 @@
+==============================================================
+To recreate boringssl pregenerated files patch for ppc64le:
+
+cd third_party/boringssl/src
+cp -Rp gen gen.orig
+go run ./util/pregenerate
+cd ../../../../
+diff -urN chromium-*/third_party/boringssl/src/gen.orig chromium-*/third_party/boringssl/src/gen