chromium/0001-Add-PPC64-support-for-...

Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/abi_self_test.cc
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/abi_self_test.cc
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/abi_self_test.cc
@@ -521,3 +521,289 @@ TEST(ABITest, AArch64) {
   CHECK_ABI_NO_UNWIND(abi_test_clobber_v15_upper);
 }
 #endif   // OPENSSL_AARCH64 && SUPPORTS_ABI_TEST
+
+#if defined(OPENSSL_PPC64LE) && defined(SUPPORTS_ABI_TEST)
+extern "C" {
+void abi_test_clobber_r0(void);
+// r1 is the stack pointer.
+void abi_test_clobber_r2(void);
+void abi_test_clobber_r3(void);
+void abi_test_clobber_r4(void);
+void abi_test_clobber_r5(void);
+void abi_test_clobber_r6(void);
+void abi_test_clobber_r7(void);
+void abi_test_clobber_r8(void);
+void abi_test_clobber_r9(void);
+void abi_test_clobber_r10(void);
+void abi_test_clobber_r11(void);
+void abi_test_clobber_r12(void);
+// r13 is the thread pointer.
+void abi_test_clobber_r14(void);
+void abi_test_clobber_r15(void);
+void abi_test_clobber_r16(void);
+void abi_test_clobber_r17(void);
+void abi_test_clobber_r18(void);
+void abi_test_clobber_r19(void);
+void abi_test_clobber_r20(void);
+void abi_test_clobber_r21(void);
+void abi_test_clobber_r22(void);
+void abi_test_clobber_r23(void);
+void abi_test_clobber_r24(void);
+void abi_test_clobber_r25(void);
+void abi_test_clobber_r26(void);
+void abi_test_clobber_r27(void);
+void abi_test_clobber_r28(void);
+void abi_test_clobber_r29(void);
+void abi_test_clobber_r30(void);
+void abi_test_clobber_r31(void);
+
+void abi_test_clobber_f0(void);
+void abi_test_clobber_f1(void);
+void abi_test_clobber_f2(void);
+void abi_test_clobber_f3(void);
+void abi_test_clobber_f4(void);
+void abi_test_clobber_f5(void);
+void abi_test_clobber_f6(void);
+void abi_test_clobber_f7(void);
+void abi_test_clobber_f8(void);
+void abi_test_clobber_f9(void);
+void abi_test_clobber_f10(void);
+void abi_test_clobber_f11(void);
+void abi_test_clobber_f12(void);
+void abi_test_clobber_f13(void);
+void abi_test_clobber_f14(void);
+void abi_test_clobber_f15(void);
+void abi_test_clobber_f16(void);
+void abi_test_clobber_f17(void);
+void abi_test_clobber_f18(void);
+void abi_test_clobber_f19(void);
+void abi_test_clobber_f20(void);
+void abi_test_clobber_f21(void);
+void abi_test_clobber_f22(void);
+void abi_test_clobber_f23(void);
+void abi_test_clobber_f24(void);
+void abi_test_clobber_f25(void);
+void abi_test_clobber_f26(void);
+void abi_test_clobber_f27(void);
+void abi_test_clobber_f28(void);
+void abi_test_clobber_f29(void);
+void abi_test_clobber_f30(void);
+void abi_test_clobber_f31(void);
+
+void abi_test_clobber_v0(void);
+void abi_test_clobber_v1(void);
+void abi_test_clobber_v2(void);
+void abi_test_clobber_v3(void);
+void abi_test_clobber_v4(void);
+void abi_test_clobber_v5(void);
+void abi_test_clobber_v6(void);
+void abi_test_clobber_v7(void);
+void abi_test_clobber_v8(void);
+void abi_test_clobber_v9(void);
+void abi_test_clobber_v10(void);
+void abi_test_clobber_v11(void);
+void abi_test_clobber_v12(void);
+void abi_test_clobber_v13(void);
+void abi_test_clobber_v14(void);
+void abi_test_clobber_v15(void);
+void abi_test_clobber_v16(void);
+void abi_test_clobber_v17(void);
+void abi_test_clobber_v18(void);
+void abi_test_clobber_v19(void);
+void abi_test_clobber_v20(void);
+void abi_test_clobber_v21(void);
+void abi_test_clobber_v22(void);
+void abi_test_clobber_v23(void);
+void abi_test_clobber_v24(void);
+void abi_test_clobber_v25(void);
+void abi_test_clobber_v26(void);
+void abi_test_clobber_v27(void);
+void abi_test_clobber_v28(void);
+void abi_test_clobber_v29(void);
+void abi_test_clobber_v30(void);
+void abi_test_clobber_v31(void);
+
+void abi_test_clobber_cr0(void);
+void abi_test_clobber_cr1(void);
+void abi_test_clobber_cr2(void);
+void abi_test_clobber_cr3(void);
+void abi_test_clobber_cr4(void);
+void abi_test_clobber_cr5(void);
+void abi_test_clobber_cr6(void);
+void abi_test_clobber_cr7(void);
+
+void abi_test_clobber_ctr(void);
+void abi_test_clobber_lr(void);
+
+}  // extern "C"
+
+TEST(ABITest, PPC64LE) {
+  // abi_test_trampoline hides unsaved registers from the caller, so we can
+  // safely call the abi_test_clobber_* functions below.
+  abi_test::internal::CallerState state;
+  RAND_bytes(reinterpret_cast<uint8_t *>(&state), sizeof(state));
+  CHECK_ABI_NO_UNWIND(abi_test_trampoline,
+                      reinterpret_cast<crypto_word_t>(abi_test_clobber_r14),
+                      &state, nullptr, 0, 0 /* no breakpoint */);
+
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_r0);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_r2);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_r3);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_r4);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_r5);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_r6);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_r7);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_r8);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_r9);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_r10);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_r11);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_r12);
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r14),
+                          "r14 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r15),
+                          "r15 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r16),
+                          "r16 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r17),
+                          "r17 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r18),
+                          "r18 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r19),
+                          "r19 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r20),
+                          "r20 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r21),
+                          "r21 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r22),
+                          "r22 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r23),
+                          "r23 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r24),
+                          "r24 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r25),
+                          "r25 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r26),
+                          "r26 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r27),
+                          "r27 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r28),
+                          "r28 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r29),
+                          "r29 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r30),
+                          "r30 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r31),
+                          "r31 was not restored after return");
+
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_f0);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_f1);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_f2);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_f3);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_f4);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_f5);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_f6);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_f7);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_f8);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_f9);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_f10);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_f11);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_f12);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_f13);
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f14),
+                          "f14 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f15),
+                          "f15 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f16),
+                          "f16 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f17),
+                          "f17 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f18),
+                          "f18 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f19),
+                          "f19 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f20),
+                          "f20 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f21),
+                          "f21 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f22),
+                          "f22 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f23),
+                          "f23 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f24),
+                          "f24 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f25),
+                          "f25 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f26),
+                          "f26 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f27),
+                          "f27 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f28),
+                          "f28 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f29),
+                          "f29 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f30),
+                          "f30 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_f31),
+                          "f31 was not restored after return");
+
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v0);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v1);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v2);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v3);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v4);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v5);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v6);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v7);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v8);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v9);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v10);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v11);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v12);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v13);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v14);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v15);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v16);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v17);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v18);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_v19);
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v20),
+                          "v20 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v21),
+                          "v21 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v22),
+                          "v22 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v23),
+                          "v23 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v24),
+                          "v24 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v25),
+                          "v25 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v26),
+                          "v26 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v27),
+                          "v27 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v28),
+                          "v28 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v29),
+                          "v29 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v30),
+                          "v30 was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_v31),
+                          "v31 was not restored after return");
+
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_cr0);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_cr1);
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr2),
+                          "cr was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr3),
+                          "cr was not restored after return");
+  EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_cr4),
+                          "cr was not restored after return");
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_cr5);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_cr6);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_cr7);
+
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_ctr);
+  CHECK_ABI_NO_UNWIND(abi_test_clobber_lr);
+}
+#endif   // OPENSSL_PPC64LE && SUPPORTS_ABI_TEST
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/cpu_ppc64le.c
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/cpu_ppc64le.c
@@ -0,0 +1,38 @@
+/* Copyright (c) 2016, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/base.h>
+
+#if defined(OPENSSL_PPC64LE)
+
+#include <sys/auxv.h>
+
+#include "internal.h"
+
+
+#if !defined(PPC_FEATURE2_HAS_VCRYPTO)
+// PPC_FEATURE2_HAS_VCRYPTO was taken from section 4.1.2.3 of the “OpenPOWER
+// ABI for Linux Supplement”.
+#define PPC_FEATURE2_HAS_VCRYPTO 0x02000000
+#endif
+
+void OPENSSL_cpuid_setup(void) {
+  OPENSSL_ppc64le_hwcap2 = getauxval(AT_HWCAP2);
+}
+
+int CRYPTO_is_PPC64LE_vcrypto_capable(void) {
+  return (OPENSSL_ppc64le_hwcap2 & PPC_FEATURE2_HAS_VCRYPTO) != 0;
+}
+
+#endif  // OPENSSL_PPC64LE
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/crypto.c
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/crypto.c
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/crypto.c
@@ -25,10 +25,12 @@ static_assert(sizeof(ossl_ssize_t) == si
               "ossl_ssize_t should be the same size as size_t");
 
 #if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_STATIC_ARMCAP) && \
-    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) ||            \
-     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
-// x86, x86_64, and the ARMs need to record the result of a cpuid/getauxval call
-// for the asm to work correctly, unless compiled without asm code.
+    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
+     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
+     defined(OPENSSL_PPC64LE))
+// x86, x86_64, the ARMs and ppc64le need to record the result of a
+// cpuid/getauxval call for the asm to work correctly, unless compiled without
+// asm code.
 #define NEED_CPUID
 
 #else
@@ -39,7 +41,8 @@ static_assert(sizeof(ossl_ssize_t) == si
 #define BORINGSSL_NO_STATIC_INITIALIZER
 #endif
 
-#endif  // !NO_ASM && !STATIC_ARMCAP && (X86 || X86_64 || ARM || AARCH64)
+#endif  // !NO_ASM && !STATIC_ARMCAP &&
+        // (X86 || X86_64 || ARM || AARCH64 || PPC64LE)
 
 
 // Our assembly does not use the GOT to reference symbols, which means
@@ -83,6 +86,10 @@ uint32_t OPENSSL_get_ia32cap(int idx) {
   return OPENSSL_ia32cap_P[idx];
 }
 
+#elif defined(OPENSSL_PPC64LE)
+
+HIDDEN unsigned long OPENSSL_ppc64le_hwcap2 = 0;
+
 #elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
 
 #include <openssl/arm_arch.h>
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/aes/asm/aesp8-ppc.pl
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/aes/asm/aesp8-ppc.pl
@@ -0,0 +1,3809 @@
+#! /usr/bin/env perl
+# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements support for AES instructions as per PowerISA
+# specification version 2.07, first implemented by POWER8 processor.
+# The module is endian-agnostic in sense that it supports both big-
+# and little-endian cases. Data alignment in parallelizable modes is
+# handled with VSX loads and stores, which implies MSR.VSX flag being
+# set. It should also be noted that ISA specification doesn't prohibit
+# alignment exceptions for these instructions on page boundaries.
+# Initially alignment was handled in pure AltiVec/VMX way [when data
+# is aligned programmatically, which in turn guarantees exception-
+# free execution], but it turned to hamper performance when vcipher
+# instructions are interleaved. It's reckoned that eventual
+# misalignment penalties at page boundaries are in average lower
+# than additional overhead in pure AltiVec approach.
+#
+# May 2016
+#
+# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
+# systems were measured.
+#
+######################################################################
+# Current large-block performance in cycles per byte processed with
+# 128-bit key (less is better).
+#
+#		CBC en-/decrypt	CTR	XTS
+# POWER8[le]	3.96/0.72	0.74	1.1
+# POWER8[be]	3.75/0.65	0.66	1.0
+# POWER9[le]	4.02/0.86	0.84	1.05
+# POWER9[be]	3.99/0.78	0.79	0.97
+
+$flavour = shift;
+$output = shift;
+
+if ($flavour =~ /64/) {
+	$SIZE_T	=8;
+	$LRSAVE	=2*$SIZE_T;
+	$STU	="stdu";
+	$POP	="ld";
+	$PUSH	="std";
+	$UCMP	="cmpld";
+	$SHL	="sldi";
+} elsif ($flavour =~ /32/) {
+	$SIZE_T	=4;
+	$LRSAVE	=$SIZE_T;
+	$STU	="stwu";
+	$POP	="lwz";
+	$PUSH	="stw";
+	$UCMP	="cmplw";
+	$SHL	="slwi";
+} else { die "nonsense $flavour"; }
+
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open OUT,"| $^X \"$xlate\" $flavour \"$output\"" || die "can't call $xlate: $!";
+*STDOUT=*OUT;
+
+$FRAME=8*$SIZE_T;
+$prefix="aes_hw";
+
+$sp="r1";
+$vrsave="r12";
+
+#########################################################################
+{{{	# Key setup procedures						#
+my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
+my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
+my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
+
+$code.=<<___;
+.machine	"any"
+
+.text
+
+.align	7
+Lrcon:
+.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
+.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
+.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
+.long	0,0,0,0						?asis
+Lconsts:
+	mflr	r0
+	bcl	20,31,\$+4
+	mflr	$ptr	 #vvvvv "distance between . and rcon
+	addi	$ptr,$ptr,-0x48
+	mtlr	r0
+	blr
+	.long	0
+	.byte	0,12,0x14,0,0,0,0,0
+.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+
+.globl	.${prefix}_set_encrypt_key
+.align	5
+.${prefix}_set_encrypt_key:
+Lset_encrypt_key:
+	mflr		r11
+	$PUSH		r11,$LRSAVE($sp)
+
+	li		$ptr,-1
+	${UCMP}i	$inp,0
+	beq-		Lenc_key_abort		# if ($inp==0) return -1;
+	${UCMP}i	$out,0
+	beq-		Lenc_key_abort		# if ($out==0) return -1;
+	li		$ptr,-2
+	cmpwi		$bits,128
+	blt-		Lenc_key_abort
+	cmpwi		$bits,256
+	bgt-		Lenc_key_abort
+	andi.		r0,$bits,0x3f
+	bne-		Lenc_key_abort
+
+	lis		r0,0xfff0
+	mfspr		$vrsave,256
+	mtspr		256,r0
+
+	bl		Lconsts
+	mtlr		r11
+
+	neg		r9,$inp
+	lvx		$in0,0,$inp
+	addi		$inp,$inp,15		# 15 is not typo
+	lvsr		$key,0,r9		# borrow $key
+	li		r8,0x20
+	cmpwi		$bits,192
+	lvx		$in1,0,$inp
+	le?vspltisb	$mask,0x0f		# borrow $mask
+	lvx		$rcon,0,$ptr
+	le?vxor		$key,$key,$mask		# adjust for byte swap
+	lvx		$mask,r8,$ptr
+	addi		$ptr,$ptr,0x10
+	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
+	li		$cnt,8
+	vxor		$zero,$zero,$zero
+	mtctr		$cnt
+
+	?lvsr		$outperm,0,$out
+	vspltisb	$outmask,-1
+	lvx		$outhead,0,$out
+	?vperm		$outmask,$zero,$outmask,$outperm
+
+	blt		Loop128
+	addi		$inp,$inp,8
+	beq		L192
+	addi		$inp,$inp,8
+	b		L256
+
+.align	4
+Loop128:
+	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
+	vsldoi		$tmp,$zero,$in0,12	# >>32
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	vcipherlast	$key,$key,$rcon
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	 vadduwm	$rcon,$rcon,$rcon
+	vxor		$in0,$in0,$key
+	bdnz		Loop128
+
+	lvx		$rcon,0,$ptr		# last two round keys
+
+	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
+	vsldoi		$tmp,$zero,$in0,12	# >>32
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	vcipherlast	$key,$key,$rcon
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	 vadduwm	$rcon,$rcon,$rcon
+	vxor		$in0,$in0,$key
+
+	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
+	vsldoi		$tmp,$zero,$in0,12	# >>32
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	vcipherlast	$key,$key,$rcon
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	vxor		$in0,$in0,$key
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	 stvx		$stage,0,$out
+
+	addi		$inp,$out,15		# 15 is not typo
+	addi		$out,$out,0x50
+
+	li		$rounds,10
+	b		Ldone
+
+.align	4
+L192:
+	lvx		$tmp,0,$inp
+	li		$cnt,4
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
+	vspltisb	$key,8			# borrow $key
+	mtctr		$cnt
+	vsububm		$mask,$mask,$key	# adjust the mask
+
+Loop192:
+	vperm		$key,$in1,$in1,$mask	# roate-n-splat
+	vsldoi		$tmp,$zero,$in0,12	# >>32
+	vcipherlast	$key,$key,$rcon
+
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+
+	 vsldoi		$stage,$zero,$in1,8
+	vspltw		$tmp,$in0,3
+	vxor		$tmp,$tmp,$in1
+	vsldoi		$in1,$zero,$in1,12	# >>32
+	 vadduwm	$rcon,$rcon,$rcon
+	vxor		$in1,$in1,$tmp
+	vxor		$in0,$in0,$key
+	vxor		$in1,$in1,$key
+	 vsldoi		$stage,$stage,$in0,8
+
+	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
+	vsldoi		$tmp,$zero,$in0,12	# >>32
+	 vperm		$outtail,$stage,$stage,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	vcipherlast	$key,$key,$rcon
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+
+	 vsldoi		$stage,$in0,$in1,8
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	 vperm		$outtail,$stage,$stage,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+
+	vspltw		$tmp,$in0,3
+	vxor		$tmp,$tmp,$in1
+	vsldoi		$in1,$zero,$in1,12	# >>32
+	 vadduwm	$rcon,$rcon,$rcon
+	vxor		$in1,$in1,$tmp
+	vxor		$in0,$in0,$key
+	vxor		$in1,$in1,$key
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	 stvx		$stage,0,$out
+	 addi		$inp,$out,15		# 15 is not typo
+	 addi		$out,$out,16
+	bdnz		Loop192
+
+	li		$rounds,12
+	addi		$out,$out,0x20
+	b		Ldone
+
+.align	4
+L256:
+	lvx		$tmp,0,$inp
+	li		$cnt,7
+	li		$rounds,14
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
+	mtctr		$cnt
+
+Loop256:
+	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
+	vsldoi		$tmp,$zero,$in0,12	# >>32
+	 vperm		$outtail,$in1,$in1,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	vcipherlast	$key,$key,$rcon
+	 stvx		$stage,0,$out
+	 addi		$out,$out,16
+
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in0,$in0,$tmp
+	 vadduwm	$rcon,$rcon,$rcon
+	vxor		$in0,$in0,$key
+	 vperm		$outtail,$in0,$in0,$outperm	# rotate
+	 vsel		$stage,$outhead,$outtail,$outmask
+	 vmr		$outhead,$outtail
+	 stvx		$stage,0,$out
+	 addi		$inp,$out,15		# 15 is not typo
+	 addi		$out,$out,16
+	bdz		Ldone
+
+	vspltw		$key,$in0,3		# just splat
+	vsldoi		$tmp,$zero,$in1,12	# >>32
+	vsbox		$key,$key
+
+	vxor		$in1,$in1,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in1,$in1,$tmp
+	vsldoi		$tmp,$zero,$tmp,12	# >>32
+	vxor		$in1,$in1,$tmp
+
+	vxor		$in1,$in1,$key
+	b		Loop256
+
+.align	4
+Ldone:
+	lvx		$in1,0,$inp		# redundant in aligned case
+	vsel		$in1,$outhead,$in1,$outmask
+	stvx		$in1,0,$inp
+	li		$ptr,0
+	mtspr		256,$vrsave
+	stw		$rounds,0($out)
+
+Lenc_key_abort:
+	mr		r3,$ptr
+	blr
+	.long		0
+	.byte		0,12,0x14,1,0,0,3,0
+	.long		0
+.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
+
+.globl	.${prefix}_set_decrypt_key
+.align	5
+.${prefix}_set_decrypt_key:
+	$STU		$sp,-$FRAME($sp)
+	mflr		r10
+	$PUSH		r10,`$FRAME+$LRSAVE`($sp)
+	bl		Lset_encrypt_key
+	mtlr		r10
+
+	cmpwi		r3,0
+	bne-		Ldec_key_abort
+
+	slwi		$cnt,$rounds,4
+	subi		$inp,$out,240		# first round key
+	srwi		$rounds,$rounds,1
+	add		$out,$inp,$cnt		# last round key
+	mtctr		$rounds
+
+Ldeckey:
+	lwz		r0, 0($inp)
+	lwz		r6, 4($inp)
+	lwz		r7, 8($inp)
+	lwz		r8, 12($inp)
+	addi		$inp,$inp,16
+	lwz		r9, 0($out)
+	lwz		r10,4($out)
+	lwz		r11,8($out)
+	lwz		r12,12($out)
+	stw		r0, 0($out)
+	stw		r6, 4($out)
+	stw		r7, 8($out)
+	stw		r8, 12($out)
+	subi		$out,$out,16
+	stw		r9, -16($inp)
+	stw		r10,-12($inp)
+	stw		r11,-8($inp)
+	stw		r12,-4($inp)
+	bdnz		Ldeckey
+
+	xor		r3,r3,r3		# return value
+Ldec_key_abort:
+	addi		$sp,$sp,$FRAME
+	blr
+	.long		0
+	.byte		0,12,4,1,0x80,0,3,0
+	.long		0
+.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
+___
+}}}
+#########################################################################
+{{{	# Single block en- and decrypt procedures			#
+sub gen_block () {
+my $dir = shift;
+my $n   = $dir eq "de" ? "n" : "";
+my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
+
+$code.=<<___;
+.globl	.${prefix}_${dir}crypt
+.align	5
+.${prefix}_${dir}crypt:
+	lwz		$rounds,240($key)
+	lis		r0,0xfc00
+	mfspr		$vrsave,256
+	li		$idx,15			# 15 is not typo
+	mtspr		256,r0
+
+	lvx		v0,0,$inp
+	neg		r11,$out
+	lvx		v1,$idx,$inp
+	lvsl		v2,0,$inp		# inpperm
+	le?vspltisb	v4,0x0f
+	?lvsl		v3,0,r11		# outperm
+	le?vxor		v2,v2,v4
+	li		$idx,16
+	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
+	lvx		v1,0,$key
+	?lvsl		v5,0,$key		# keyperm
+	srwi		$rounds,$rounds,1
+	lvx		v2,$idx,$key
+	addi		$idx,$idx,16
+	subi		$rounds,$rounds,1
+	?vperm		v1,v1,v2,v5		# align round key
+
+	vxor		v0,v0,v1
+	lvx		v1,$idx,$key
+	addi		$idx,$idx,16
+	mtctr		$rounds
+
+Loop_${dir}c:
+	?vperm		v2,v2,v1,v5
+	v${n}cipher	v0,v0,v2
+	lvx		v2,$idx,$key
+	addi		$idx,$idx,16
+	?vperm		v1,v1,v2,v5
+	v${n}cipher	v0,v0,v1
+	lvx		v1,$idx,$key
+	addi		$idx,$idx,16
+	bdnz		Loop_${dir}c
+
+	?vperm		v2,v2,v1,v5
+	v${n}cipher	v0,v0,v2
+	lvx		v2,$idx,$key
+	?vperm		v1,v1,v2,v5
+	v${n}cipherlast	v0,v0,v1
+
+	vspltisb	v2,-1
+	vxor		v1,v1,v1
+	li		$idx,15			# 15 is not typo
+	?vperm		v2,v1,v2,v3		# outmask
+	le?vxor		v3,v3,v4
+	lvx		v1,0,$out		# outhead
+	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
+	vsel		v1,v1,v0,v2
+	lvx		v4,$idx,$out
+	stvx		v1,0,$out
+	vsel		v0,v0,v4,v2
+	stvx		v0,$idx,$out
+
+	mtspr		256,$vrsave
+	blr
+	.long		0
+	.byte		0,12,0x14,0,0,0,3,0
+	.long		0
+.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
+___
+}
+&gen_block("en");
+&gen_block("de");
+}}}
+#########################################################################
+{{{	# CBC en- and decrypt procedures				#
+my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
+						map("v$_",(4..10));
+$code.=<<___;
+.globl	.${prefix}_cbc_encrypt
+.align	5
+.${prefix}_cbc_encrypt:
+	${UCMP}i	$len,16
+	bltlr-
+
+	cmpwi		$enc,0			# test direction
+	lis		r0,0xffe0
+	mfspr		$vrsave,256
+	mtspr		256,r0
+
+	li		$idx,15
+	vxor		$rndkey0,$rndkey0,$rndkey0
+	le?vspltisb	$tmp,0x0f
+
+	lvx		$ivec,0,$ivp		# load [unaligned] iv
+	lvsl		$inpperm,0,$ivp
+	lvx		$inptail,$idx,$ivp
+	le?vxor		$inpperm,$inpperm,$tmp
+	vperm		$ivec,$ivec,$inptail,$inpperm
+
+	neg		r11,$inp
+	?lvsl		$keyperm,0,$key		# prepare for unaligned key
+	lwz		$rounds,240($key)
+
+	lvsr		$inpperm,0,r11		# prepare for unaligned load
+	lvx		$inptail,0,$inp
+	addi		$inp,$inp,15		# 15 is not typo
+	le?vxor		$inpperm,$inpperm,$tmp
+
+	?lvsr		$outperm,0,$out		# prepare for unaligned store
+	vspltisb	$outmask,-1
+	lvx		$outhead,0,$out
+	?vperm		$outmask,$rndkey0,$outmask,$outperm
+	le?vxor		$outperm,$outperm,$tmp
+
+	srwi		$rounds,$rounds,1
+	li		$idx,16
+	subi		$rounds,$rounds,1
+	beq		Lcbc_dec
+
+Lcbc_enc:
+	vmr		$inout,$inptail
+	lvx		$inptail,0,$inp
+	addi		$inp,$inp,16
+	mtctr		$rounds
+	subi		$len,$len,16		# len-=16
+
+	lvx		$rndkey0,0,$key
+	 vperm		$inout,$inout,$inptail,$inpperm
+	lvx		$rndkey1,$idx,$key
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key
+	addi		$idx,$idx,16
+	vxor		$inout,$inout,$ivec
+
+Loop_cbc_enc:
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vcipher		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key
+	addi		$idx,$idx,16
+	bdnz		Loop_cbc_enc
+
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key
+	li		$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vcipherlast	$ivec,$inout,$rndkey0
+	${UCMP}i	$len,16
+
+	vperm		$tmp,$ivec,$ivec,$outperm
+	vsel		$inout,$outhead,$tmp,$outmask
+	vmr		$outhead,$tmp
+	stvx		$inout,0,$out
+	addi		$out,$out,16
+	bge		Lcbc_enc
+
+	b		Lcbc_done
+
+.align	4
+Lcbc_dec:
+	${UCMP}i	$len,128
+	bge		_aesp8_cbc_decrypt8x
+	vmr		$tmp,$inptail
+	lvx		$inptail,0,$inp
+	addi		$inp,$inp,16
+	mtctr		$rounds
+	subi		$len,$len,16		# len-=16
+
+	lvx		$rndkey0,0,$key
+	 vperm		$tmp,$tmp,$inptail,$inpperm
+	lvx		$rndkey1,$idx,$key
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$inout,$tmp,$rndkey0
+	lvx		$rndkey0,$idx,$key
+	addi		$idx,$idx,16
+
+Loop_cbc_dec:
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vncipher	$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vncipher	$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key
+	addi		$idx,$idx,16
+	bdnz		Loop_cbc_dec
+
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vncipher	$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key
+	li		$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vncipherlast	$inout,$inout,$rndkey0
+	${UCMP}i	$len,16
+
+	vxor		$inout,$inout,$ivec
+	vmr		$ivec,$tmp
+	vperm		$tmp,$inout,$inout,$outperm
+	vsel		$inout,$outhead,$tmp,$outmask
+	vmr		$outhead,$tmp
+	stvx		$inout,0,$out
+	addi		$out,$out,16
+	bge		Lcbc_dec
+
+Lcbc_done:
+	addi		$out,$out,-1
+	lvx		$inout,0,$out		# redundant in aligned case
+	vsel		$inout,$outhead,$inout,$outmask
+	stvx		$inout,0,$out
+
+	neg		$enc,$ivp		# write [unaligned] iv
+	li		$idx,15			# 15 is not typo
+	vxor		$rndkey0,$rndkey0,$rndkey0
+	vspltisb	$outmask,-1
+	le?vspltisb	$tmp,0x0f
+	?lvsl		$outperm,0,$enc
+	?vperm		$outmask,$rndkey0,$outmask,$outperm
+	le?vxor		$outperm,$outperm,$tmp
+	lvx		$outhead,0,$ivp
+	vperm		$ivec,$ivec,$ivec,$outperm
+	vsel		$inout,$outhead,$ivec,$outmask
+	lvx		$inptail,$idx,$ivp
+	stvx		$inout,0,$ivp
+	vsel		$inout,$ivec,$inptail,$outmask
+	stvx		$inout,$idx,$ivp
+
+	mtspr		256,$vrsave
+	blr
+	.long		0
+	.byte		0,12,0x14,0,0,0,6,0
+	.long		0
+___
+#########################################################################
+{{	# Optimized CBC decrypt procedure				#
+my $key_="r11";
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
+    $x00=0 if ($flavour =~ /osx/);
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
+my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
+			# v26-v31 last 6 round keys
+my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
+
+$code.=<<___;
+.align	5
+_aesp8_cbc_decrypt8x:
+	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+	li		r10,`$FRAME+8*16+15`
+	li		r11,`$FRAME+8*16+31`
+	stvx		v20,r10,$sp		# ABI says so
+	addi		r10,r10,32
+	stvx		v21,r11,$sp
+	addi		r11,r11,32
+	stvx		v22,r10,$sp
+	addi		r10,r10,32
+	stvx		v23,r11,$sp
+	addi		r11,r11,32
+	stvx		v24,r10,$sp
+	addi		r10,r10,32
+	stvx		v25,r11,$sp
+	addi		r11,r11,32
+	stvx		v26,r10,$sp
+	addi		r10,r10,32
+	stvx		v27,r11,$sp
+	addi		r11,r11,32
+	stvx		v28,r10,$sp
+	addi		r10,r10,32
+	stvx		v29,r11,$sp
+	addi		r11,r11,32
+	stvx		v30,r10,$sp
+	stvx		v31,r11,$sp
+	li		r0,-1
+	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
+	li		$x10,0x10
+	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+	li		$x20,0x20
+	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+	li		$x30,0x30
+	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+	li		$x40,0x40
+	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+	li		$x50,0x50
+	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+	li		$x60,0x60
+	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+	li		$x70,0x70
+	mtspr		256,r0
+
+	subi		$rounds,$rounds,3	# -4 in total
+	subi		$len,$len,128		# bias
+
+	lvx		$rndkey0,$x00,$key	# load key schedule
+	lvx		v30,$x10,$key
+	addi		$key,$key,0x20
+	lvx		v31,$x00,$key
+	?vperm		$rndkey0,$rndkey0,v30,$keyperm
+	addi		$key_,$sp,`$FRAME+15`
+	mtctr		$rounds
+
+Load_cbc_dec_key:
+	?vperm		v24,v30,v31,$keyperm
+	lvx		v30,$x10,$key
+	addi		$key,$key,0x20
+	stvx		v24,$x00,$key_		# off-load round[1]
+	?vperm		v25,v31,v30,$keyperm
+	lvx		v31,$x00,$key
+	stvx		v25,$x10,$key_		# off-load round[2]
+	addi		$key_,$key_,0x20
+	bdnz		Load_cbc_dec_key
+
+	lvx		v26,$x10,$key
+	?vperm		v24,v30,v31,$keyperm
+	lvx		v27,$x20,$key
+	stvx		v24,$x00,$key_		# off-load round[3]
+	?vperm		v25,v31,v26,$keyperm
+	lvx		v28,$x30,$key
+	stvx		v25,$x10,$key_		# off-load round[4]
+	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
+	?vperm		v26,v26,v27,$keyperm
+	lvx		v29,$x40,$key
+	?vperm		v27,v27,v28,$keyperm
+	lvx		v30,$x50,$key
+	?vperm		v28,v28,v29,$keyperm
+	lvx		v31,$x60,$key
+	?vperm		v29,v29,v30,$keyperm
+	lvx		$out0,$x70,$key		# borrow $out0
+	?vperm		v30,v30,v31,$keyperm
+	lvx		v24,$x00,$key_		# pre-load round[1]
+	?vperm		v31,v31,$out0,$keyperm
+	lvx		v25,$x10,$key_		# pre-load round[2]
+
+	#lvx		$inptail,0,$inp		# "caller" already did this
+	#addi		$inp,$inp,15		# 15 is not typo
+	subi		$inp,$inp,15		# undo "caller"
+
+	 le?li		$idx,8
+	lvx_u		$in0,$x00,$inp		# load first 8 "words"
+	 le?lvsl	$inpperm,0,$idx
+	 le?vspltisb	$tmp,0x0f
+	lvx_u		$in1,$x10,$inp
+	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
+	lvx_u		$in2,$x20,$inp
+	 le?vperm	$in0,$in0,$in0,$inpperm
+	lvx_u		$in3,$x30,$inp
+	 le?vperm	$in1,$in1,$in1,$inpperm
+	lvx_u		$in4,$x40,$inp
+	 le?vperm	$in2,$in2,$in2,$inpperm
+	vxor		$out0,$in0,$rndkey0
+	lvx_u		$in5,$x50,$inp
+	 le?vperm	$in3,$in3,$in3,$inpperm
+	vxor		$out1,$in1,$rndkey0
+	lvx_u		$in6,$x60,$inp
+	 le?vperm	$in4,$in4,$in4,$inpperm
+	vxor		$out2,$in2,$rndkey0
+	lvx_u		$in7,$x70,$inp
+	addi		$inp,$inp,0x80
+	 le?vperm	$in5,$in5,$in5,$inpperm
+	vxor		$out3,$in3,$rndkey0
+	 le?vperm	$in6,$in6,$in6,$inpperm
+	vxor		$out4,$in4,$rndkey0
+	 le?vperm	$in7,$in7,$in7,$inpperm
+	vxor		$out5,$in5,$rndkey0
+	vxor		$out6,$in6,$rndkey0
+	vxor		$out7,$in7,$rndkey0
+
+	mtctr		$rounds
+	b		Loop_cbc_dec8x
+.align	5
+Loop_cbc_dec8x:
+	vncipher	$out0,$out0,v24
+	vncipher	$out1,$out1,v24
+	vncipher	$out2,$out2,v24
+	vncipher	$out3,$out3,v24
+	vncipher	$out4,$out4,v24
+	vncipher	$out5,$out5,v24
+	vncipher	$out6,$out6,v24
+	vncipher	$out7,$out7,v24
+	lvx		v24,$x20,$key_		# round[3]
+	addi		$key_,$key_,0x20
+
+	vncipher	$out0,$out0,v25
+	vncipher	$out1,$out1,v25
+	vncipher	$out2,$out2,v25
+	vncipher	$out3,$out3,v25
+	vncipher	$out4,$out4,v25
+	vncipher	$out5,$out5,v25
+	vncipher	$out6,$out6,v25
+	vncipher	$out7,$out7,v25
+	lvx		v25,$x10,$key_		# round[4]
+	bdnz		Loop_cbc_dec8x
+
+	subic		$len,$len,128		# $len-=128
+	vncipher	$out0,$out0,v24
+	vncipher	$out1,$out1,v24
+	vncipher	$out2,$out2,v24
+	vncipher	$out3,$out3,v24
+	vncipher	$out4,$out4,v24
+	vncipher	$out5,$out5,v24
+	vncipher	$out6,$out6,v24
+	vncipher	$out7,$out7,v24
+
+	subfe.		r0,r0,r0		# borrow?-1:0
+	vncipher	$out0,$out0,v25
+	vncipher	$out1,$out1,v25
+	vncipher	$out2,$out2,v25
+	vncipher	$out3,$out3,v25
+	vncipher	$out4,$out4,v25
+	vncipher	$out5,$out5,v25
+	vncipher	$out6,$out6,v25
+	vncipher	$out7,$out7,v25
+
+	and		r0,r0,$len
+	vncipher	$out0,$out0,v26
+	vncipher	$out1,$out1,v26
+	vncipher	$out2,$out2,v26
+	vncipher	$out3,$out3,v26
+	vncipher	$out4,$out4,v26
+	vncipher	$out5,$out5,v26
+	vncipher	$out6,$out6,v26
+	vncipher	$out7,$out7,v26
+
+	add		$inp,$inp,r0		# $inp is adjusted in such
+						# way that at exit from the
+						# loop inX-in7 are loaded
+						# with last "words"
+	vncipher	$out0,$out0,v27
+	vncipher	$out1,$out1,v27
+	vncipher	$out2,$out2,v27
+	vncipher	$out3,$out3,v27
+	vncipher	$out4,$out4,v27
+	vncipher	$out5,$out5,v27
+	vncipher	$out6,$out6,v27
+	vncipher	$out7,$out7,v27
+
+	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
+	vncipher	$out0,$out0,v28
+	vncipher	$out1,$out1,v28
+	vncipher	$out2,$out2,v28
+	vncipher	$out3,$out3,v28
+	vncipher	$out4,$out4,v28
+	vncipher	$out5,$out5,v28
+	vncipher	$out6,$out6,v28
+	vncipher	$out7,$out7,v28
+	lvx		v24,$x00,$key_		# re-pre-load round[1]
+
+	vncipher	$out0,$out0,v29
+	vncipher	$out1,$out1,v29
+	vncipher	$out2,$out2,v29
+	vncipher	$out3,$out3,v29
+	vncipher	$out4,$out4,v29
+	vncipher	$out5,$out5,v29
+	vncipher	$out6,$out6,v29
+	vncipher	$out7,$out7,v29
+	lvx		v25,$x10,$key_		# re-pre-load round[2]
+
+	vncipher	$out0,$out0,v30
+	 vxor		$ivec,$ivec,v31		# xor with last round key
+	vncipher	$out1,$out1,v30
+	 vxor		$in0,$in0,v31
+	vncipher	$out2,$out2,v30
+	 vxor		$in1,$in1,v31
+	vncipher	$out3,$out3,v30
+	 vxor		$in2,$in2,v31
+	vncipher	$out4,$out4,v30
+	 vxor		$in3,$in3,v31
+	vncipher	$out5,$out5,v30
+	 vxor		$in4,$in4,v31
+	vncipher	$out6,$out6,v30
+	 vxor		$in5,$in5,v31
+	vncipher	$out7,$out7,v30
+	 vxor		$in6,$in6,v31
+
+	vncipherlast	$out0,$out0,$ivec
+	vncipherlast	$out1,$out1,$in0
+	 lvx_u		$in0,$x00,$inp		# load next input block
+	vncipherlast	$out2,$out2,$in1
+	 lvx_u		$in1,$x10,$inp
+	vncipherlast	$out3,$out3,$in2
+	 le?vperm	$in0,$in0,$in0,$inpperm
+	 lvx_u		$in2,$x20,$inp
+	vncipherlast	$out4,$out4,$in3
+	 le?vperm	$in1,$in1,$in1,$inpperm
+	 lvx_u		$in3,$x30,$inp
+	vncipherlast	$out5,$out5,$in4
+	 le?vperm	$in2,$in2,$in2,$inpperm
+	 lvx_u		$in4,$x40,$inp
+	vncipherlast	$out6,$out6,$in5
+	 le?vperm	$in3,$in3,$in3,$inpperm
+	 lvx_u		$in5,$x50,$inp
+	vncipherlast	$out7,$out7,$in6
+	 le?vperm	$in4,$in4,$in4,$inpperm
+	 lvx_u		$in6,$x60,$inp
+	vmr		$ivec,$in7
+	 le?vperm	$in5,$in5,$in5,$inpperm
+	 lvx_u		$in7,$x70,$inp
+	 addi		$inp,$inp,0x80
+
+	le?vperm	$out0,$out0,$out0,$inpperm
+	le?vperm	$out1,$out1,$out1,$inpperm
+	stvx_u		$out0,$x00,$out
+	 le?vperm	$in6,$in6,$in6,$inpperm
+	 vxor		$out0,$in0,$rndkey0
+	le?vperm	$out2,$out2,$out2,$inpperm
+	stvx_u		$out1,$x10,$out
+	 le?vperm	$in7,$in7,$in7,$inpperm
+	 vxor		$out1,$in1,$rndkey0
+	le?vperm	$out3,$out3,$out3,$inpperm
+	stvx_u		$out2,$x20,$out
+	 vxor		$out2,$in2,$rndkey0
+	le?vperm	$out4,$out4,$out4,$inpperm
+	stvx_u		$out3,$x30,$out
+	 vxor		$out3,$in3,$rndkey0
+	le?vperm	$out5,$out5,$out5,$inpperm
+	stvx_u		$out4,$x40,$out
+	 vxor		$out4,$in4,$rndkey0
+	le?vperm	$out6,$out6,$out6,$inpperm
+	stvx_u		$out5,$x50,$out
+	 vxor		$out5,$in5,$rndkey0
+	le?vperm	$out7,$out7,$out7,$inpperm
+	stvx_u		$out6,$x60,$out
+	 vxor		$out6,$in6,$rndkey0
+	stvx_u		$out7,$x70,$out
+	addi		$out,$out,0x80
+	 vxor		$out7,$in7,$rndkey0
+
+	mtctr		$rounds
+	beq		Loop_cbc_dec8x		# did $len-=128 borrow?
+
+	addic.		$len,$len,128
+	beq		Lcbc_dec8x_done
+	nop
+	nop
+
+Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
+	vncipher	$out1,$out1,v24
+	vncipher	$out2,$out2,v24
+	vncipher	$out3,$out3,v24
+	vncipher	$out4,$out4,v24
+	vncipher	$out5,$out5,v24
+	vncipher	$out6,$out6,v24
+	vncipher	$out7,$out7,v24
+	lvx		v24,$x20,$key_		# round[3]
+	addi		$key_,$key_,0x20
+
+	vncipher	$out1,$out1,v25
+	vncipher	$out2,$out2,v25
+	vncipher	$out3,$out3,v25
+	vncipher	$out4,$out4,v25
+	vncipher	$out5,$out5,v25
+	vncipher	$out6,$out6,v25
+	vncipher	$out7,$out7,v25
+	lvx		v25,$x10,$key_		# round[4]
+	bdnz		Loop_cbc_dec8x_tail
+
+	vncipher	$out1,$out1,v24
+	vncipher	$out2,$out2,v24
+	vncipher	$out3,$out3,v24
+	vncipher	$out4,$out4,v24
+	vncipher	$out5,$out5,v24
+	vncipher	$out6,$out6,v24
+	vncipher	$out7,$out7,v24
+
+	vncipher	$out1,$out1,v25
+	vncipher	$out2,$out2,v25
+	vncipher	$out3,$out3,v25
+	vncipher	$out4,$out4,v25
+	vncipher	$out5,$out5,v25
+	vncipher	$out6,$out6,v25
+	vncipher	$out7,$out7,v25
+
+	vncipher	$out1,$out1,v26
+	vncipher	$out2,$out2,v26
+	vncipher	$out3,$out3,v26
+	vncipher	$out4,$out4,v26
+	vncipher	$out5,$out5,v26
+	vncipher	$out6,$out6,v26
+	vncipher	$out7,$out7,v26
+
+	vncipher	$out1,$out1,v27
+	vncipher	$out2,$out2,v27
+	vncipher	$out3,$out3,v27
+	vncipher	$out4,$out4,v27
+	vncipher	$out5,$out5,v27
+	vncipher	$out6,$out6,v27
+	vncipher	$out7,$out7,v27
+
+	vncipher	$out1,$out1,v28
+	vncipher	$out2,$out2,v28
+	vncipher	$out3,$out3,v28
+	vncipher	$out4,$out4,v28
+	vncipher	$out5,$out5,v28
+	vncipher	$out6,$out6,v28
+	vncipher	$out7,$out7,v28
+
+	vncipher	$out1,$out1,v29
+	vncipher	$out2,$out2,v29
+	vncipher	$out3,$out3,v29
+	vncipher	$out4,$out4,v29
+	vncipher	$out5,$out5,v29
+	vncipher	$out6,$out6,v29
+	vncipher	$out7,$out7,v29
+
+	vncipher	$out1,$out1,v30
+	 vxor		$ivec,$ivec,v31		# last round key
+	vncipher	$out2,$out2,v30
+	 vxor		$in1,$in1,v31
+	vncipher	$out3,$out3,v30
+	 vxor		$in2,$in2,v31
+	vncipher	$out4,$out4,v30
+	 vxor		$in3,$in3,v31
+	vncipher	$out5,$out5,v30
+	 vxor		$in4,$in4,v31
+	vncipher	$out6,$out6,v30
+	 vxor		$in5,$in5,v31
+	vncipher	$out7,$out7,v30
+	 vxor		$in6,$in6,v31
+
+	cmplwi		$len,32			# switch($len)
+	blt		Lcbc_dec8x_one
+	nop
+	beq		Lcbc_dec8x_two
+	cmplwi		$len,64
+	blt		Lcbc_dec8x_three
+	nop
+	beq		Lcbc_dec8x_four
+	cmplwi		$len,96
+	blt		Lcbc_dec8x_five
+	nop
+	beq		Lcbc_dec8x_six
+
+Lcbc_dec8x_seven:
+	vncipherlast	$out1,$out1,$ivec
+	vncipherlast	$out2,$out2,$in1
+	vncipherlast	$out3,$out3,$in2
+	vncipherlast	$out4,$out4,$in3
+	vncipherlast	$out5,$out5,$in4
+	vncipherlast	$out6,$out6,$in5
+	vncipherlast	$out7,$out7,$in6
+	vmr		$ivec,$in7
+
+	le?vperm	$out1,$out1,$out1,$inpperm
+	le?vperm	$out2,$out2,$out2,$inpperm
+	stvx_u		$out1,$x00,$out
+	le?vperm	$out3,$out3,$out3,$inpperm
+	stvx_u		$out2,$x10,$out
+	le?vperm	$out4,$out4,$out4,$inpperm
+	stvx_u		$out3,$x20,$out
+	le?vperm	$out5,$out5,$out5,$inpperm
+	stvx_u		$out4,$x30,$out
+	le?vperm	$out6,$out6,$out6,$inpperm
+	stvx_u		$out5,$x40,$out
+	le?vperm	$out7,$out7,$out7,$inpperm
+	stvx_u		$out6,$x50,$out
+	stvx_u		$out7,$x60,$out
+	addi		$out,$out,0x70
+	b		Lcbc_dec8x_done
+
+.align	5
+Lcbc_dec8x_six:
+	vncipherlast	$out2,$out2,$ivec
+	vncipherlast	$out3,$out3,$in2
+	vncipherlast	$out4,$out4,$in3
+	vncipherlast	$out5,$out5,$in4
+	vncipherlast	$out6,$out6,$in5
+	vncipherlast	$out7,$out7,$in6
+	vmr		$ivec,$in7
+
+	le?vperm	$out2,$out2,$out2,$inpperm
+	le?vperm	$out3,$out3,$out3,$inpperm
+	stvx_u		$out2,$x00,$out
+	le?vperm	$out4,$out4,$out4,$inpperm
+	stvx_u		$out3,$x10,$out
+	le?vperm	$out5,$out5,$out5,$inpperm
+	stvx_u		$out4,$x20,$out
+	le?vperm	$out6,$out6,$out6,$inpperm
+	stvx_u		$out5,$x30,$out
+	le?vperm	$out7,$out7,$out7,$inpperm
+	stvx_u		$out6,$x40,$out
+	stvx_u		$out7,$x50,$out
+	addi		$out,$out,0x60
+	b		Lcbc_dec8x_done
+
+.align	5
+Lcbc_dec8x_five:
+	vncipherlast	$out3,$out3,$ivec
+	vncipherlast	$out4,$out4,$in3
+	vncipherlast	$out5,$out5,$in4
+	vncipherlast	$out6,$out6,$in5
+	vncipherlast	$out7,$out7,$in6
+	vmr		$ivec,$in7
+
+	le?vperm	$out3,$out3,$out3,$inpperm
+	le?vperm	$out4,$out4,$out4,$inpperm
+	stvx_u		$out3,$x00,$out
+	le?vperm	$out5,$out5,$out5,$inpperm
+	stvx_u		$out4,$x10,$out
+	le?vperm	$out6,$out6,$out6,$inpperm
+	stvx_u		$out5,$x20,$out
+	le?vperm	$out7,$out7,$out7,$inpperm
+	stvx_u		$out6,$x30,$out
+	stvx_u		$out7,$x40,$out
+	addi		$out,$out,0x50
+	b		Lcbc_dec8x_done
+
+.align	5
+Lcbc_dec8x_four:
+	vncipherlast	$out4,$out4,$ivec
+	vncipherlast	$out5,$out5,$in4
+	vncipherlast	$out6,$out6,$in5
+	vncipherlast	$out7,$out7,$in6
+	vmr		$ivec,$in7
+
+	le?vperm	$out4,$out4,$out4,$inpperm
+	le?vperm	$out5,$out5,$out5,$inpperm
+	stvx_u		$out4,$x00,$out
+	le?vperm	$out6,$out6,$out6,$inpperm
+	stvx_u		$out5,$x10,$out
+	le?vperm	$out7,$out7,$out7,$inpperm
+	stvx_u		$out6,$x20,$out
+	stvx_u		$out7,$x30,$out
+	addi		$out,$out,0x40
+	b		Lcbc_dec8x_done
+
+.align	5
+Lcbc_dec8x_three:
+	vncipherlast	$out5,$out5,$ivec
+	vncipherlast	$out6,$out6,$in5
+	vncipherlast	$out7,$out7,$in6
+	vmr		$ivec,$in7
+
+	le?vperm	$out5,$out5,$out5,$inpperm
+	le?vperm	$out6,$out6,$out6,$inpperm
+	stvx_u		$out5,$x00,$out
+	le?vperm	$out7,$out7,$out7,$inpperm
+	stvx_u		$out6,$x10,$out
+	stvx_u		$out7,$x20,$out
+	addi		$out,$out,0x30
+	b		Lcbc_dec8x_done
+
+.align	5
+Lcbc_dec8x_two:
+	vncipherlast	$out6,$out6,$ivec
+	vncipherlast	$out7,$out7,$in6
+	vmr		$ivec,$in7
+
+	le?vperm	$out6,$out6,$out6,$inpperm
+	le?vperm	$out7,$out7,$out7,$inpperm
+	stvx_u		$out6,$x00,$out
+	stvx_u		$out7,$x10,$out
+	addi		$out,$out,0x20
+	b		Lcbc_dec8x_done
+
+.align	5
+Lcbc_dec8x_one:
+	vncipherlast	$out7,$out7,$ivec
+	vmr		$ivec,$in7
+
+	le?vperm	$out7,$out7,$out7,$inpperm
+	stvx_u		$out7,0,$out
+	addi		$out,$out,0x10
+
+Lcbc_dec8x_done:
+	le?vperm	$ivec,$ivec,$ivec,$inpperm
+	stvx_u		$ivec,0,$ivp		# write [unaligned] iv
+
+	li		r10,`$FRAME+15`
+	li		r11,`$FRAME+31`
+	stvx		$inpperm,r10,$sp	# wipe copies of round keys
+	addi		r10,r10,32
+	stvx		$inpperm,r11,$sp
+	addi		r11,r11,32
+	stvx		$inpperm,r10,$sp
+	addi		r10,r10,32
+	stvx		$inpperm,r11,$sp
+	addi		r11,r11,32
+	stvx		$inpperm,r10,$sp
+	addi		r10,r10,32
+	stvx		$inpperm,r11,$sp
+	addi		r11,r11,32
+	stvx		$inpperm,r10,$sp
+	addi		r10,r10,32
+	stvx		$inpperm,r11,$sp
+	addi		r11,r11,32
+
+	mtspr		256,$vrsave
+	lvx		v20,r10,$sp		# ABI says so
+	addi		r10,r10,32
+	lvx		v21,r11,$sp
+	addi		r11,r11,32
+	lvx		v22,r10,$sp
+	addi		r10,r10,32
+	lvx		v23,r11,$sp
+	addi		r11,r11,32
+	lvx		v24,r10,$sp
+	addi		r10,r10,32
+	lvx		v25,r11,$sp
+	addi		r11,r11,32
+	lvx		v26,r10,$sp
+	addi		r10,r10,32
+	lvx		v27,r11,$sp
+	addi		r11,r11,32
+	lvx		v28,r10,$sp
+	addi		r10,r10,32
+	lvx		v29,r11,$sp
+	addi		r11,r11,32
+	lvx		v30,r10,$sp
+	lvx		v31,r11,$sp
+	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+	blr
+	.long		0
+	.byte		0,12,0x04,0,0x80,6,6,0
+	.long		0
+.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
+___
+}}	}}}
+
+#########################################################################
+{{{	# CTR procedure[s]						#
+my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
+						map("v$_",(4..11));
+my $dat=$tmp;
+
+$code.=<<___;
+.globl	.${prefix}_ctr32_encrypt_blocks
+.align	5
+.${prefix}_ctr32_encrypt_blocks:
+	${UCMP}i	$len,1
+	bltlr-
+
+	lis		r0,0xfff0
+	mfspr		$vrsave,256
+	mtspr		256,r0
+
+	li		$idx,15
+	vxor		$rndkey0,$rndkey0,$rndkey0
+	le?vspltisb	$tmp,0x0f
+
+	lvx		$ivec,0,$ivp		# load [unaligned] iv
+	lvsl		$inpperm,0,$ivp
+	lvx		$inptail,$idx,$ivp
+	 vspltisb	$one,1
+	le?vxor		$inpperm,$inpperm,$tmp
+	vperm		$ivec,$ivec,$inptail,$inpperm
+	 vsldoi		$one,$rndkey0,$one,1
+
+	neg		r11,$inp
+	?lvsl		$keyperm,0,$key		# prepare for unaligned key
+	lwz		$rounds,240($key)
+
+	lvsr		$inpperm,0,r11		# prepare for unaligned load
+	lvx		$inptail,0,$inp
+	addi		$inp,$inp,15		# 15 is not typo
+	le?vxor		$inpperm,$inpperm,$tmp
+
+	srwi		$rounds,$rounds,1
+	li		$idx,16
+	subi		$rounds,$rounds,1
+
+	${UCMP}i	$len,8
+	bge		_aesp8_ctr32_encrypt8x
+
+	?lvsr		$outperm,0,$out		# prepare for unaligned store
+	vspltisb	$outmask,-1
+	lvx		$outhead,0,$out
+	?vperm		$outmask,$rndkey0,$outmask,$outperm
+	le?vxor		$outperm,$outperm,$tmp
+
+	lvx		$rndkey0,0,$key
+	mtctr		$rounds
+	lvx		$rndkey1,$idx,$key
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$inout,$ivec,$rndkey0
+	lvx		$rndkey0,$idx,$key
+	addi		$idx,$idx,16
+	b		Loop_ctr32_enc
+
+.align	5
+Loop_ctr32_enc:
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vcipher		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key
+	addi		$idx,$idx,16
+	bdnz		Loop_ctr32_enc
+
+	vadduwm		$ivec,$ivec,$one
+	 vmr		$dat,$inptail
+	 lvx		$inptail,0,$inp
+	 addi		$inp,$inp,16
+	 subic.		$len,$len,1		# blocks--
+
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key
+	 vperm		$dat,$dat,$inptail,$inpperm
+	 li		$idx,16
+	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
+	 lvx		$rndkey0,0,$key
+	vxor		$dat,$dat,$rndkey1	# last round key
+	vcipherlast	$inout,$inout,$dat
+
+	 lvx		$rndkey1,$idx,$key
+	 addi		$idx,$idx,16
+	vperm		$inout,$inout,$inout,$outperm
+	vsel		$dat,$outhead,$inout,$outmask
+	 mtctr		$rounds
+	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vmr		$outhead,$inout
+	 vxor		$inout,$ivec,$rndkey0
+	 lvx		$rndkey0,$idx,$key
+	 addi		$idx,$idx,16
+	stvx		$dat,0,$out
+	addi		$out,$out,16
+	bne		Loop_ctr32_enc
+
+	addi		$out,$out,-1
+	lvx		$inout,0,$out		# redundant in aligned case
+	vsel		$inout,$outhead,$inout,$outmask
+	stvx		$inout,0,$out
+
+	mtspr		256,$vrsave
+	blr
+	.long		0
+	.byte		0,12,0x14,0,0,0,6,0
+	.long		0
+___
+#########################################################################
+{{	# Optimized CTR procedure					#
+my $key_="r11";
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
+    $x00=0 if ($flavour =~ /osx/);
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
+my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
+			# v26-v31 last 6 round keys
+my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
+my ($two,$three,$four)=($outhead,$outperm,$outmask);
+
+$code.=<<___;
+.align	5
+_aesp8_ctr32_encrypt8x:
+	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+	li		r10,`$FRAME+8*16+15`
+	li		r11,`$FRAME+8*16+31`
+	stvx		v20,r10,$sp		# ABI says so
+	addi		r10,r10,32
+	stvx		v21,r11,$sp
+	addi		r11,r11,32
+	stvx		v22,r10,$sp
+	addi		r10,r10,32
+	stvx		v23,r11,$sp
+	addi		r11,r11,32
+	stvx		v24,r10,$sp
+	addi		r10,r10,32
+	stvx		v25,r11,$sp
+	addi		r11,r11,32
+	stvx		v26,r10,$sp
+	addi		r10,r10,32
+	stvx		v27,r11,$sp
+	addi		r11,r11,32
+	stvx		v28,r10,$sp
+	addi		r10,r10,32
+	stvx		v29,r11,$sp
+	addi		r11,r11,32
+	stvx		v30,r10,$sp
+	stvx		v31,r11,$sp
+	li		r0,-1
+	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
+	li		$x10,0x10
+	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+	li		$x20,0x20
+	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+	li		$x30,0x30
+	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+	li		$x40,0x40
+	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+	li		$x50,0x50
+	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+	li		$x60,0x60
+	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+	li		$x70,0x70
+	mtspr		256,r0
+
+	subi		$rounds,$rounds,3	# -4 in total
+
+	lvx		$rndkey0,$x00,$key	# load key schedule
+	lvx		v30,$x10,$key
+	addi		$key,$key,0x20
+	lvx		v31,$x00,$key
+	?vperm		$rndkey0,$rndkey0,v30,$keyperm
+	addi		$key_,$sp,`$FRAME+15`
+	mtctr		$rounds
+
+Load_ctr32_enc_key:
+	?vperm		v24,v30,v31,$keyperm
+	lvx		v30,$x10,$key
+	addi		$key,$key,0x20
+	stvx		v24,$x00,$key_		# off-load round[1]
+	?vperm		v25,v31,v30,$keyperm
+	lvx		v31,$x00,$key
+	stvx		v25,$x10,$key_		# off-load round[2]
+	addi		$key_,$key_,0x20
+	bdnz		Load_ctr32_enc_key
+
+	lvx		v26,$x10,$key
+	?vperm		v24,v30,v31,$keyperm
+	lvx		v27,$x20,$key
+	stvx		v24,$x00,$key_		# off-load round[3]
+	?vperm		v25,v31,v26,$keyperm
+	lvx		v28,$x30,$key
+	stvx		v25,$x10,$key_		# off-load round[4]
+	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
+	?vperm		v26,v26,v27,$keyperm
+	lvx		v29,$x40,$key
+	?vperm		v27,v27,v28,$keyperm
+	lvx		v30,$x50,$key
+	?vperm		v28,v28,v29,$keyperm
+	lvx		v31,$x60,$key
+	?vperm		v29,v29,v30,$keyperm
+	lvx		$out0,$x70,$key		# borrow $out0
+	?vperm		v30,v30,v31,$keyperm
+	lvx		v24,$x00,$key_		# pre-load round[1]
+	?vperm		v31,v31,$out0,$keyperm
+	lvx		v25,$x10,$key_		# pre-load round[2]
+
+	vadduwm		$two,$one,$one
+	subi		$inp,$inp,15		# undo "caller"
+	$SHL		$len,$len,4
+
+	vadduwm		$out1,$ivec,$one	# counter values ...
+	vadduwm		$out2,$ivec,$two
+	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
+	 le?li		$idx,8
+	vadduwm		$out3,$out1,$two
+	vxor		$out1,$out1,$rndkey0
+	 le?lvsl	$inpperm,0,$idx
+	vadduwm		$out4,$out2,$two
+	vxor		$out2,$out2,$rndkey0
+	 le?vspltisb	$tmp,0x0f
+	vadduwm		$out5,$out3,$two
+	vxor		$out3,$out3,$rndkey0
+	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
+	vadduwm		$out6,$out4,$two
+	vxor		$out4,$out4,$rndkey0
+	vadduwm		$out7,$out5,$two
+	vxor		$out5,$out5,$rndkey0
+	vadduwm		$ivec,$out6,$two	# next counter value
+	vxor		$out6,$out6,$rndkey0
+	vxor		$out7,$out7,$rndkey0
+
+	mtctr		$rounds
+	b		Loop_ctr32_enc8x
+.align	5
+Loop_ctr32_enc8x:
+	vcipher 	$out0,$out0,v24
+	vcipher 	$out1,$out1,v24
+	vcipher 	$out2,$out2,v24
+	vcipher 	$out3,$out3,v24
+	vcipher 	$out4,$out4,v24
+	vcipher 	$out5,$out5,v24
+	vcipher 	$out6,$out6,v24
+	vcipher 	$out7,$out7,v24
+Loop_ctr32_enc8x_middle:
+	lvx		v24,$x20,$key_		# round[3]
+	addi		$key_,$key_,0x20
+
+	vcipher 	$out0,$out0,v25
+	vcipher 	$out1,$out1,v25
+	vcipher 	$out2,$out2,v25
+	vcipher 	$out3,$out3,v25
+	vcipher 	$out4,$out4,v25
+	vcipher 	$out5,$out5,v25
+	vcipher 	$out6,$out6,v25
+	vcipher 	$out7,$out7,v25
+	lvx		v25,$x10,$key_		# round[4]
+	bdnz		Loop_ctr32_enc8x
+
+	subic		r11,$len,256		# $len-256, borrow $key_
+	vcipher 	$out0,$out0,v24
+	vcipher 	$out1,$out1,v24
+	vcipher 	$out2,$out2,v24
+	vcipher 	$out3,$out3,v24
+	vcipher 	$out4,$out4,v24
+	vcipher 	$out5,$out5,v24
+	vcipher 	$out6,$out6,v24
+	vcipher 	$out7,$out7,v24
+
+	subfe		r0,r0,r0		# borrow?-1:0
+	vcipher 	$out0,$out0,v25
+	vcipher 	$out1,$out1,v25
+	vcipher 	$out2,$out2,v25
+	vcipher 	$out3,$out3,v25
+	vcipher 	$out4,$out4,v25
+	vcipher		$out5,$out5,v25
+	vcipher		$out6,$out6,v25
+	vcipher		$out7,$out7,v25
+
+	and		r0,r0,r11
+	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
+	vcipher		$out0,$out0,v26
+	vcipher		$out1,$out1,v26
+	vcipher		$out2,$out2,v26
+	vcipher		$out3,$out3,v26
+	vcipher		$out4,$out4,v26
+	vcipher		$out5,$out5,v26
+	vcipher		$out6,$out6,v26
+	vcipher		$out7,$out7,v26
+	lvx		v24,$x00,$key_		# re-pre-load round[1]
+
+	subic		$len,$len,129		# $len-=129
+	vcipher		$out0,$out0,v27
+	addi		$len,$len,1		# $len-=128 really
+	vcipher		$out1,$out1,v27
+	vcipher		$out2,$out2,v27
+	vcipher		$out3,$out3,v27
+	vcipher		$out4,$out4,v27
+	vcipher		$out5,$out5,v27
+	vcipher		$out6,$out6,v27
+	vcipher		$out7,$out7,v27
+	lvx		v25,$x10,$key_		# re-pre-load round[2]
+
+	vcipher		$out0,$out0,v28
+	 lvx_u		$in0,$x00,$inp		# load input
+	vcipher		$out1,$out1,v28
+	 lvx_u		$in1,$x10,$inp
+	vcipher		$out2,$out2,v28
+	 lvx_u		$in2,$x20,$inp
+	vcipher		$out3,$out3,v28
+	 lvx_u		$in3,$x30,$inp
+	vcipher		$out4,$out4,v28
+	 lvx_u		$in4,$x40,$inp
+	vcipher		$out5,$out5,v28
+	 lvx_u		$in5,$x50,$inp
+	vcipher		$out6,$out6,v28
+	 lvx_u		$in6,$x60,$inp
+	vcipher		$out7,$out7,v28
+	 lvx_u		$in7,$x70,$inp
+	 addi		$inp,$inp,0x80
+
+	vcipher		$out0,$out0,v29
+	 le?vperm	$in0,$in0,$in0,$inpperm
+	vcipher		$out1,$out1,v29
+	 le?vperm	$in1,$in1,$in1,$inpperm
+	vcipher		$out2,$out2,v29
+	 le?vperm	$in2,$in2,$in2,$inpperm
+	vcipher		$out3,$out3,v29
+	 le?vperm	$in3,$in3,$in3,$inpperm
+	vcipher		$out4,$out4,v29
+	 le?vperm	$in4,$in4,$in4,$inpperm
+	vcipher		$out5,$out5,v29
+	 le?vperm	$in5,$in5,$in5,$inpperm
+	vcipher		$out6,$out6,v29
+	 le?vperm	$in6,$in6,$in6,$inpperm
+	vcipher		$out7,$out7,v29
+	 le?vperm	$in7,$in7,$in7,$inpperm
+
+	add		$inp,$inp,r0		# $inp is adjusted in such
+						# way that at exit from the
+						# loop inX-in7 are loaded
+						# with last "words"
+	subfe.		r0,r0,r0		# borrow?-1:0
+	vcipher		$out0,$out0,v30
+	 vxor		$in0,$in0,v31		# xor with last round key
+	vcipher		$out1,$out1,v30
+	 vxor		$in1,$in1,v31
+	vcipher		$out2,$out2,v30
+	 vxor		$in2,$in2,v31
+	vcipher		$out3,$out3,v30
+	 vxor		$in3,$in3,v31
+	vcipher		$out4,$out4,v30
+	 vxor		$in4,$in4,v31
+	vcipher		$out5,$out5,v30
+	 vxor		$in5,$in5,v31
+	vcipher		$out6,$out6,v30
+	 vxor		$in6,$in6,v31
+	vcipher		$out7,$out7,v30
+	 vxor		$in7,$in7,v31
+
+	bne		Lctr32_enc8x_break	# did $len-129 borrow?
+
+	vcipherlast	$in0,$out0,$in0
+	vcipherlast	$in1,$out1,$in1
+	 vadduwm	$out1,$ivec,$one	# counter values ...
+	vcipherlast	$in2,$out2,$in2
+	 vadduwm	$out2,$ivec,$two
+	 vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
+	vcipherlast	$in3,$out3,$in3
+	 vadduwm	$out3,$out1,$two
+	 vxor		$out1,$out1,$rndkey0
+	vcipherlast	$in4,$out4,$in4
+	 vadduwm	$out4,$out2,$two
+	 vxor		$out2,$out2,$rndkey0
+	vcipherlast	$in5,$out5,$in5
+	 vadduwm	$out5,$out3,$two
+	 vxor		$out3,$out3,$rndkey0
+	vcipherlast	$in6,$out6,$in6
+	 vadduwm	$out6,$out4,$two
+	 vxor		$out4,$out4,$rndkey0
+	vcipherlast	$in7,$out7,$in7
+	 vadduwm	$out7,$out5,$two
+	 vxor		$out5,$out5,$rndkey0
+	le?vperm	$in0,$in0,$in0,$inpperm
+	 vadduwm	$ivec,$out6,$two	# next counter value
+	 vxor		$out6,$out6,$rndkey0
+	le?vperm	$in1,$in1,$in1,$inpperm
+	 vxor		$out7,$out7,$rndkey0
+	mtctr		$rounds
+
+	 vcipher	$out0,$out0,v24
+	stvx_u		$in0,$x00,$out
+	le?vperm	$in2,$in2,$in2,$inpperm
+	 vcipher	$out1,$out1,v24
+	stvx_u		$in1,$x10,$out
+	le?vperm	$in3,$in3,$in3,$inpperm
+	 vcipher	$out2,$out2,v24
+	stvx_u		$in2,$x20,$out
+	le?vperm	$in4,$in4,$in4,$inpperm
+	 vcipher	$out3,$out3,v24
+	stvx_u		$in3,$x30,$out
+	le?vperm	$in5,$in5,$in5,$inpperm
+	 vcipher	$out4,$out4,v24
+	stvx_u		$in4,$x40,$out
+	le?vperm	$in6,$in6,$in6,$inpperm
+	 vcipher	$out5,$out5,v24
+	stvx_u		$in5,$x50,$out
+	le?vperm	$in7,$in7,$in7,$inpperm
+	 vcipher	$out6,$out6,v24
+	stvx_u		$in6,$x60,$out
+	 vcipher	$out7,$out7,v24
+	stvx_u		$in7,$x70,$out
+	addi		$out,$out,0x80
+
+	b		Loop_ctr32_enc8x_middle
+
+.align	5
+Lctr32_enc8x_break:
+	cmpwi		$len,-0x60
+	blt		Lctr32_enc8x_one
+	nop
+	beq		Lctr32_enc8x_two
+	cmpwi		$len,-0x40
+	blt		Lctr32_enc8x_three
+	nop
+	beq		Lctr32_enc8x_four
+	cmpwi		$len,-0x20
+	blt		Lctr32_enc8x_five
+	nop
+	beq		Lctr32_enc8x_six
+	cmpwi		$len,0x00
+	blt		Lctr32_enc8x_seven
+
+Lctr32_enc8x_eight:
+	vcipherlast	$out0,$out0,$in0
+	vcipherlast	$out1,$out1,$in1
+	vcipherlast	$out2,$out2,$in2
+	vcipherlast	$out3,$out3,$in3
+	vcipherlast	$out4,$out4,$in4
+	vcipherlast	$out5,$out5,$in5
+	vcipherlast	$out6,$out6,$in6
+	vcipherlast	$out7,$out7,$in7
+
+	le?vperm	$out0,$out0,$out0,$inpperm
+	le?vperm	$out1,$out1,$out1,$inpperm
+	stvx_u		$out0,$x00,$out
+	le?vperm	$out2,$out2,$out2,$inpperm
+	stvx_u		$out1,$x10,$out
+	le?vperm	$out3,$out3,$out3,$inpperm
+	stvx_u		$out2,$x20,$out
+	le?vperm	$out4,$out4,$out4,$inpperm
+	stvx_u		$out3,$x30,$out
+	le?vperm	$out5,$out5,$out5,$inpperm
+	stvx_u		$out4,$x40,$out
+	le?vperm	$out6,$out6,$out6,$inpperm
+	stvx_u		$out5,$x50,$out
+	le?vperm	$out7,$out7,$out7,$inpperm
+	stvx_u		$out6,$x60,$out
+	stvx_u		$out7,$x70,$out
+	addi		$out,$out,0x80
+	b		Lctr32_enc8x_done
+
+.align	5
+Lctr32_enc8x_seven:
+	vcipherlast	$out0,$out0,$in1
+	vcipherlast	$out1,$out1,$in2
+	vcipherlast	$out2,$out2,$in3
+	vcipherlast	$out3,$out3,$in4
+	vcipherlast	$out4,$out4,$in5
+	vcipherlast	$out5,$out5,$in6
+	vcipherlast	$out6,$out6,$in7
+
+	le?vperm	$out0,$out0,$out0,$inpperm
+	le?vperm	$out1,$out1,$out1,$inpperm
+	stvx_u		$out0,$x00,$out
+	le?vperm	$out2,$out2,$out2,$inpperm
+	stvx_u		$out1,$x10,$out
+	le?vperm	$out3,$out3,$out3,$inpperm
+	stvx_u		$out2,$x20,$out
+	le?vperm	$out4,$out4,$out4,$inpperm
+	stvx_u		$out3,$x30,$out
+	le?vperm	$out5,$out5,$out5,$inpperm
+	stvx_u		$out4,$x40,$out
+	le?vperm	$out6,$out6,$out6,$inpperm
+	stvx_u		$out5,$x50,$out
+	stvx_u		$out6,$x60,$out
+	addi		$out,$out,0x70
+	b		Lctr32_enc8x_done
+
+.align	5
+Lctr32_enc8x_six:
+	vcipherlast	$out0,$out0,$in2
+	vcipherlast	$out1,$out1,$in3
+	vcipherlast	$out2,$out2,$in4
+	vcipherlast	$out3,$out3,$in5
+	vcipherlast	$out4,$out4,$in6
+	vcipherlast	$out5,$out5,$in7
+
+	le?vperm	$out0,$out0,$out0,$inpperm
+	le?vperm	$out1,$out1,$out1,$inpperm
+	stvx_u		$out0,$x00,$out
+	le?vperm	$out2,$out2,$out2,$inpperm
+	stvx_u		$out1,$x10,$out
+	le?vperm	$out3,$out3,$out3,$inpperm
+	stvx_u		$out2,$x20,$out
+	le?vperm	$out4,$out4,$out4,$inpperm
+	stvx_u		$out3,$x30,$out
+	le?vperm	$out5,$out5,$out5,$inpperm
+	stvx_u		$out4,$x40,$out
+	stvx_u		$out5,$x50,$out
+	addi		$out,$out,0x60
+	b		Lctr32_enc8x_done
+
+.align	5
+Lctr32_enc8x_five:
+	vcipherlast	$out0,$out0,$in3
+	vcipherlast	$out1,$out1,$in4
+	vcipherlast	$out2,$out2,$in5
+	vcipherlast	$out3,$out3,$in6
+	vcipherlast	$out4,$out4,$in7
+
+	le?vperm	$out0,$out0,$out0,$inpperm
+	le?vperm	$out1,$out1,$out1,$inpperm
+	stvx_u		$out0,$x00,$out
+	le?vperm	$out2,$out2,$out2,$inpperm
+	stvx_u		$out1,$x10,$out
+	le?vperm	$out3,$out3,$out3,$inpperm
+	stvx_u		$out2,$x20,$out
+	le?vperm	$out4,$out4,$out4,$inpperm
+	stvx_u		$out3,$x30,$out
+	stvx_u		$out4,$x40,$out
+	addi		$out,$out,0x50
+	b		Lctr32_enc8x_done
+
+.align	5
+Lctr32_enc8x_four:
+	vcipherlast	$out0,$out0,$in4
+	vcipherlast	$out1,$out1,$in5
+	vcipherlast	$out2,$out2,$in6
+	vcipherlast	$out3,$out3,$in7
+
+	le?vperm	$out0,$out0,$out0,$inpperm
+	le?vperm	$out1,$out1,$out1,$inpperm
+	stvx_u		$out0,$x00,$out
+	le?vperm	$out2,$out2,$out2,$inpperm
+	stvx_u		$out1,$x10,$out
+	le?vperm	$out3,$out3,$out3,$inpperm
+	stvx_u		$out2,$x20,$out
+	stvx_u		$out3,$x30,$out
+	addi		$out,$out,0x40
+	b		Lctr32_enc8x_done
+
+.align	5
+Lctr32_enc8x_three:
+	vcipherlast	$out0,$out0,$in5
+	vcipherlast	$out1,$out1,$in6
+	vcipherlast	$out2,$out2,$in7
+
+	le?vperm	$out0,$out0,$out0,$inpperm
+	le?vperm	$out1,$out1,$out1,$inpperm
+	stvx_u		$out0,$x00,$out
+	le?vperm	$out2,$out2,$out2,$inpperm
+	stvx_u		$out1,$x10,$out
+	stvx_u		$out2,$x20,$out
+	addi		$out,$out,0x30
+	b		Lctr32_enc8x_done
+
+.align	5
+Lctr32_enc8x_two:
+	vcipherlast	$out0,$out0,$in6
+	vcipherlast	$out1,$out1,$in7
+
+	le?vperm	$out0,$out0,$out0,$inpperm
+	le?vperm	$out1,$out1,$out1,$inpperm
+	stvx_u		$out0,$x00,$out
+	stvx_u		$out1,$x10,$out
+	addi		$out,$out,0x20
+	b		Lctr32_enc8x_done
+
+.align	5
+Lctr32_enc8x_one:
+	vcipherlast	$out0,$out0,$in7
+
+	le?vperm	$out0,$out0,$out0,$inpperm
+	stvx_u		$out0,0,$out
+	addi		$out,$out,0x10
+
+Lctr32_enc8x_done:
+	li		r10,`$FRAME+15`
+	li		r11,`$FRAME+31`
+	stvx		$inpperm,r10,$sp	# wipe copies of round keys
+	addi		r10,r10,32
+	stvx		$inpperm,r11,$sp
+	addi		r11,r11,32
+	stvx		$inpperm,r10,$sp
+	addi		r10,r10,32
+	stvx		$inpperm,r11,$sp
+	addi		r11,r11,32
+	stvx		$inpperm,r10,$sp
+	addi		r10,r10,32
+	stvx		$inpperm,r11,$sp
+	addi		r11,r11,32
+	stvx		$inpperm,r10,$sp
+	addi		r10,r10,32
+	stvx		$inpperm,r11,$sp
+	addi		r11,r11,32
+
+	mtspr		256,$vrsave
+	lvx		v20,r10,$sp		# ABI says so
+	addi		r10,r10,32
+	lvx		v21,r11,$sp
+	addi		r11,r11,32
+	lvx		v22,r10,$sp
+	addi		r10,r10,32
+	lvx		v23,r11,$sp
+	addi		r11,r11,32
+	lvx		v24,r10,$sp
+	addi		r10,r10,32
+	lvx		v25,r11,$sp
+	addi		r11,r11,32
+	lvx		v26,r10,$sp
+	addi		r10,r10,32
+	lvx		v27,r11,$sp
+	addi		r11,r11,32
+	lvx		v28,r10,$sp
+	addi		r10,r10,32
+	lvx		v29,r11,$sp
+	addi		r11,r11,32
+	lvx		v30,r10,$sp
+	lvx		v31,r11,$sp
+	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+	blr
+	.long		0
+	.byte		0,12,0x04,0,0x80,6,6,0
+	.long		0
+.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
+___
+}}	}}}
+
+#########################################################################
+{{{	# XTS procedures						#
+# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
+#                             const AES_KEY *key1, const AES_KEY *key2,	#
+#                             [const] unsigned char iv[16]);		#
+# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
+# input tweak value is assumed to be encrypted already, and last tweak	#
+# value, one suitable for consecutive call on same chunk of data, is	#
+# written back to original buffer. In addition, in "tweak chaining"	#
+# mode only complete input blocks are processed.			#
+
+my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
+my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
+my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
+my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
+my $taillen = $key2;
+
+   ($inp,$idx) = ($idx,$inp);				# reassign
+
+$code.=<<___;
+.globl	.${prefix}_xts_encrypt
+.align	5
+.${prefix}_xts_encrypt:
+	mr		$inp,r3				# reassign
+	li		r3,-1
+	${UCMP}i	$len,16
+	bltlr-
+
+	lis		r0,0xfff0
+	mfspr		r12,256				# save vrsave
+	li		r11,0
+	mtspr		256,r0
+
+	vspltisb	$seven,0x07			# 0x070707..07
+	le?lvsl		$leperm,r11,r11
+	le?vspltisb	$tmp,0x0f
+	le?vxor		$leperm,$leperm,$seven
+
+	li		$idx,15
+	lvx		$tweak,0,$ivp			# load [unaligned] iv
+	lvsl		$inpperm,0,$ivp
+	lvx		$inptail,$idx,$ivp
+	le?vxor		$inpperm,$inpperm,$tmp
+	vperm		$tweak,$tweak,$inptail,$inpperm
+
+	neg		r11,$inp
+	lvsr		$inpperm,0,r11			# prepare for unaligned load
+	lvx		$inout,0,$inp
+	addi		$inp,$inp,15			# 15 is not typo
+	le?vxor		$inpperm,$inpperm,$tmp
+
+	${UCMP}i	$key2,0				# key2==NULL?
+	beq		Lxts_enc_no_key2
+
+	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
+	lwz		$rounds,240($key2)
+	srwi		$rounds,$rounds,1
+	subi		$rounds,$rounds,1
+	li		$idx,16
+
+	lvx		$rndkey0,0,$key2
+	lvx		$rndkey1,$idx,$key2
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$tweak,$tweak,$rndkey0
+	lvx		$rndkey0,$idx,$key2
+	addi		$idx,$idx,16
+	mtctr		$rounds
+
+Ltweak_xts_enc:
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$tweak,$tweak,$rndkey1
+	lvx		$rndkey1,$idx,$key2
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vcipher		$tweak,$tweak,$rndkey0
+	lvx		$rndkey0,$idx,$key2
+	addi		$idx,$idx,16
+	bdnz		Ltweak_xts_enc
+
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$tweak,$tweak,$rndkey1
+	lvx		$rndkey1,$idx,$key2
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vcipherlast	$tweak,$tweak,$rndkey0
+
+	li		$ivp,0				# don't chain the tweak
+	b		Lxts_enc
+
+Lxts_enc_no_key2:
+	li		$idx,-16
+	and		$len,$len,$idx			# in "tweak chaining"
+							# mode only complete
+							# blocks are processed
+Lxts_enc:
+	lvx		$inptail,0,$inp
+	addi		$inp,$inp,16
+
+	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
+	lwz		$rounds,240($key1)
+	srwi		$rounds,$rounds,1
+	subi		$rounds,$rounds,1
+	li		$idx,16
+
+	vslb		$eighty7,$seven,$seven		# 0x808080..80
+	vor		$eighty7,$eighty7,$seven	# 0x878787..87
+	vspltisb	$tmp,1				# 0x010101..01
+	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
+
+	${UCMP}i	$len,96
+	bge		_aesp8_xts_encrypt6x
+
+	andi.		$taillen,$len,15
+	subic		r0,$len,32
+	subi		$taillen,$taillen,16
+	subfe		r0,r0,r0
+	and		r0,r0,$taillen
+	add		$inp,$inp,r0
+
+	lvx		$rndkey0,0,$key1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+	vperm		$inout,$inout,$inptail,$inpperm
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$inout,$inout,$tweak
+	vxor		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+	mtctr		$rounds
+	b		Loop_xts_enc
+
+.align	5
+Loop_xts_enc:
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vcipher		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+	bdnz		Loop_xts_enc
+
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key1
+	li		$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$rndkey0,$rndkey0,$tweak
+	vcipherlast	$output,$inout,$rndkey0
+
+	le?vperm	$tmp,$output,$output,$leperm
+	be?nop
+	le?stvx_u	$tmp,0,$out
+	be?stvx_u	$output,0,$out
+	addi		$out,$out,16
+
+	subic.		$len,$len,16
+	beq		Lxts_enc_done
+
+	vmr		$inout,$inptail
+	lvx		$inptail,0,$inp
+	addi		$inp,$inp,16
+	lvx		$rndkey0,0,$key1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+
+	subic		r0,$len,32
+	subfe		r0,r0,r0
+	and		r0,r0,$taillen
+	add		$inp,$inp,r0
+
+	vsrab		$tmp,$tweak,$seven		# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	vand		$tmp,$tmp,$eighty7
+	vxor		$tweak,$tweak,$tmp
+
+	vperm		$inout,$inout,$inptail,$inpperm
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$inout,$inout,$tweak
+	vxor		$output,$output,$rndkey0	# just in case $len<16
+	vxor		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+
+	mtctr		$rounds
+	${UCMP}i	$len,16
+	bge		Loop_xts_enc
+
+	vxor		$output,$output,$tweak
+	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
+	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
+	vspltisb	$tmp,-1
+	vperm		$inptail,$inptail,$tmp,$inpperm
+	vsel		$inout,$inout,$output,$inptail
+
+	subi		r11,$out,17
+	subi		$out,$out,16
+	mtctr		$len
+	li		$len,16
+Loop_xts_enc_steal:
+	lbzu		r0,1(r11)
+	stb		r0,16(r11)
+	bdnz		Loop_xts_enc_steal
+
+	mtctr		$rounds
+	b		Loop_xts_enc			# one more time...
+
+Lxts_enc_done:
+	${UCMP}i	$ivp,0
+	beq		Lxts_enc_ret
+
+	vsrab		$tmp,$tweak,$seven		# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	vand		$tmp,$tmp,$eighty7
+	vxor		$tweak,$tweak,$tmp
+
+	le?vperm	$tweak,$tweak,$tweak,$leperm
+	stvx_u		$tweak,0,$ivp
+
+Lxts_enc_ret:
+	mtspr		256,r12				# restore vrsave
+	li		r3,0
+	blr
+	.long		0
+	.byte		0,12,0x04,0,0x80,6,6,0
+	.long		0
+.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
+
+.globl	.${prefix}_xts_decrypt
+.align	5
+.${prefix}_xts_decrypt:
+	mr		$inp,r3				# reassign
+	li		r3,-1
+	${UCMP}i	$len,16
+	bltlr-
+
+	lis		r0,0xfff8
+	mfspr		r12,256				# save vrsave
+	li		r11,0
+	mtspr		256,r0
+
+	andi.		r0,$len,15
+	neg		r0,r0
+	andi.		r0,r0,16
+	sub		$len,$len,r0
+
+	vspltisb	$seven,0x07			# 0x070707..07
+	le?lvsl		$leperm,r11,r11
+	le?vspltisb	$tmp,0x0f
+	le?vxor		$leperm,$leperm,$seven
+
+	li		$idx,15
+	lvx		$tweak,0,$ivp			# load [unaligned] iv
+	lvsl		$inpperm,0,$ivp
+	lvx		$inptail,$idx,$ivp
+	le?vxor		$inpperm,$inpperm,$tmp
+	vperm		$tweak,$tweak,$inptail,$inpperm
+
+	neg		r11,$inp
+	lvsr		$inpperm,0,r11			# prepare for unaligned load
+	lvx		$inout,0,$inp
+	addi		$inp,$inp,15			# 15 is not typo
+	le?vxor		$inpperm,$inpperm,$tmp
+
+	${UCMP}i	$key2,0				# key2==NULL?
+	beq		Lxts_dec_no_key2
+
+	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
+	lwz		$rounds,240($key2)
+	srwi		$rounds,$rounds,1
+	subi		$rounds,$rounds,1
+	li		$idx,16
+
+	lvx		$rndkey0,0,$key2
+	lvx		$rndkey1,$idx,$key2
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$tweak,$tweak,$rndkey0
+	lvx		$rndkey0,$idx,$key2
+	addi		$idx,$idx,16
+	mtctr		$rounds
+
+Ltweak_xts_dec:
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$tweak,$tweak,$rndkey1
+	lvx		$rndkey1,$idx,$key2
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vcipher		$tweak,$tweak,$rndkey0
+	lvx		$rndkey0,$idx,$key2
+	addi		$idx,$idx,16
+	bdnz		Ltweak_xts_dec
+
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vcipher		$tweak,$tweak,$rndkey1
+	lvx		$rndkey1,$idx,$key2
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vcipherlast	$tweak,$tweak,$rndkey0
+
+	li		$ivp,0				# don't chain the tweak
+	b		Lxts_dec
+
+Lxts_dec_no_key2:
+	neg		$idx,$len
+	andi.		$idx,$idx,15
+	add		$len,$len,$idx			# in "tweak chaining"
+							# mode only complete
+							# blocks are processed
+Lxts_dec:
+	lvx		$inptail,0,$inp
+	addi		$inp,$inp,16
+
+	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
+	lwz		$rounds,240($key1)
+	srwi		$rounds,$rounds,1
+	subi		$rounds,$rounds,1
+	li		$idx,16
+
+	vslb		$eighty7,$seven,$seven		# 0x808080..80
+	vor		$eighty7,$eighty7,$seven	# 0x878787..87
+	vspltisb	$tmp,1				# 0x010101..01
+	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
+
+	${UCMP}i	$len,96
+	bge		_aesp8_xts_decrypt6x
+
+	lvx		$rndkey0,0,$key1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+	vperm		$inout,$inout,$inptail,$inpperm
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$inout,$inout,$tweak
+	vxor		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+	mtctr		$rounds
+
+	${UCMP}i	$len,16
+	blt		Ltail_xts_dec
+	be?b		Loop_xts_dec
+
+.align	5
+Loop_xts_dec:
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vncipher	$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vncipher	$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+	bdnz		Loop_xts_dec
+
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vncipher	$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key1
+	li		$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$rndkey0,$rndkey0,$tweak
+	vncipherlast	$output,$inout,$rndkey0
+
+	le?vperm	$tmp,$output,$output,$leperm
+	be?nop
+	le?stvx_u	$tmp,0,$out
+	be?stvx_u	$output,0,$out
+	addi		$out,$out,16
+
+	subic.		$len,$len,16
+	beq		Lxts_dec_done
+
+	vmr		$inout,$inptail
+	lvx		$inptail,0,$inp
+	addi		$inp,$inp,16
+	lvx		$rndkey0,0,$key1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+
+	vsrab		$tmp,$tweak,$seven		# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	vand		$tmp,$tmp,$eighty7
+	vxor		$tweak,$tweak,$tmp
+
+	vperm		$inout,$inout,$inptail,$inpperm
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$inout,$inout,$tweak
+	vxor		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+
+	mtctr		$rounds
+	${UCMP}i	$len,16
+	bge		Loop_xts_dec
+
+Ltail_xts_dec:
+	vsrab		$tmp,$tweak,$seven		# next tweak value
+	vaddubm		$tweak1,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	vand		$tmp,$tmp,$eighty7
+	vxor		$tweak1,$tweak1,$tmp
+
+	subi		$inp,$inp,16
+	add		$inp,$inp,$len
+
+	vxor		$inout,$inout,$tweak		# :-(
+	vxor		$inout,$inout,$tweak1		# :-)
+
+Loop_xts_dec_short:
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vncipher	$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vncipher	$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+	bdnz		Loop_xts_dec_short
+
+	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
+	vncipher	$inout,$inout,$rndkey1
+	lvx		$rndkey1,$idx,$key1
+	li		$idx,16
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+	vxor		$rndkey0,$rndkey0,$tweak1
+	vncipherlast	$output,$inout,$rndkey0
+
+	le?vperm	$tmp,$output,$output,$leperm
+	be?nop
+	le?stvx_u	$tmp,0,$out
+	be?stvx_u	$output,0,$out
+
+	vmr		$inout,$inptail
+	lvx		$inptail,0,$inp
+	#addi		$inp,$inp,16
+	lvx		$rndkey0,0,$key1
+	lvx		$rndkey1,$idx,$key1
+	addi		$idx,$idx,16
+	vperm		$inout,$inout,$inptail,$inpperm
+	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
+
+	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
+	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
+	vspltisb	$tmp,-1
+	vperm		$inptail,$inptail,$tmp,$inpperm
+	vsel		$inout,$inout,$output,$inptail
+
+	vxor		$rndkey0,$rndkey0,$tweak
+	vxor		$inout,$inout,$rndkey0
+	lvx		$rndkey0,$idx,$key1
+	addi		$idx,$idx,16
+
+	subi		r11,$out,1
+	mtctr		$len
+	li		$len,16
+Loop_xts_dec_steal:
+	lbzu		r0,1(r11)
+	stb		r0,16(r11)
+	bdnz		Loop_xts_dec_steal
+
+	mtctr		$rounds
+	b		Loop_xts_dec			# one more time...
+
+Lxts_dec_done:
+	${UCMP}i	$ivp,0
+	beq		Lxts_dec_ret
+
+	vsrab		$tmp,$tweak,$seven		# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	vand		$tmp,$tmp,$eighty7
+	vxor		$tweak,$tweak,$tmp
+
+	le?vperm	$tweak,$tweak,$tweak,$leperm
+	stvx_u		$tweak,0,$ivp
+
+Lxts_dec_ret:
+	mtspr		256,r12				# restore vrsave
+	li		r3,0
+	blr
+	.long		0
+	.byte		0,12,0x04,0,0x80,6,6,0
+	.long		0
+.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
+___
+#########################################################################
+{{	# Optimized XTS procedures					#
+my $key_=$key2;
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
+    $x00=0 if ($flavour =~ /osx/);
+my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
+my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
+my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
+my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
+			# v26-v31 last 6 round keys
+my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
+my $taillen=$x70;
+
+$code.=<<___;
+.align	5
+_aesp8_xts_encrypt6x:
+	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+	mflr		r11
+	li		r7,`$FRAME+8*16+15`
+	li		r3,`$FRAME+8*16+31`
+	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
+	stvx		v20,r7,$sp		# ABI says so
+	addi		r7,r7,32
+	stvx		v21,r3,$sp
+	addi		r3,r3,32
+	stvx		v22,r7,$sp
+	addi		r7,r7,32
+	stvx		v23,r3,$sp
+	addi		r3,r3,32
+	stvx		v24,r7,$sp
+	addi		r7,r7,32
+	stvx		v25,r3,$sp
+	addi		r3,r3,32
+	stvx		v26,r7,$sp
+	addi		r7,r7,32
+	stvx		v27,r3,$sp
+	addi		r3,r3,32
+	stvx		v28,r7,$sp
+	addi		r7,r7,32
+	stvx		v29,r3,$sp
+	addi		r3,r3,32
+	stvx		v30,r7,$sp
+	stvx		v31,r3,$sp
+	li		r0,-1
+	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
+	li		$x10,0x10
+	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+	li		$x20,0x20
+	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+	li		$x30,0x30
+	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+	li		$x40,0x40
+	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+	li		$x50,0x50
+	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+	li		$x60,0x60
+	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+	li		$x70,0x70
+	mtspr		256,r0
+
+	subi		$rounds,$rounds,3	# -4 in total
+
+	lvx		$rndkey0,$x00,$key1	# load key schedule
+	lvx		v30,$x10,$key1
+	addi		$key1,$key1,0x20
+	lvx		v31,$x00,$key1
+	?vperm		$rndkey0,$rndkey0,v30,$keyperm
+	addi		$key_,$sp,`$FRAME+15`
+	mtctr		$rounds
+
+Load_xts_enc_key:
+	?vperm		v24,v30,v31,$keyperm
+	lvx		v30,$x10,$key1
+	addi		$key1,$key1,0x20
+	stvx		v24,$x00,$key_		# off-load round[1]
+	?vperm		v25,v31,v30,$keyperm
+	lvx		v31,$x00,$key1
+	stvx		v25,$x10,$key_		# off-load round[2]
+	addi		$key_,$key_,0x20
+	bdnz		Load_xts_enc_key
+
+	lvx		v26,$x10,$key1
+	?vperm		v24,v30,v31,$keyperm
+	lvx		v27,$x20,$key1
+	stvx		v24,$x00,$key_		# off-load round[3]
+	?vperm		v25,v31,v26,$keyperm
+	lvx		v28,$x30,$key1
+	stvx		v25,$x10,$key_		# off-load round[4]
+	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
+	?vperm		v26,v26,v27,$keyperm
+	lvx		v29,$x40,$key1
+	?vperm		v27,v27,v28,$keyperm
+	lvx		v30,$x50,$key1
+	?vperm		v28,v28,v29,$keyperm
+	lvx		v31,$x60,$key1
+	?vperm		v29,v29,v30,$keyperm
+	lvx		$twk5,$x70,$key1	# borrow $twk5
+	?vperm		v30,v30,v31,$keyperm
+	lvx		v24,$x00,$key_		# pre-load round[1]
+	?vperm		v31,v31,$twk5,$keyperm
+	lvx		v25,$x10,$key_		# pre-load round[2]
+
+	 vperm		$in0,$inout,$inptail,$inpperm
+	 subi		$inp,$inp,31		# undo "caller"
+	vxor		$twk0,$tweak,$rndkey0
+	vsrab		$tmp,$tweak,$seven	# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	vand		$tmp,$tmp,$eighty7
+	 vxor		$out0,$in0,$twk0
+	vxor		$tweak,$tweak,$tmp
+
+	 lvx_u		$in1,$x10,$inp
+	vxor		$twk1,$tweak,$rndkey0
+	vsrab		$tmp,$tweak,$seven	# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	 le?vperm	$in1,$in1,$in1,$leperm
+	vand		$tmp,$tmp,$eighty7
+	 vxor		$out1,$in1,$twk1
+	vxor		$tweak,$tweak,$tmp
+
+	 lvx_u		$in2,$x20,$inp
+	 andi.		$taillen,$len,15
+	vxor		$twk2,$tweak,$rndkey0
+	vsrab		$tmp,$tweak,$seven	# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	 le?vperm	$in2,$in2,$in2,$leperm
+	vand		$tmp,$tmp,$eighty7
+	 vxor		$out2,$in2,$twk2
+	vxor		$tweak,$tweak,$tmp
+
+	 lvx_u		$in3,$x30,$inp
+	 sub		$len,$len,$taillen
+	vxor		$twk3,$tweak,$rndkey0
+	vsrab		$tmp,$tweak,$seven	# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	 le?vperm	$in3,$in3,$in3,$leperm
+	vand		$tmp,$tmp,$eighty7
+	 vxor		$out3,$in3,$twk3
+	vxor		$tweak,$tweak,$tmp
+
+	 lvx_u		$in4,$x40,$inp
+	 subi		$len,$len,0x60
+	vxor		$twk4,$tweak,$rndkey0
+	vsrab		$tmp,$tweak,$seven	# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	 le?vperm	$in4,$in4,$in4,$leperm
+	vand		$tmp,$tmp,$eighty7
+	 vxor		$out4,$in4,$twk4
+	vxor		$tweak,$tweak,$tmp
+
+	 lvx_u		$in5,$x50,$inp
+	 addi		$inp,$inp,0x60
+	vxor		$twk5,$tweak,$rndkey0
+	vsrab		$tmp,$tweak,$seven	# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	 le?vperm	$in5,$in5,$in5,$leperm
+	vand		$tmp,$tmp,$eighty7
+	 vxor		$out5,$in5,$twk5
+	vxor		$tweak,$tweak,$tmp
+
+	vxor		v31,v31,$rndkey0
+	mtctr		$rounds
+	b		Loop_xts_enc6x
+
+.align	5
+Loop_xts_enc6x:
+	vcipher		$out0,$out0,v24
+	vcipher		$out1,$out1,v24
+	vcipher		$out2,$out2,v24
+	vcipher		$out3,$out3,v24
+	vcipher		$out4,$out4,v24
+	vcipher		$out5,$out5,v24
+	lvx		v24,$x20,$key_		# round[3]
+	addi		$key_,$key_,0x20
+
+	vcipher		$out0,$out0,v25
+	vcipher		$out1,$out1,v25
+	vcipher		$out2,$out2,v25
+	vcipher		$out3,$out3,v25
+	vcipher		$out4,$out4,v25
+	vcipher		$out5,$out5,v25
+	lvx		v25,$x10,$key_		# round[4]
+	bdnz		Loop_xts_enc6x
+
+	subic		$len,$len,96		# $len-=96
+	 vxor		$in0,$twk0,v31		# xor with last round key
+	vcipher		$out0,$out0,v24
+	vcipher		$out1,$out1,v24
+	 vsrab		$tmp,$tweak,$seven	# next tweak value
+	 vxor		$twk0,$tweak,$rndkey0
+	 vaddubm	$tweak,$tweak,$tweak
+	vcipher		$out2,$out2,v24
+	vcipher		$out3,$out3,v24
+	 vsldoi		$tmp,$tmp,$tmp,15
+	vcipher		$out4,$out4,v24
+	vcipher		$out5,$out5,v24
+
+	subfe.		r0,r0,r0		# borrow?-1:0
+	 vand		$tmp,$tmp,$eighty7
+	vcipher		$out0,$out0,v25
+	vcipher		$out1,$out1,v25
+	 vxor		$tweak,$tweak,$tmp
+	vcipher		$out2,$out2,v25
+	vcipher		$out3,$out3,v25
+	 vxor		$in1,$twk1,v31
+	 vsrab		$tmp,$tweak,$seven	# next tweak value
+	 vxor		$twk1,$tweak,$rndkey0
+	vcipher		$out4,$out4,v25
+	vcipher		$out5,$out5,v25
+
+	and		r0,r0,$len
+	 vaddubm	$tweak,$tweak,$tweak
+	 vsldoi		$tmp,$tmp,$tmp,15
+	vcipher		$out0,$out0,v26
+	vcipher		$out1,$out1,v26
+	 vand		$tmp,$tmp,$eighty7
+	vcipher		$out2,$out2,v26
+	vcipher		$out3,$out3,v26
+	 vxor		$tweak,$tweak,$tmp
+	vcipher		$out4,$out4,v26
+	vcipher		$out5,$out5,v26
+
+	add		$inp,$inp,r0		# $inp is adjusted in such
+						# way that at exit from the
+						# loop inX-in5 are loaded
+						# with last "words"
+	 vxor		$in2,$twk2,v31
+	 vsrab		$tmp,$tweak,$seven	# next tweak value
+	 vxor		$twk2,$tweak,$rndkey0
+	 vaddubm	$tweak,$tweak,$tweak
+	vcipher		$out0,$out0,v27
+	vcipher		$out1,$out1,v27
+	 vsldoi		$tmp,$tmp,$tmp,15
+	vcipher		$out2,$out2,v27
+	vcipher		$out3,$out3,v27
+	 vand		$tmp,$tmp,$eighty7
+	vcipher		$out4,$out4,v27
+	vcipher		$out5,$out5,v27
+
+	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
+	 vxor		$tweak,$tweak,$tmp
+	vcipher		$out0,$out0,v28
+	vcipher		$out1,$out1,v28
+	 vxor		$in3,$twk3,v31
+	 vsrab		$tmp,$tweak,$seven	# next tweak value
+	 vxor		$twk3,$tweak,$rndkey0
+	vcipher		$out2,$out2,v28
+	vcipher		$out3,$out3,v28
+	 vaddubm	$tweak,$tweak,$tweak
+	 vsldoi		$tmp,$tmp,$tmp,15
+	vcipher		$out4,$out4,v28
+	vcipher		$out5,$out5,v28
+	lvx		v24,$x00,$key_		# re-pre-load round[1]
+	 vand		$tmp,$tmp,$eighty7
+
+	vcipher		$out0,$out0,v29
+	vcipher		$out1,$out1,v29
+	 vxor		$tweak,$tweak,$tmp
+	vcipher		$out2,$out2,v29
+	vcipher		$out3,$out3,v29
+	 vxor		$in4,$twk4,v31
+	 vsrab		$tmp,$tweak,$seven	# next tweak value
+	 vxor		$twk4,$tweak,$rndkey0
+	vcipher		$out4,$out4,v29
+	vcipher		$out5,$out5,v29
+	lvx		v25,$x10,$key_		# re-pre-load round[2]
+	 vaddubm	$tweak,$tweak,$tweak
+	 vsldoi		$tmp,$tmp,$tmp,15
+
+	vcipher		$out0,$out0,v30
+	vcipher		$out1,$out1,v30
+	 vand		$tmp,$tmp,$eighty7
+	vcipher		$out2,$out2,v30
+	vcipher		$out3,$out3,v30
+	 vxor		$tweak,$tweak,$tmp
+	vcipher		$out4,$out4,v30
+	vcipher		$out5,$out5,v30
+	 vxor		$in5,$twk5,v31
+	 vsrab		$tmp,$tweak,$seven	# next tweak value
+	 vxor		$twk5,$tweak,$rndkey0
+
+	vcipherlast	$out0,$out0,$in0
+	 lvx_u		$in0,$x00,$inp		# load next input block
+	 vaddubm	$tweak,$tweak,$tweak
+	 vsldoi		$tmp,$tmp,$tmp,15
+	vcipherlast	$out1,$out1,$in1
+	 lvx_u		$in1,$x10,$inp
+	vcipherlast	$out2,$out2,$in2
+	 le?vperm	$in0,$in0,$in0,$leperm
+	 lvx_u		$in2,$x20,$inp
+	 vand		$tmp,$tmp,$eighty7
+	vcipherlast	$out3,$out3,$in3
+	 le?vperm	$in1,$in1,$in1,$leperm
+	 lvx_u		$in3,$x30,$inp
+	vcipherlast	$out4,$out4,$in4
+	 le?vperm	$in2,$in2,$in2,$leperm
+	 lvx_u		$in4,$x40,$inp
+	 vxor		$tweak,$tweak,$tmp
+	vcipherlast	$tmp,$out5,$in5		# last block might be needed
+						# in stealing mode
+	 le?vperm	$in3,$in3,$in3,$leperm
+	 lvx_u		$in5,$x50,$inp
+	 addi		$inp,$inp,0x60
+	 le?vperm	$in4,$in4,$in4,$leperm
+	 le?vperm	$in5,$in5,$in5,$leperm
+
+	le?vperm	$out0,$out0,$out0,$leperm
+	le?vperm	$out1,$out1,$out1,$leperm
+	stvx_u		$out0,$x00,$out		# store output
+	 vxor		$out0,$in0,$twk0
+	le?vperm	$out2,$out2,$out2,$leperm
+	stvx_u		$out1,$x10,$out
+	 vxor		$out1,$in1,$twk1
+	le?vperm	$out3,$out3,$out3,$leperm
+	stvx_u		$out2,$x20,$out
+	 vxor		$out2,$in2,$twk2
+	le?vperm	$out4,$out4,$out4,$leperm
+	stvx_u		$out3,$x30,$out
+	 vxor		$out3,$in3,$twk3
+	le?vperm	$out5,$tmp,$tmp,$leperm
+	stvx_u		$out4,$x40,$out
+	 vxor		$out4,$in4,$twk4
+	le?stvx_u	$out5,$x50,$out
+	be?stvx_u	$tmp, $x50,$out
+	 vxor		$out5,$in5,$twk5
+	addi		$out,$out,0x60
+
+	mtctr		$rounds
+	beq		Loop_xts_enc6x		# did $len-=96 borrow?
+
+	addic.		$len,$len,0x60
+	beq		Lxts_enc6x_zero
+	cmpwi		$len,0x20
+	blt		Lxts_enc6x_one
+	nop
+	beq		Lxts_enc6x_two
+	cmpwi		$len,0x40
+	blt		Lxts_enc6x_three
+	nop
+	beq		Lxts_enc6x_four
+
+Lxts_enc6x_five:
+	vxor		$out0,$in1,$twk0
+	vxor		$out1,$in2,$twk1
+	vxor		$out2,$in3,$twk2
+	vxor		$out3,$in4,$twk3
+	vxor		$out4,$in5,$twk4
+
+	bl		_aesp8_xts_enc5x
+
+	le?vperm	$out0,$out0,$out0,$leperm
+	vmr		$twk0,$twk5		# unused tweak
+	le?vperm	$out1,$out1,$out1,$leperm
+	stvx_u		$out0,$x00,$out		# store output
+	le?vperm	$out2,$out2,$out2,$leperm
+	stvx_u		$out1,$x10,$out
+	le?vperm	$out3,$out3,$out3,$leperm
+	stvx_u		$out2,$x20,$out
+	vxor		$tmp,$out4,$twk5	# last block prep for stealing
+	le?vperm	$out4,$out4,$out4,$leperm
+	stvx_u		$out3,$x30,$out
+	stvx_u		$out4,$x40,$out
+	addi		$out,$out,0x50
+	bne		Lxts_enc6x_steal
+	b		Lxts_enc6x_done
+
+.align	4
+Lxts_enc6x_four:
+	vxor		$out0,$in2,$twk0
+	vxor		$out1,$in3,$twk1
+	vxor		$out2,$in4,$twk2
+	vxor		$out3,$in5,$twk3
+	vxor		$out4,$out4,$out4
+
+	bl		_aesp8_xts_enc5x
+
+	le?vperm	$out0,$out0,$out0,$leperm
+	vmr		$twk0,$twk4		# unused tweak
+	le?vperm	$out1,$out1,$out1,$leperm
+	stvx_u		$out0,$x00,$out		# store output
+	le?vperm	$out2,$out2,$out2,$leperm
+	stvx_u		$out1,$x10,$out
+	vxor		$tmp,$out3,$twk4	# last block prep for stealing
+	le?vperm	$out3,$out3,$out3,$leperm
+	stvx_u		$out2,$x20,$out
+	stvx_u		$out3,$x30,$out
+	addi		$out,$out,0x40
+	bne		Lxts_enc6x_steal
+	b		Lxts_enc6x_done
+
+.align	4
+Lxts_enc6x_three:
+	vxor		$out0,$in3,$twk0
+	vxor		$out1,$in4,$twk1
+	vxor		$out2,$in5,$twk2
+	vxor		$out3,$out3,$out3
+	vxor		$out4,$out4,$out4
+
+	bl		_aesp8_xts_enc5x
+
+	le?vperm	$out0,$out0,$out0,$leperm
+	vmr		$twk0,$twk3		# unused tweak
+	le?vperm	$out1,$out1,$out1,$leperm
+	stvx_u		$out0,$x00,$out		# store output
+	vxor		$tmp,$out2,$twk3	# last block prep for stealing
+	le?vperm	$out2,$out2,$out2,$leperm
+	stvx_u		$out1,$x10,$out
+	stvx_u		$out2,$x20,$out
+	addi		$out,$out,0x30
+	bne		Lxts_enc6x_steal
+	b		Lxts_enc6x_done
+
+.align	4
+Lxts_enc6x_two:
+	vxor		$out0,$in4,$twk0
+	vxor		$out1,$in5,$twk1
+	vxor		$out2,$out2,$out2
+	vxor		$out3,$out3,$out3
+	vxor		$out4,$out4,$out4
+
+	bl		_aesp8_xts_enc5x
+
+	le?vperm	$out0,$out0,$out0,$leperm
+	vmr		$twk0,$twk2		# unused tweak
+	vxor		$tmp,$out1,$twk2	# last block prep for stealing
+	le?vperm	$out1,$out1,$out1,$leperm
+	stvx_u		$out0,$x00,$out		# store output
+	stvx_u		$out1,$x10,$out
+	addi		$out,$out,0x20
+	bne		Lxts_enc6x_steal
+	b		Lxts_enc6x_done
+
+.align	4
+Lxts_enc6x_one:
+	vxor		$out0,$in5,$twk0
+	nop
+Loop_xts_enc1x:
+	vcipher		$out0,$out0,v24
+	lvx		v24,$x20,$key_		# round[3]
+	addi		$key_,$key_,0x20
+
+	vcipher		$out0,$out0,v25
+	lvx		v25,$x10,$key_		# round[4]
+	bdnz		Loop_xts_enc1x
+
+	add		$inp,$inp,$taillen
+	cmpwi		$taillen,0
+	vcipher		$out0,$out0,v24
+
+	subi		$inp,$inp,16
+	vcipher		$out0,$out0,v25
+
+	lvsr		$inpperm,0,$taillen
+	vcipher		$out0,$out0,v26
+
+	lvx_u		$in0,0,$inp
+	vcipher		$out0,$out0,v27
+
+	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
+	vcipher		$out0,$out0,v28
+	lvx		v24,$x00,$key_		# re-pre-load round[1]
+
+	vcipher		$out0,$out0,v29
+	lvx		v25,$x10,$key_		# re-pre-load round[2]
+	 vxor		$twk0,$twk0,v31
+
+	le?vperm	$in0,$in0,$in0,$leperm
+	vcipher		$out0,$out0,v30
+
+	vperm		$in0,$in0,$in0,$inpperm
+	vcipherlast	$out0,$out0,$twk0
+
+	vmr		$twk0,$twk1		# unused tweak
+	vxor		$tmp,$out0,$twk1	# last block prep for stealing
+	le?vperm	$out0,$out0,$out0,$leperm
+	stvx_u		$out0,$x00,$out		# store output
+	addi		$out,$out,0x10
+	bne		Lxts_enc6x_steal
+	b		Lxts_enc6x_done
+
+.align	4
+Lxts_enc6x_zero:
+	cmpwi		$taillen,0
+	beq		Lxts_enc6x_done
+
+	add		$inp,$inp,$taillen
+	subi		$inp,$inp,16
+	lvx_u		$in0,0,$inp
+	lvsr		$inpperm,0,$taillen	# $in5 is no more
+	le?vperm	$in0,$in0,$in0,$leperm
+	vperm		$in0,$in0,$in0,$inpperm
+	vxor		$tmp,$tmp,$twk0
+Lxts_enc6x_steal:
+	vxor		$in0,$in0,$twk0
+	vxor		$out0,$out0,$out0
+	vspltisb	$out1,-1
+	vperm		$out0,$out0,$out1,$inpperm
+	vsel		$out0,$in0,$tmp,$out0	# $tmp is last block, remember?
+
+	subi		r30,$out,17
+	subi		$out,$out,16
+	mtctr		$taillen
+Loop_xts_enc6x_steal:
+	lbzu		r0,1(r30)
+	stb		r0,16(r30)
+	bdnz		Loop_xts_enc6x_steal
+
+	li		$taillen,0
+	mtctr		$rounds
+	b		Loop_xts_enc1x		# one more time...
+
+.align	4
+Lxts_enc6x_done:
+	${UCMP}i	$ivp,0
+	beq		Lxts_enc6x_ret
+
+	vxor		$tweak,$twk0,$rndkey0
+	le?vperm	$tweak,$tweak,$tweak,$leperm
+	stvx_u		$tweak,0,$ivp
+
+Lxts_enc6x_ret:
+	mtlr		r11
+	li		r10,`$FRAME+15`
+	li		r11,`$FRAME+31`
+	stvx		$seven,r10,$sp		# wipe copies of round keys
+	addi		r10,r10,32
+	stvx		$seven,r11,$sp
+	addi		r11,r11,32
+	stvx		$seven,r10,$sp
+	addi		r10,r10,32
+	stvx		$seven,r11,$sp
+	addi		r11,r11,32
+	stvx		$seven,r10,$sp
+	addi		r10,r10,32
+	stvx		$seven,r11,$sp
+	addi		r11,r11,32
+	stvx		$seven,r10,$sp
+	addi		r10,r10,32
+	stvx		$seven,r11,$sp
+	addi		r11,r11,32
+
+	mtspr		256,$vrsave
+	lvx		v20,r10,$sp		# ABI says so
+	addi		r10,r10,32
+	lvx		v21,r11,$sp
+	addi		r11,r11,32
+	lvx		v22,r10,$sp
+	addi		r10,r10,32
+	lvx		v23,r11,$sp
+	addi		r11,r11,32
+	lvx		v24,r10,$sp
+	addi		r10,r10,32
+	lvx		v25,r11,$sp
+	addi		r11,r11,32
+	lvx		v26,r10,$sp
+	addi		r10,r10,32
+	lvx		v27,r11,$sp
+	addi		r11,r11,32
+	lvx		v28,r10,$sp
+	addi		r10,r10,32
+	lvx		v29,r11,$sp
+	addi		r11,r11,32
+	lvx		v30,r10,$sp
+	lvx		v31,r11,$sp
+	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+	blr
+	.long		0
+	.byte		0,12,0x04,1,0x80,6,6,0
+	.long		0
+
+.align	5
+_aesp8_xts_enc5x:
+	vcipher		$out0,$out0,v24
+	vcipher		$out1,$out1,v24
+	vcipher		$out2,$out2,v24
+	vcipher		$out3,$out3,v24
+	vcipher		$out4,$out4,v24
+	lvx		v24,$x20,$key_		# round[3]
+	addi		$key_,$key_,0x20
+
+	vcipher		$out0,$out0,v25
+	vcipher		$out1,$out1,v25
+	vcipher		$out2,$out2,v25
+	vcipher		$out3,$out3,v25
+	vcipher		$out4,$out4,v25
+	lvx		v25,$x10,$key_		# round[4]
+	bdnz		_aesp8_xts_enc5x
+
+	add		$inp,$inp,$taillen
+	cmpwi		$taillen,0
+	vcipher		$out0,$out0,v24
+	vcipher		$out1,$out1,v24
+	vcipher		$out2,$out2,v24
+	vcipher		$out3,$out3,v24
+	vcipher		$out4,$out4,v24
+
+	subi		$inp,$inp,16
+	vcipher		$out0,$out0,v25
+	vcipher		$out1,$out1,v25
+	vcipher		$out2,$out2,v25
+	vcipher		$out3,$out3,v25
+	vcipher		$out4,$out4,v25
+	 vxor		$twk0,$twk0,v31
+
+	vcipher		$out0,$out0,v26
+	lvsr		$inpperm,0,$taillen	# $in5 is no more
+	vcipher		$out1,$out1,v26
+	vcipher		$out2,$out2,v26
+	vcipher		$out3,$out3,v26
+	vcipher		$out4,$out4,v26
+	 vxor		$in1,$twk1,v31
+
+	vcipher		$out0,$out0,v27
+	lvx_u		$in0,0,$inp
+	vcipher		$out1,$out1,v27
+	vcipher		$out2,$out2,v27
+	vcipher		$out3,$out3,v27
+	vcipher		$out4,$out4,v27
+	 vxor		$in2,$twk2,v31
+
+	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
+	vcipher		$out0,$out0,v28
+	vcipher		$out1,$out1,v28
+	vcipher		$out2,$out2,v28
+	vcipher		$out3,$out3,v28
+	vcipher		$out4,$out4,v28
+	lvx		v24,$x00,$key_		# re-pre-load round[1]
+	 vxor		$in3,$twk3,v31
+
+	vcipher		$out0,$out0,v29
+	le?vperm	$in0,$in0,$in0,$leperm
+	vcipher		$out1,$out1,v29
+	vcipher		$out2,$out2,v29
+	vcipher		$out3,$out3,v29
+	vcipher		$out4,$out4,v29
+	lvx		v25,$x10,$key_		# re-pre-load round[2]
+	 vxor		$in4,$twk4,v31
+
+	vcipher		$out0,$out0,v30
+	vperm		$in0,$in0,$in0,$inpperm
+	vcipher		$out1,$out1,v30
+	vcipher		$out2,$out2,v30
+	vcipher		$out3,$out3,v30
+	vcipher		$out4,$out4,v30
+
+	vcipherlast	$out0,$out0,$twk0
+	vcipherlast	$out1,$out1,$in1
+	vcipherlast	$out2,$out2,$in2
+	vcipherlast	$out3,$out3,$in3
+	vcipherlast	$out4,$out4,$in4
+	blr
+        .long   	0
+        .byte   	0,12,0x14,0,0,0,0,0
+
+.align	5
+_aesp8_xts_decrypt6x:
+	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+	mflr		r11
+	li		r7,`$FRAME+8*16+15`
+	li		r3,`$FRAME+8*16+31`
+	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
+	stvx		v20,r7,$sp		# ABI says so
+	addi		r7,r7,32
+	stvx		v21,r3,$sp
+	addi		r3,r3,32
+	stvx		v22,r7,$sp
+	addi		r7,r7,32
+	stvx		v23,r3,$sp
+	addi		r3,r3,32
+	stvx		v24,r7,$sp
+	addi		r7,r7,32
+	stvx		v25,r3,$sp
+	addi		r3,r3,32
+	stvx		v26,r7,$sp
+	addi		r7,r7,32
+	stvx		v27,r3,$sp
+	addi		r3,r3,32
+	stvx		v28,r7,$sp
+	addi		r7,r7,32
+	stvx		v29,r3,$sp
+	addi		r3,r3,32
+	stvx		v30,r7,$sp
+	stvx		v31,r3,$sp
+	li		r0,-1
+	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
+	li		$x10,0x10
+	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+	li		$x20,0x20
+	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+	li		$x30,0x30
+	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+	li		$x40,0x40
+	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+	li		$x50,0x50
+	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+	li		$x60,0x60
+	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+	li		$x70,0x70
+	mtspr		256,r0
+
+	subi		$rounds,$rounds,3	# -4 in total
+
+	lvx		$rndkey0,$x00,$key1	# load key schedule
+	lvx		v30,$x10,$key1
+	addi		$key1,$key1,0x20
+	lvx		v31,$x00,$key1
+	?vperm		$rndkey0,$rndkey0,v30,$keyperm
+	addi		$key_,$sp,`$FRAME+15`
+	mtctr		$rounds
+
+Load_xts_dec_key:
+	?vperm		v24,v30,v31,$keyperm
+	lvx		v30,$x10,$key1
+	addi		$key1,$key1,0x20
+	stvx		v24,$x00,$key_		# off-load round[1]
+	?vperm		v25,v31,v30,$keyperm
+	lvx		v31,$x00,$key1
+	stvx		v25,$x10,$key_		# off-load round[2]
+	addi		$key_,$key_,0x20
+	bdnz		Load_xts_dec_key
+
+	lvx		v26,$x10,$key1
+	?vperm		v24,v30,v31,$keyperm
+	lvx		v27,$x20,$key1
+	stvx		v24,$x00,$key_		# off-load round[3]
+	?vperm		v25,v31,v26,$keyperm
+	lvx		v28,$x30,$key1
+	stvx		v25,$x10,$key_		# off-load round[4]
+	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
+	?vperm		v26,v26,v27,$keyperm
+	lvx		v29,$x40,$key1
+	?vperm		v27,v27,v28,$keyperm
+	lvx		v30,$x50,$key1
+	?vperm		v28,v28,v29,$keyperm
+	lvx		v31,$x60,$key1
+	?vperm		v29,v29,v30,$keyperm
+	lvx		$twk5,$x70,$key1	# borrow $twk5
+	?vperm		v30,v30,v31,$keyperm
+	lvx		v24,$x00,$key_		# pre-load round[1]
+	?vperm		v31,v31,$twk5,$keyperm
+	lvx		v25,$x10,$key_		# pre-load round[2]
+
+	 vperm		$in0,$inout,$inptail,$inpperm
+	 subi		$inp,$inp,31		# undo "caller"
+	vxor		$twk0,$tweak,$rndkey0
+	vsrab		$tmp,$tweak,$seven	# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	vand		$tmp,$tmp,$eighty7
+	 vxor		$out0,$in0,$twk0
+	vxor		$tweak,$tweak,$tmp
+
+	 lvx_u		$in1,$x10,$inp
+	vxor		$twk1,$tweak,$rndkey0
+	vsrab		$tmp,$tweak,$seven	# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	 le?vperm	$in1,$in1,$in1,$leperm
+	vand		$tmp,$tmp,$eighty7
+	 vxor		$out1,$in1,$twk1
+	vxor		$tweak,$tweak,$tmp
+
+	 lvx_u		$in2,$x20,$inp
+	 andi.		$taillen,$len,15
+	vxor		$twk2,$tweak,$rndkey0
+	vsrab		$tmp,$tweak,$seven	# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	 le?vperm	$in2,$in2,$in2,$leperm
+	vand		$tmp,$tmp,$eighty7
+	 vxor		$out2,$in2,$twk2
+	vxor		$tweak,$tweak,$tmp
+
+	 lvx_u		$in3,$x30,$inp
+	 sub		$len,$len,$taillen
+	vxor		$twk3,$tweak,$rndkey0
+	vsrab		$tmp,$tweak,$seven	# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	 le?vperm	$in3,$in3,$in3,$leperm
+	vand		$tmp,$tmp,$eighty7
+	 vxor		$out3,$in3,$twk3
+	vxor		$tweak,$tweak,$tmp
+
+	 lvx_u		$in4,$x40,$inp
+	 subi		$len,$len,0x60
+	vxor		$twk4,$tweak,$rndkey0
+	vsrab		$tmp,$tweak,$seven	# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	 le?vperm	$in4,$in4,$in4,$leperm
+	vand		$tmp,$tmp,$eighty7
+	 vxor		$out4,$in4,$twk4
+	vxor		$tweak,$tweak,$tmp
+
+	 lvx_u		$in5,$x50,$inp
+	 addi		$inp,$inp,0x60
+	vxor		$twk5,$tweak,$rndkey0
+	vsrab		$tmp,$tweak,$seven	# next tweak value
+	vaddubm		$tweak,$tweak,$tweak
+	vsldoi		$tmp,$tmp,$tmp,15
+	 le?vperm	$in5,$in5,$in5,$leperm
+	vand		$tmp,$tmp,$eighty7
+	 vxor		$out5,$in5,$twk5
+	vxor		$tweak,$tweak,$tmp
+
+	vxor		v31,v31,$rndkey0
+	mtctr		$rounds
+	b		Loop_xts_dec6x
+
+.align	5
+Loop_xts_dec6x:
+	vncipher	$out0,$out0,v24
+	vncipher	$out1,$out1,v24
+	vncipher	$out2,$out2,v24
+	vncipher	$out3,$out3,v24
+	vncipher	$out4,$out4,v24
+	vncipher	$out5,$out5,v24
+	lvx		v24,$x20,$key_		# round[3]
+	addi		$key_,$key_,0x20
+
+	vncipher	$out0,$out0,v25
+	vncipher	$out1,$out1,v25
+	vncipher	$out2,$out2,v25
+	vncipher	$out3,$out3,v25
+	vncipher	$out4,$out4,v25
+	vncipher	$out5,$out5,v25
+	lvx		v25,$x10,$key_		# round[4]
+	bdnz		Loop_xts_dec6x
+
+	subic		$len,$len,96		# $len-=96
+	 vxor		$in0,$twk0,v31		# xor with last round key
+	vncipher	$out0,$out0,v24
+	vncipher	$out1,$out1,v24
+	 vsrab		$tmp,$tweak,$seven	# next tweak value
+	 vxor		$twk0,$tweak,$rndkey0
+	 vaddubm	$tweak,$tweak,$tweak
+	vncipher	$out2,$out2,v24
+	vncipher	$out3,$out3,v24
+	 vsldoi		$tmp,$tmp,$tmp,15
+	vncipher	$out4,$out4,v24
+	vncipher	$out5,$out5,v24
+
+	subfe.		r0,r0,r0		# borrow?-1:0
+	 vand		$tmp,$tmp,$eighty7
+	vncipher	$out0,$out0,v25
+	vncipher	$out1,$out1,v25
+	 vxor		$tweak,$tweak,$tmp
+	vncipher	$out2,$out2,v25
+	vncipher	$out3,$out3,v25
+	 vxor		$in1,$twk1,v31
+	 vsrab		$tmp,$tweak,$seven	# next tweak value
+	 vxor		$twk1,$tweak,$rndkey0
+	vncipher	$out4,$out4,v25
+	vncipher	$out5,$out5,v25
+
+	and		r0,r0,$len
+	 vaddubm	$tweak,$tweak,$tweak
+	 vsldoi		$tmp,$tmp,$tmp,15
+	vncipher	$out0,$out0,v26
+	vncipher	$out1,$out1,v26
+	 vand		$tmp,$tmp,$eighty7
+	vncipher	$out2,$out2,v26
+	vncipher	$out3,$out3,v26
+	 vxor		$tweak,$tweak,$tmp
+	vncipher	$out4,$out4,v26
+	vncipher	$out5,$out5,v26
+
+	add		$inp,$inp,r0		# $inp is adjusted in such
+						# way that at exit from the
+						# loop inX-in5 are loaded
+						# with last "words"
+	 vxor		$in2,$twk2,v31
+	 vsrab		$tmp,$tweak,$seven	# next tweak value
+	 vxor		$twk2,$tweak,$rndkey0
+	 vaddubm	$tweak,$tweak,$tweak
+	vncipher	$out0,$out0,v27
+	vncipher	$out1,$out1,v27
+	 vsldoi		$tmp,$tmp,$tmp,15
+	vncipher	$out2,$out2,v27
+	vncipher	$out3,$out3,v27
+	 vand		$tmp,$tmp,$eighty7
+	vncipher	$out4,$out4,v27
+	vncipher	$out5,$out5,v27
+
+	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
+	 vxor		$tweak,$tweak,$tmp
+	vncipher	$out0,$out0,v28
+	vncipher	$out1,$out1,v28
+	 vxor		$in3,$twk3,v31
+	 vsrab		$tmp,$tweak,$seven	# next tweak value
+	 vxor		$twk3,$tweak,$rndkey0
+	vncipher	$out2,$out2,v28
+	vncipher	$out3,$out3,v28
+	 vaddubm	$tweak,$tweak,$tweak
+	 vsldoi		$tmp,$tmp,$tmp,15
+	vncipher	$out4,$out4,v28
+	vncipher	$out5,$out5,v28
+	lvx		v24,$x00,$key_		# re-pre-load round[1]
+	 vand		$tmp,$tmp,$eighty7
+
+	vncipher	$out0,$out0,v29
+	vncipher	$out1,$out1,v29
+	 vxor		$tweak,$tweak,$tmp
+	vncipher	$out2,$out2,v29
+	vncipher	$out3,$out3,v29
+	 vxor		$in4,$twk4,v31
+	 vsrab		$tmp,$tweak,$seven	# next tweak value
+	 vxor		$twk4,$tweak,$rndkey0
+	vncipher	$out4,$out4,v29
+	vncipher	$out5,$out5,v29
+	lvx		v25,$x10,$key_		# re-pre-load round[2]
+	 vaddubm	$tweak,$tweak,$tweak
+	 vsldoi		$tmp,$tmp,$tmp,15
+
+	vncipher	$out0,$out0,v30
+	vncipher	$out1,$out1,v30
+	 vand		$tmp,$tmp,$eighty7
+	vncipher	$out2,$out2,v30
+	vncipher	$out3,$out3,v30
+	 vxor		$tweak,$tweak,$tmp
+	vncipher	$out4,$out4,v30
+	vncipher	$out5,$out5,v30
+	 vxor		$in5,$twk5,v31
+	 vsrab		$tmp,$tweak,$seven	# next tweak value
+	 vxor		$twk5,$tweak,$rndkey0
+
+	vncipherlast	$out0,$out0,$in0
+	 lvx_u		$in0,$x00,$inp		# load next input block
+	 vaddubm	$tweak,$tweak,$tweak
+	 vsldoi		$tmp,$tmp,$tmp,15
+	vncipherlast	$out1,$out1,$in1
+	 lvx_u		$in1,$x10,$inp
+	vncipherlast	$out2,$out2,$in2
+	 le?vperm	$in0,$in0,$in0,$leperm
+	 lvx_u		$in2,$x20,$inp
+	 vand		$tmp,$tmp,$eighty7
+	vncipherlast	$out3,$out3,$in3
+	 le?vperm	$in1,$in1,$in1,$leperm
+	 lvx_u		$in3,$x30,$inp
+	vncipherlast	$out4,$out4,$in4
+	 le?vperm	$in2,$in2,$in2,$leperm
+	 lvx_u		$in4,$x40,$inp
+	 vxor		$tweak,$tweak,$tmp
+	vncipherlast	$out5,$out5,$in5
+	 le?vperm	$in3,$in3,$in3,$leperm
+	 lvx_u		$in5,$x50,$inp
+	 addi		$inp,$inp,0x60
+	 le?vperm	$in4,$in4,$in4,$leperm
+	 le?vperm	$in5,$in5,$in5,$leperm
+
+	le?vperm	$out0,$out0,$out0,$leperm
+	le?vperm	$out1,$out1,$out1,$leperm
+	stvx_u		$out0,$x00,$out		# store output
+	 vxor		$out0,$in0,$twk0
+	le?vperm	$out2,$out2,$out2,$leperm
+	stvx_u		$out1,$x10,$out
+	 vxor		$out1,$in1,$twk1
+	le?vperm	$out3,$out3,$out3,$leperm
+	stvx_u		$out2,$x20,$out
+	 vxor		$out2,$in2,$twk2
+	le?vperm	$out4,$out4,$out4,$leperm
+	stvx_u		$out3,$x30,$out
+	 vxor		$out3,$in3,$twk3
+	le?vperm	$out5,$out5,$out5,$leperm
+	stvx_u		$out4,$x40,$out
+	 vxor		$out4,$in4,$twk4
+	stvx_u		$out5,$x50,$out
+	 vxor		$out5,$in5,$twk5
+	addi		$out,$out,0x60
+
+	mtctr		$rounds
+	beq		Loop_xts_dec6x		# did $len-=96 borrow?
+
+	addic.		$len,$len,0x60
+	beq		Lxts_dec6x_zero
+	cmpwi		$len,0x20
+	blt		Lxts_dec6x_one
+	nop
+	beq		Lxts_dec6x_two
+	cmpwi		$len,0x40
+	blt		Lxts_dec6x_three
+	nop
+	beq		Lxts_dec6x_four
+
+Lxts_dec6x_five:
+	vxor		$out0,$in1,$twk0
+	vxor		$out1,$in2,$twk1
+	vxor		$out2,$in3,$twk2
+	vxor		$out3,$in4,$twk3
+	vxor		$out4,$in5,$twk4
+
+	bl		_aesp8_xts_dec5x
+
+	le?vperm	$out0,$out0,$out0,$leperm
+	vmr		$twk0,$twk5		# unused tweak
+	vxor		$twk1,$tweak,$rndkey0
+	le?vperm	$out1,$out1,$out1,$leperm
+	stvx_u		$out0,$x00,$out		# store output
+	vxor		$out0,$in0,$twk1
+	le?vperm	$out2,$out2,$out2,$leperm
+	stvx_u		$out1,$x10,$out
+	le?vperm	$out3,$out3,$out3,$leperm
+	stvx_u		$out2,$x20,$out
+	le?vperm	$out4,$out4,$out4,$leperm
+	stvx_u		$out3,$x30,$out
+	stvx_u		$out4,$x40,$out
+	addi		$out,$out,0x50
+	bne		Lxts_dec6x_steal
+	b		Lxts_dec6x_done
+
+.align	4
+Lxts_dec6x_four:
+	vxor		$out0,$in2,$twk0
+	vxor		$out1,$in3,$twk1
+	vxor		$out2,$in4,$twk2
+	vxor		$out3,$in5,$twk3
+	vxor		$out4,$out4,$out4
+
+	bl		_aesp8_xts_dec5x
+
+	le?vperm	$out0,$out0,$out0,$leperm
+	vmr		$twk0,$twk4		# unused tweak
+	vmr		$twk1,$twk5
+	le?vperm	$out1,$out1,$out1,$leperm
+	stvx_u		$out0,$x00,$out		# store output
+	vxor		$out0,$in0,$twk5
+	le?vperm	$out2,$out2,$out2,$leperm
+	stvx_u		$out1,$x10,$out
+	le?vperm	$out3,$out3,$out3,$leperm
+	stvx_u		$out2,$x20,$out
+	stvx_u		$out3,$x30,$out
+	addi		$out,$out,0x40
+	bne		Lxts_dec6x_steal
+	b		Lxts_dec6x_done
+
+.align	4
+Lxts_dec6x_three:
+	vxor		$out0,$in3,$twk0
+	vxor		$out1,$in4,$twk1
+	vxor		$out2,$in5,$twk2
+	vxor		$out3,$out3,$out3
+	vxor		$out4,$out4,$out4
+
+	bl		_aesp8_xts_dec5x
+
+	le?vperm	$out0,$out0,$out0,$leperm
+	vmr		$twk0,$twk3		# unused tweak
+	vmr		$twk1,$twk4
+	le?vperm	$out1,$out1,$out1,$leperm
+	stvx_u		$out0,$x00,$out		# store output
+	vxor		$out0,$in0,$twk4
+	le?vperm	$out2,$out2,$out2,$leperm
+	stvx_u		$out1,$x10,$out
+	stvx_u		$out2,$x20,$out
+	addi		$out,$out,0x30
+	bne		Lxts_dec6x_steal
+	b		Lxts_dec6x_done
+
+.align	4
+Lxts_dec6x_two:
+	vxor		$out0,$in4,$twk0
+	vxor		$out1,$in5,$twk1
+	vxor		$out2,$out2,$out2
+	vxor		$out3,$out3,$out3
+	vxor		$out4,$out4,$out4
+
+	bl		_aesp8_xts_dec5x
+
+	le?vperm	$out0,$out0,$out0,$leperm
+	vmr		$twk0,$twk2		# unused tweak
+	vmr		$twk1,$twk3
+	le?vperm	$out1,$out1,$out1,$leperm
+	stvx_u		$out0,$x00,$out		# store output
+	vxor		$out0,$in0,$twk3
+	stvx_u		$out1,$x10,$out
+	addi		$out,$out,0x20
+	bne		Lxts_dec6x_steal
+	b		Lxts_dec6x_done
+
+.align	4
+Lxts_dec6x_one:
+	vxor		$out0,$in5,$twk0
+	nop
+Loop_xts_dec1x:
+	vncipher	$out0,$out0,v24
+	lvx		v24,$x20,$key_		# round[3]
+	addi		$key_,$key_,0x20
+
+	vncipher	$out0,$out0,v25
+	lvx		v25,$x10,$key_		# round[4]
+	bdnz		Loop_xts_dec1x
+
+	subi		r0,$taillen,1
+	vncipher	$out0,$out0,v24
+
+	andi.		r0,r0,16
+	cmpwi		$taillen,0
+	vncipher	$out0,$out0,v25
+
+	sub		$inp,$inp,r0
+	vncipher	$out0,$out0,v26
+
+	lvx_u		$in0,0,$inp
+	vncipher	$out0,$out0,v27
+
+	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
+	vncipher	$out0,$out0,v28
+	lvx		v24,$x00,$key_		# re-pre-load round[1]
+
+	vncipher	$out0,$out0,v29
+	lvx		v25,$x10,$key_		# re-pre-load round[2]
+	 vxor		$twk0,$twk0,v31
+
+	le?vperm	$in0,$in0,$in0,$leperm
+	vncipher	$out0,$out0,v30
+
+	mtctr		$rounds
+	vncipherlast	$out0,$out0,$twk0
+
+	vmr		$twk0,$twk1		# unused tweak
+	vmr		$twk1,$twk2
+	le?vperm	$out0,$out0,$out0,$leperm
+	stvx_u		$out0,$x00,$out		# store output
+	addi		$out,$out,0x10
+	vxor		$out0,$in0,$twk2
+	bne		Lxts_dec6x_steal
+	b		Lxts_dec6x_done
+
+.align	4
+Lxts_dec6x_zero:
+	cmpwi		$taillen,0
+	beq		Lxts_dec6x_done
+
+	lvx_u		$in0,0,$inp
+	le?vperm	$in0,$in0,$in0,$leperm
+	vxor		$out0,$in0,$twk1
+Lxts_dec6x_steal:
+	vncipher	$out0,$out0,v24
+	lvx		v24,$x20,$key_		# round[3]
+	addi		$key_,$key_,0x20
+
+	vncipher	$out0,$out0,v25
+	lvx		v25,$x10,$key_		# round[4]
+	bdnz		Lxts_dec6x_steal
+
+	add		$inp,$inp,$taillen
+	vncipher	$out0,$out0,v24
+
+	cmpwi		$taillen,0
+	vncipher	$out0,$out0,v25
+
+	lvx_u		$in0,0,$inp
+	vncipher	$out0,$out0,v26
+
+	lvsr		$inpperm,0,$taillen	# $in5 is no more
+	vncipher	$out0,$out0,v27
+
+	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
+	vncipher	$out0,$out0,v28
+	lvx		v24,$x00,$key_		# re-pre-load round[1]
+
+	vncipher	$out0,$out0,v29
+	lvx		v25,$x10,$key_		# re-pre-load round[2]
+	 vxor		$twk1,$twk1,v31
+
+	le?vperm	$in0,$in0,$in0,$leperm
+	vncipher	$out0,$out0,v30
+
+	vperm		$in0,$in0,$in0,$inpperm
+	vncipherlast	$tmp,$out0,$twk1
+
+	le?vperm	$out0,$tmp,$tmp,$leperm
+	le?stvx_u	$out0,0,$out
+	be?stvx_u	$tmp,0,$out
+
+	vxor		$out0,$out0,$out0
+	vspltisb	$out1,-1
+	vperm		$out0,$out0,$out1,$inpperm
+	vsel		$out0,$in0,$tmp,$out0
+	vxor		$out0,$out0,$twk0
+
+	subi		r30,$out,1
+	mtctr		$taillen
+Loop_xts_dec6x_steal:
+	lbzu		r0,1(r30)
+	stb		r0,16(r30)
+	bdnz		Loop_xts_dec6x_steal
+
+	li		$taillen,0
+	mtctr		$rounds
+	b		Loop_xts_dec1x		# one more time...
+
+.align	4
+Lxts_dec6x_done:
+	${UCMP}i	$ivp,0
+	beq		Lxts_dec6x_ret
+
+	vxor		$tweak,$twk0,$rndkey0
+	le?vperm	$tweak,$tweak,$tweak,$leperm
+	stvx_u		$tweak,0,$ivp
+
+Lxts_dec6x_ret:
+	mtlr		r11
+	li		r10,`$FRAME+15`
+	li		r11,`$FRAME+31`
+	stvx		$seven,r10,$sp		# wipe copies of round keys
+	addi		r10,r10,32
+	stvx		$seven,r11,$sp
+	addi		r11,r11,32
+	stvx		$seven,r10,$sp
+	addi		r10,r10,32
+	stvx		$seven,r11,$sp
+	addi		r11,r11,32
+	stvx		$seven,r10,$sp
+	addi		r10,r10,32
+	stvx		$seven,r11,$sp
+	addi		r11,r11,32
+	stvx		$seven,r10,$sp
+	addi		r10,r10,32
+	stvx		$seven,r11,$sp
+	addi		r11,r11,32
+
+	mtspr		256,$vrsave
+	lvx		v20,r10,$sp		# ABI says so
+	addi		r10,r10,32
+	lvx		v21,r11,$sp
+	addi		r11,r11,32
+	lvx		v22,r10,$sp
+	addi		r10,r10,32
+	lvx		v23,r11,$sp
+	addi		r11,r11,32
+	lvx		v24,r10,$sp
+	addi		r10,r10,32
+	lvx		v25,r11,$sp
+	addi		r11,r11,32
+	lvx		v26,r10,$sp
+	addi		r10,r10,32
+	lvx		v27,r11,$sp
+	addi		r11,r11,32
+	lvx		v28,r10,$sp
+	addi		r10,r10,32
+	lvx		v29,r11,$sp
+	addi		r11,r11,32
+	lvx		v30,r10,$sp
+	lvx		v31,r11,$sp
+	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+	blr
+	.long		0
+	.byte		0,12,0x04,1,0x80,6,6,0
+	.long		0
+
+.align	5
+_aesp8_xts_dec5x:
+	vncipher	$out0,$out0,v24
+	vncipher	$out1,$out1,v24
+	vncipher	$out2,$out2,v24
+	vncipher	$out3,$out3,v24
+	vncipher	$out4,$out4,v24
+	lvx		v24,$x20,$key_		# round[3]
+	addi		$key_,$key_,0x20
+
+	vncipher	$out0,$out0,v25
+	vncipher	$out1,$out1,v25
+	vncipher	$out2,$out2,v25
+	vncipher	$out3,$out3,v25
+	vncipher	$out4,$out4,v25
+	lvx		v25,$x10,$key_		# round[4]
+	bdnz		_aesp8_xts_dec5x
+
+	subi		r0,$taillen,1
+	vncipher	$out0,$out0,v24
+	vncipher	$out1,$out1,v24
+	vncipher	$out2,$out2,v24
+	vncipher	$out3,$out3,v24
+	vncipher	$out4,$out4,v24
+
+	andi.		r0,r0,16
+	cmpwi		$taillen,0
+	vncipher	$out0,$out0,v25
+	vncipher	$out1,$out1,v25
+	vncipher	$out2,$out2,v25
+	vncipher	$out3,$out3,v25
+	vncipher	$out4,$out4,v25
+	 vxor		$twk0,$twk0,v31
+
+	sub		$inp,$inp,r0
+	vncipher	$out0,$out0,v26
+	vncipher	$out1,$out1,v26
+	vncipher	$out2,$out2,v26
+	vncipher	$out3,$out3,v26
+	vncipher	$out4,$out4,v26
+	 vxor		$in1,$twk1,v31
+
+	vncipher	$out0,$out0,v27
+	lvx_u		$in0,0,$inp
+	vncipher	$out1,$out1,v27
+	vncipher	$out2,$out2,v27
+	vncipher	$out3,$out3,v27
+	vncipher	$out4,$out4,v27
+	 vxor		$in2,$twk2,v31
+
+	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
+	vncipher	$out0,$out0,v28
+	vncipher	$out1,$out1,v28
+	vncipher	$out2,$out2,v28
+	vncipher	$out3,$out3,v28
+	vncipher	$out4,$out4,v28
+	lvx		v24,$x00,$key_		# re-pre-load round[1]
+	 vxor		$in3,$twk3,v31
+
+	vncipher	$out0,$out0,v29
+	le?vperm	$in0,$in0,$in0,$leperm
+	vncipher	$out1,$out1,v29
+	vncipher	$out2,$out2,v29
+	vncipher	$out3,$out3,v29
+	vncipher	$out4,$out4,v29
+	lvx		v25,$x10,$key_		# re-pre-load round[2]
+	 vxor		$in4,$twk4,v31
+
+	vncipher	$out0,$out0,v30
+	vncipher	$out1,$out1,v30
+	vncipher	$out2,$out2,v30
+	vncipher	$out3,$out3,v30
+	vncipher	$out4,$out4,v30
+
+	vncipherlast	$out0,$out0,$twk0
+	vncipherlast	$out1,$out1,$in1
+	vncipherlast	$out2,$out2,$in2
+	vncipherlast	$out3,$out3,$in3
+	vncipherlast	$out4,$out4,$in4
+	mtctr		$rounds
+	blr
+        .long   	0
+        .byte   	0,12,0x14,0,0,0,0,0
+___
+}}	}}}
+
+my $consts=1;
+foreach(split("\n",$code)) {
+        s/\`([^\`]*)\`/eval($1)/geo;
+
+	# constants table endian-specific conversion
+	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
+	    my $conv=$3;
+	    my @bytes=();
+
+	    # convert to endian-agnostic format
+	    if ($1 eq "long") {
+	      foreach (split(/,\s*/,$2)) {
+		my $l = /^0/?oct:int;
+		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
+	      }
+	    } else {
+		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
+	    }
+
+	    # little-endian conversion
+	    if ($flavour =~ /le$/o) {
+		SWITCH: for($conv)  {
+		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
+		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
+		}
+	    }
+
+	    #emit
+	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
+	    next;
+	}
+	$consts=0 if (m/Lconsts:/o);	# end of table
+
+	# instructions prefixed with '?' are endian-specific and need
+	# to be adjusted accordingly...
+	if ($flavour =~ /le$/o) {	# little-endian
+	    s/le\?//o		or
+	    s/be\?/#be#/o	or
+	    s/\?lvsr/lvsl/o	or
+	    s/\?lvsl/lvsr/o	or
+	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
+	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
+	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
+	} else {			# big-endian
+	    s/le\?/#le#/o	or
+	    s/be\?//o		or
+	    s/\?([a-z]+)/$1/o;
+	}
+
+        print $_,"\n";
+}
+
+close STDOUT or die "error closing STDOUT: $!";
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/aes/internal.h
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/aes/internal.h
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/aes/internal.h
@@ -59,6 +59,12 @@ OPENSSL_INLINE int vpaes_capable(void) {
 OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_NEON_capable(); }
 #endif
 
+#elif defined(OPENSSL_PPC64LE)
+#define HWAES
+
+OPENSSL_INLINE int hwaes_capable(void) {
+  return CRYPTO_is_PPC64LE_vcrypto_capable();
+}
 #endif
 
 #endif  // !NO_ASM
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/bcm.c
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/bcm.c
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/bcm.c
@@ -102,6 +102,7 @@
 #include "self_check/fips.c"
 #include "self_check/self_check.c"
 #include "service_indicator/service_indicator.c"
+#include "sha/sha1-altivec.c"
 #include "sha/sha1.c"
 #include "sha/sha256.c"
 #include "sha/sha512.c"
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/bn/bn.c
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/bn/bn.c
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/bn/bn.c
@@ -384,6 +384,23 @@ int bn_expand(BIGNUM *bn, size_t bits) {
 }
 
 int bn_resize_words(BIGNUM *bn, size_t words) {
+#if defined(OPENSSL_PPC64LE)
+  // This is a workaround for a miscompilation bug in Clang 7.0.1 on POWER.
+  // The unittests catch the miscompilation, if it occurs, and it manifests
+  // as a crash in |bn_fits_in_words|.
+  //
+  // The bug only triggers if building in FIPS mode and with -O3. Clang 8.0.1
+  // has the same bug but this workaround is not effective there---I've not
+  // been able to find a workaround for 8.0.1.
+  //
+  // At the time of writing (2019-08-08), Clang git does *not* have this bug
+  // and does not need this workaroud. The current git version should go on to
+  // be Clang 10 thus, once we can depend on that, this can be removed.
+  if (value_barrier_w((size_t)bn->width == words)) {
+    return 1;
+  }
+#endif
+
   if ((size_t)bn->width <= words) {
     if (!bn_wexpand(bn, words)) {
       return 0;
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/cipher/e_aes.c
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/cipher/e_aes.c
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/cipher/e_aes.c
@@ -1455,6 +1455,8 @@ int EVP_has_aes_hardware(void) {
   return hwaes_capable() && crypto_gcm_clmul_enabled();
 #elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
   return hwaes_capable() && CRYPTO_is_ARMv8_PMULL_capable();
+#elif defined(OPENSSL_PPC64LE)
+  return CRYPTO_is_PPC64LE_vcrypto_capable();
 #else
   return 0;
 #endif
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/asm/ghashp8-ppc.pl
@@ -0,0 +1,671 @@
+#! /usr/bin/env perl
+# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# GHASH for for PowerISA v2.07.
+#
+# July 2014
+#
+# Accurate performance measurements are problematic, because it's
+# always virtualized setup with possibly throttled processor.
+# Relative comparison is therefore more informative. This initial
+# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
+# faster than "4-bit" integer-only compiler-generated 64-bit code.
+# "Initial version" means that there is room for futher improvement.
+
+# May 2016
+#
+# 2x aggregated reduction improves performance by 50% (resulting
+# performance on POWER8 is 1 cycle per processed byte), and 4x
+# aggregated reduction - by 170% or 2.7x (resulting in 0.55 cpb).
+
+$flavour=shift;
+$output =shift;
+
+if ($flavour =~ /64/) {
+	$SIZE_T=8;
+	$LRSAVE=2*$SIZE_T;
+	$STU="stdu";
+	$POP="ld";
+	$PUSH="std";
+	$UCMP="cmpld";
+	$SHRI="srdi";
+} elsif ($flavour =~ /32/) {
+	$SIZE_T=4;
+	$LRSAVE=$SIZE_T;
+	$STU="stwu";
+	$POP="lwz";
+	$PUSH="stw";
+	$UCMP="cmplw";
+	$SHRI="srwi";
+} else { die "nonsense $flavour"; }
+
+$sp="r1";
+$FRAME=6*$SIZE_T+13*16;	# 13*16 is for v20-v31 offload
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open OUT,"| $^X \"$xlate\" $flavour \"$output\"" || die "can't call $xlate: $!";
+*STDOUT=*OUT;
+
+my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));	# argument block
+
+my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
+my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
+my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
+my $vrsave="r12";
+
+$code=<<___;
+.machine	"any"
+
+.text
+
+.globl	.gcm_init_p8
+.align	5
+.gcm_init_p8:
+	li		r0,-4096
+	li		r8,0x10
+	mfspr		$vrsave,256
+	li		r9,0x20
+	mtspr		256,r0
+	li		r10,0x30
+	lvx_u		$H,0,r4			# load H
+
+	vspltisb	$xC2,-16		# 0xf0
+	vspltisb	$t0,1			# one
+	vaddubm		$xC2,$xC2,$xC2		# 0xe0
+	vxor		$zero,$zero,$zero
+	vor		$xC2,$xC2,$t0		# 0xe1
+	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
+	vsldoi		$t1,$zero,$t0,1		# ...1
+	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
+	vspltisb	$t2,7
+	vor		$xC2,$xC2,$t1		# 0xc2....01
+	vspltb		$t1,$H,0		# most significant byte
+	vsl		$H,$H,$t0		# H<<=1
+	vsrab		$t1,$t1,$t2		# broadcast carry bit
+	vand		$t1,$t1,$xC2
+	vxor		$IN,$H,$t1		# twisted H
+
+	vsldoi		$H,$IN,$IN,8		# twist even more ...
+	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
+	vsldoi		$Hl,$zero,$H,8		# ... and split
+	vsldoi		$Hh,$H,$zero,8
+
+	stvx_u		$xC2,0,r3		# save pre-computed table
+	stvx_u		$Hl,r8,r3
+	li		r8,0x40
+	stvx_u		$H, r9,r3
+	li		r9,0x50
+	stvx_u		$Hh,r10,r3
+	li		r10,0x60
+
+	vpmsumd		$Xl,$IN,$Hl		# H.lo·H.lo
+	vpmsumd		$Xm,$IN,$H		# H.hi·H.lo+H.lo·H.hi
+	vpmsumd		$Xh,$IN,$Hh		# H.hi·H.hi
+
+	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase
+
+	vsldoi		$t0,$Xm,$zero,8
+	vsldoi		$t1,$zero,$Xm,8
+	vxor		$Xl,$Xl,$t0
+	vxor		$Xh,$Xh,$t1
+
+	vsldoi		$Xl,$Xl,$Xl,8
+	vxor		$Xl,$Xl,$t2
+
+	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
+	vpmsumd		$Xl,$Xl,$xC2
+	vxor		$t1,$t1,$Xh
+	vxor		$IN1,$Xl,$t1
+
+	vsldoi		$H2,$IN1,$IN1,8
+	vsldoi		$H2l,$zero,$H2,8
+	vsldoi		$H2h,$H2,$zero,8
+
+	stvx_u		$H2l,r8,r3		# save H^2
+	li		r8,0x70
+	stvx_u		$H2,r9,r3
+	li		r9,0x80
+	stvx_u		$H2h,r10,r3
+	li		r10,0x90
+___
+{
+my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
+$code.=<<___;
+	vpmsumd		$Xl,$IN,$H2l		# H.lo·H^2.lo
+	 vpmsumd	$Xl1,$IN1,$H2l		# H^2.lo·H^2.lo
+	vpmsumd		$Xm,$IN,$H2		# H.hi·H^2.lo+H.lo·H^2.hi
+	 vpmsumd	$Xm1,$IN1,$H2		# H^2.hi·H^2.lo+H^2.lo·H^2.hi
+	vpmsumd		$Xh,$IN,$H2h		# H.hi·H^2.hi
+	 vpmsumd	$Xh1,$IN1,$H2h		# H^2.hi·H^2.hi
+
+	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase
+	 vpmsumd	$t6,$Xl1,$xC2		# 1st reduction phase
+
+	vsldoi		$t0,$Xm,$zero,8
+	vsldoi		$t1,$zero,$Xm,8
+	 vsldoi		$t4,$Xm1,$zero,8
+	 vsldoi		$t5,$zero,$Xm1,8
+	vxor		$Xl,$Xl,$t0
+	vxor		$Xh,$Xh,$t1
+	 vxor		$Xl1,$Xl1,$t4
+	 vxor		$Xh1,$Xh1,$t5
+
+	vsldoi		$Xl,$Xl,$Xl,8
+	 vsldoi		$Xl1,$Xl1,$Xl1,8
+	vxor		$Xl,$Xl,$t2
+	 vxor		$Xl1,$Xl1,$t6
+
+	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
+	 vsldoi		$t5,$Xl1,$Xl1,8		# 2nd reduction phase
+	vpmsumd		$Xl,$Xl,$xC2
+	 vpmsumd	$Xl1,$Xl1,$xC2
+	vxor		$t1,$t1,$Xh
+	 vxor		$t5,$t5,$Xh1
+	vxor		$Xl,$Xl,$t1
+	 vxor		$Xl1,$Xl1,$t5
+
+	vsldoi		$H,$Xl,$Xl,8
+	 vsldoi		$H2,$Xl1,$Xl1,8
+	vsldoi		$Hl,$zero,$H,8
+	vsldoi		$Hh,$H,$zero,8
+	 vsldoi		$H2l,$zero,$H2,8
+	 vsldoi		$H2h,$H2,$zero,8
+
+	stvx_u		$Hl,r8,r3		# save H^3
+	li		r8,0xa0
+	stvx_u		$H,r9,r3
+	li		r9,0xb0
+	stvx_u		$Hh,r10,r3
+	li		r10,0xc0
+	 stvx_u		$H2l,r8,r3		# save H^4
+	 stvx_u		$H2,r9,r3
+	 stvx_u		$H2h,r10,r3
+
+	mtspr		256,$vrsave
+	blr
+	.long		0
+	.byte		0,12,0x14,0,0,0,2,0
+	.long		0
+.size	.gcm_init_p8,.-.gcm_init_p8
+___
+}
+$code.=<<___;
+.globl	.gcm_gmult_p8
+.align	5
+.gcm_gmult_p8:
+	lis		r0,0xfff8
+	li		r8,0x10
+	mfspr		$vrsave,256
+	li		r9,0x20
+	mtspr		256,r0
+	li		r10,0x30
+	lvx_u		$IN,0,$Xip		# load Xi
+
+	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
+	 le?lvsl	$lemask,r0,r0
+	lvx_u		$H, r9,$Htbl
+	 le?vspltisb	$t0,0x07
+	lvx_u		$Hh,r10,$Htbl
+	 le?vxor	$lemask,$lemask,$t0
+	lvx_u		$xC2,0,$Htbl
+	 le?vperm	$IN,$IN,$IN,$lemask
+	vxor		$zero,$zero,$zero
+
+	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
+	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
+	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi
+
+	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase
+
+	vsldoi		$t0,$Xm,$zero,8
+	vsldoi		$t1,$zero,$Xm,8
+	vxor		$Xl,$Xl,$t0
+	vxor		$Xh,$Xh,$t1
+
+	vsldoi		$Xl,$Xl,$Xl,8
+	vxor		$Xl,$Xl,$t2
+
+	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
+	vpmsumd		$Xl,$Xl,$xC2
+	vxor		$t1,$t1,$Xh
+	vxor		$Xl,$Xl,$t1
+
+	le?vperm	$Xl,$Xl,$Xl,$lemask
+	stvx_u		$Xl,0,$Xip		# write out Xi
+
+	mtspr		256,$vrsave
+	blr
+	.long		0
+	.byte		0,12,0x14,0,0,0,2,0
+	.long		0
+.size	.gcm_gmult_p8,.-.gcm_gmult_p8
+
+.globl	.gcm_ghash_p8
+.align	5
+.gcm_ghash_p8:
+	li		r0,-4096
+	li		r8,0x10
+	mfspr		$vrsave,256
+	li		r9,0x20
+	mtspr		256,r0
+	li		r10,0x30
+	lvx_u		$Xl,0,$Xip		# load Xi
+
+	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
+	li		r8,0x40
+	 le?lvsl	$lemask,r0,r0
+	lvx_u		$H, r9,$Htbl
+	li		r9,0x50
+	 le?vspltisb	$t0,0x07
+	lvx_u		$Hh,r10,$Htbl
+	li		r10,0x60
+	 le?vxor	$lemask,$lemask,$t0
+	lvx_u		$xC2,0,$Htbl
+	 le?vperm	$Xl,$Xl,$Xl,$lemask
+	vxor		$zero,$zero,$zero
+
+	${UCMP}i	$len,64
+	bge		Lgcm_ghash_p8_4x
+
+	lvx_u		$IN,0,$inp
+	addi		$inp,$inp,16
+	subic.		$len,$len,16
+	 le?vperm	$IN,$IN,$IN,$lemask
+	vxor		$IN,$IN,$Xl
+	beq		Lshort
+
+	lvx_u		$H2l,r8,$Htbl		# load H^2
+	li		r8,16
+	lvx_u		$H2, r9,$Htbl
+	add		r9,$inp,$len		# end of input
+	lvx_u		$H2h,r10,$Htbl
+	be?b		Loop_2x
+
+.align	5
+Loop_2x:
+	lvx_u		$IN1,0,$inp
+	le?vperm	$IN1,$IN1,$IN1,$lemask
+
+	 subic		$len,$len,32
+	vpmsumd		$Xl,$IN,$H2l		# H^2.lo·Xi.lo
+	 vpmsumd	$Xl1,$IN1,$Hl		# H.lo·Xi+1.lo
+	 subfe		r0,r0,r0		# borrow?-1:0
+	vpmsumd		$Xm,$IN,$H2		# H^2.hi·Xi.lo+H^2.lo·Xi.hi
+	 vpmsumd	$Xm1,$IN1,$H		# H.hi·Xi+1.lo+H.lo·Xi+1.hi
+	 and		r0,r0,$len
+	vpmsumd		$Xh,$IN,$H2h		# H^2.hi·Xi.hi
+	 vpmsumd	$Xh1,$IN1,$Hh		# H.hi·Xi+1.hi
+	 add		$inp,$inp,r0
+
+	vxor		$Xl,$Xl,$Xl1
+	vxor		$Xm,$Xm,$Xm1
+
+	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase
+
+	vsldoi		$t0,$Xm,$zero,8
+	vsldoi		$t1,$zero,$Xm,8
+	 vxor		$Xh,$Xh,$Xh1
+	vxor		$Xl,$Xl,$t0
+	vxor		$Xh,$Xh,$t1
+
+	vsldoi		$Xl,$Xl,$Xl,8
+	vxor		$Xl,$Xl,$t2
+	 lvx_u		$IN,r8,$inp
+	 addi		$inp,$inp,32
+
+	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
+	vpmsumd		$Xl,$Xl,$xC2
+	 le?vperm	$IN,$IN,$IN,$lemask
+	vxor		$t1,$t1,$Xh
+	vxor		$IN,$IN,$t1
+	vxor		$IN,$IN,$Xl
+	$UCMP		r9,$inp
+	bgt		Loop_2x			# done yet?
+
+	cmplwi		$len,0
+	bne		Leven
+
+Lshort:
+	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
+	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
+	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi
+
+	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase
+
+	vsldoi		$t0,$Xm,$zero,8
+	vsldoi		$t1,$zero,$Xm,8
+	vxor		$Xl,$Xl,$t0
+	vxor		$Xh,$Xh,$t1
+
+	vsldoi		$Xl,$Xl,$Xl,8
+	vxor		$Xl,$Xl,$t2
+
+	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
+	vpmsumd		$Xl,$Xl,$xC2
+	vxor		$t1,$t1,$Xh
+
+Leven:
+	vxor		$Xl,$Xl,$t1
+	le?vperm	$Xl,$Xl,$Xl,$lemask
+	stvx_u		$Xl,0,$Xip		# write out Xi
+
+	mtspr		256,$vrsave
+	blr
+	.long		0
+	.byte		0,12,0x14,0,0,0,4,0
+	.long		0
+___
+{
+my ($Xl3,$Xm2,$IN2,$H3l,$H3,$H3h,
+    $Xh3,$Xm3,$IN3,$H4l,$H4,$H4h) = map("v$_",(20..31));
+my $IN0=$IN;
+my ($H21l,$H21h,$loperm,$hiperm) = ($Hl,$Hh,$H2l,$H2h);
+
+$code.=<<___;
+.align	5
+.gcm_ghash_p8_4x:
+Lgcm_ghash_p8_4x:
+	$STU		$sp,-$FRAME($sp)
+	li		r10,`15+6*$SIZE_T`
+	li		r11,`31+6*$SIZE_T`
+	stvx		v20,r10,$sp
+	addi		r10,r10,32
+	stvx		v21,r11,$sp
+	addi		r11,r11,32
+	stvx		v22,r10,$sp
+	addi		r10,r10,32
+	stvx		v23,r11,$sp
+	addi		r11,r11,32
+	stvx		v24,r10,$sp
+	addi		r10,r10,32
+	stvx		v25,r11,$sp
+	addi		r11,r11,32
+	stvx		v26,r10,$sp
+	addi		r10,r10,32
+	stvx		v27,r11,$sp
+	addi		r11,r11,32
+	stvx		v28,r10,$sp
+	addi		r10,r10,32
+	stvx		v29,r11,$sp
+	addi		r11,r11,32
+	stvx		v30,r10,$sp
+	li		r10,0x60
+	stvx		v31,r11,$sp
+	li		r0,-1
+	stw		$vrsave,`$FRAME-4`($sp)	# save vrsave
+	mtspr		256,r0			# preserve all AltiVec registers
+
+	lvsl		$t0,0,r8		# 0x0001..0e0f
+	#lvx_u		$H2l,r8,$Htbl		# load H^2
+	li		r8,0x70
+	lvx_u		$H2, r9,$Htbl
+	li		r9,0x80
+	vspltisb	$t1,8			# 0x0808..0808
+	#lvx_u		$H2h,r10,$Htbl
+	li		r10,0x90
+	lvx_u		$H3l,r8,$Htbl		# load H^3
+	li		r8,0xa0
+	lvx_u		$H3, r9,$Htbl
+	li		r9,0xb0
+	lvx_u		$H3h,r10,$Htbl
+	li		r10,0xc0
+	lvx_u		$H4l,r8,$Htbl		# load H^4
+	li		r8,0x10
+	lvx_u		$H4, r9,$Htbl
+	li		r9,0x20
+	lvx_u		$H4h,r10,$Htbl
+	li		r10,0x30
+
+	vsldoi		$t2,$zero,$t1,8		# 0x0000..0808
+	vaddubm		$hiperm,$t0,$t2		# 0x0001..1617
+	vaddubm		$loperm,$t1,$hiperm	# 0x0809..1e1f
+
+	$SHRI		$len,$len,4		# this allows to use sign bit
+						# as carry
+	lvx_u		$IN0,0,$inp		# load input
+	lvx_u		$IN1,r8,$inp
+	subic.		$len,$len,8
+	lvx_u		$IN2,r9,$inp
+	lvx_u		$IN3,r10,$inp
+	addi		$inp,$inp,0x40
+	le?vperm	$IN0,$IN0,$IN0,$lemask
+	le?vperm	$IN1,$IN1,$IN1,$lemask
+	le?vperm	$IN2,$IN2,$IN2,$lemask
+	le?vperm	$IN3,$IN3,$IN3,$lemask
+
+	vxor		$Xh,$IN0,$Xl
+
+	 vpmsumd	$Xl1,$IN1,$H3l
+	 vpmsumd	$Xm1,$IN1,$H3
+	 vpmsumd	$Xh1,$IN1,$H3h
+
+	 vperm		$H21l,$H2,$H,$hiperm
+	 vperm		$t0,$IN2,$IN3,$loperm
+	 vperm		$H21h,$H2,$H,$loperm
+	 vperm		$t1,$IN2,$IN3,$hiperm
+	 vpmsumd	$Xm2,$IN2,$H2		# H^2.lo·Xi+2.hi+H^2.hi·Xi+2.lo
+	 vpmsumd	$Xl3,$t0,$H21l		# H^2.lo·Xi+2.lo+H.lo·Xi+3.lo
+	 vpmsumd	$Xm3,$IN3,$H		# H.hi·Xi+3.lo  +H.lo·Xi+3.hi
+	 vpmsumd	$Xh3,$t1,$H21h		# H^2.hi·Xi+2.hi+H.hi·Xi+3.hi
+
+	 vxor		$Xm2,$Xm2,$Xm1
+	 vxor		$Xl3,$Xl3,$Xl1
+	 vxor		$Xm3,$Xm3,$Xm2
+	 vxor		$Xh3,$Xh3,$Xh1
+
+	blt		Ltail_4x
+
+Loop_4x:
+	lvx_u		$IN0,0,$inp
+	lvx_u		$IN1,r8,$inp
+	subic.		$len,$len,4
+	lvx_u		$IN2,r9,$inp
+	lvx_u		$IN3,r10,$inp
+	addi		$inp,$inp,0x40
+	le?vperm	$IN1,$IN1,$IN1,$lemask
+	le?vperm	$IN2,$IN2,$IN2,$lemask
+	le?vperm	$IN3,$IN3,$IN3,$lemask
+	le?vperm	$IN0,$IN0,$IN0,$lemask
+
+	vpmsumd		$Xl,$Xh,$H4l		# H^4.lo·Xi.lo
+	vpmsumd		$Xm,$Xh,$H4		# H^4.hi·Xi.lo+H^4.lo·Xi.hi
+	vpmsumd		$Xh,$Xh,$H4h		# H^4.hi·Xi.hi
+	 vpmsumd	$Xl1,$IN1,$H3l
+	 vpmsumd	$Xm1,$IN1,$H3
+	 vpmsumd	$Xh1,$IN1,$H3h
+
+	vxor		$Xl,$Xl,$Xl3
+	vxor		$Xm,$Xm,$Xm3
+	vxor		$Xh,$Xh,$Xh3
+	 vperm		$t0,$IN2,$IN3,$loperm
+	 vperm		$t1,$IN2,$IN3,$hiperm
+
+	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase
+	 vpmsumd	$Xl3,$t0,$H21l		# H.lo·Xi+3.lo  +H^2.lo·Xi+2.lo
+	 vpmsumd	$Xh3,$t1,$H21h		# H.hi·Xi+3.hi  +H^2.hi·Xi+2.hi
+
+	vsldoi		$t0,$Xm,$zero,8
+	vsldoi		$t1,$zero,$Xm,8
+	vxor		$Xl,$Xl,$t0
+	vxor		$Xh,$Xh,$t1
+
+	vsldoi		$Xl,$Xl,$Xl,8
+	vxor		$Xl,$Xl,$t2
+
+	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
+	 vpmsumd	$Xm2,$IN2,$H2		# H^2.hi·Xi+2.lo+H^2.lo·Xi+2.hi
+	 vpmsumd	$Xm3,$IN3,$H		# H.hi·Xi+3.lo  +H.lo·Xi+3.hi
+	vpmsumd		$Xl,$Xl,$xC2
+
+	 vxor		$Xl3,$Xl3,$Xl1
+	 vxor		$Xh3,$Xh3,$Xh1
+	vxor		$Xh,$Xh,$IN0
+	 vxor		$Xm2,$Xm2,$Xm1
+	vxor		$Xh,$Xh,$t1
+	 vxor		$Xm3,$Xm3,$Xm2
+	vxor		$Xh,$Xh,$Xl
+	bge		Loop_4x
+
+Ltail_4x:
+	vpmsumd		$Xl,$Xh,$H4l		# H^4.lo·Xi.lo
+	vpmsumd		$Xm,$Xh,$H4		# H^4.hi·Xi.lo+H^4.lo·Xi.hi
+	vpmsumd		$Xh,$Xh,$H4h		# H^4.hi·Xi.hi
+
+	vxor		$Xl,$Xl,$Xl3
+	vxor		$Xm,$Xm,$Xm3
+
+	vpmsumd		$t2,$Xl,$xC2		# 1st reduction phase
+
+	vsldoi		$t0,$Xm,$zero,8
+	vsldoi		$t1,$zero,$Xm,8
+	 vxor		$Xh,$Xh,$Xh3
+	vxor		$Xl,$Xl,$t0
+	vxor		$Xh,$Xh,$t1
+
+	vsldoi		$Xl,$Xl,$Xl,8
+	vxor		$Xl,$Xl,$t2
+
+	vsldoi		$t1,$Xl,$Xl,8		# 2nd reduction phase
+	vpmsumd		$Xl,$Xl,$xC2
+	vxor		$t1,$t1,$Xh
+	vxor		$Xl,$Xl,$t1
+
+	addic.		$len,$len,4
+	beq		Ldone_4x
+
+	lvx_u		$IN0,0,$inp
+	${UCMP}i	$len,2
+	li		$len,-4
+	blt		Lone
+	lvx_u		$IN1,r8,$inp
+	beq		Ltwo
+
+Lthree:
+	lvx_u		$IN2,r9,$inp
+	le?vperm	$IN0,$IN0,$IN0,$lemask
+	le?vperm	$IN1,$IN1,$IN1,$lemask
+	le?vperm	$IN2,$IN2,$IN2,$lemask
+
+	vxor		$Xh,$IN0,$Xl
+	vmr		$H4l,$H3l
+	vmr		$H4, $H3
+	vmr		$H4h,$H3h
+
+	vperm		$t0,$IN1,$IN2,$loperm
+	vperm		$t1,$IN1,$IN2,$hiperm
+	vpmsumd		$Xm2,$IN1,$H2		# H^2.lo·Xi+1.hi+H^2.hi·Xi+1.lo
+	vpmsumd		$Xm3,$IN2,$H		# H.hi·Xi+2.lo  +H.lo·Xi+2.hi
+	vpmsumd		$Xl3,$t0,$H21l		# H^2.lo·Xi+1.lo+H.lo·Xi+2.lo
+	vpmsumd		$Xh3,$t1,$H21h		# H^2.hi·Xi+1.hi+H.hi·Xi+2.hi
+
+	vxor		$Xm3,$Xm3,$Xm2
+	b		Ltail_4x
+
+.align	4
+Ltwo:
+	le?vperm	$IN0,$IN0,$IN0,$lemask
+	le?vperm	$IN1,$IN1,$IN1,$lemask
+
+	vxor		$Xh,$IN0,$Xl
+	vperm		$t0,$zero,$IN1,$loperm
+	vperm		$t1,$zero,$IN1,$hiperm
+
+	vsldoi		$H4l,$zero,$H2,8
+	vmr		$H4, $H2
+	vsldoi		$H4h,$H2,$zero,8
+
+	vpmsumd		$Xl3,$t0, $H21l		# H.lo·Xi+1.lo
+	vpmsumd		$Xm3,$IN1,$H		# H.hi·Xi+1.lo+H.lo·Xi+2.hi
+	vpmsumd		$Xh3,$t1, $H21h		# H.hi·Xi+1.hi
+
+	b		Ltail_4x
+
+.align	4
+Lone:
+	le?vperm	$IN0,$IN0,$IN0,$lemask
+
+	vsldoi		$H4l,$zero,$H,8
+	vmr		$H4, $H
+	vsldoi		$H4h,$H,$zero,8
+
+	vxor		$Xh,$IN0,$Xl
+	vxor		$Xl3,$Xl3,$Xl3
+	vxor		$Xm3,$Xm3,$Xm3
+	vxor		$Xh3,$Xh3,$Xh3
+
+	b		Ltail_4x
+
+Ldone_4x:
+	le?vperm	$Xl,$Xl,$Xl,$lemask
+	stvx_u		$Xl,0,$Xip		# write out Xi
+
+	li		r10,`15+6*$SIZE_T`
+	li		r11,`31+6*$SIZE_T`
+	mtspr		256,$vrsave
+	lvx		v20,r10,$sp
+	addi		r10,r10,32
+	lvx		v21,r11,$sp
+	addi		r11,r11,32
+	lvx		v22,r10,$sp
+	addi		r10,r10,32
+	lvx		v23,r11,$sp
+	addi		r11,r11,32
+	lvx		v24,r10,$sp
+	addi		r10,r10,32
+	lvx		v25,r11,$sp
+	addi		r11,r11,32
+	lvx		v26,r10,$sp
+	addi		r10,r10,32
+	lvx		v27,r11,$sp
+	addi		r11,r11,32
+	lvx		v28,r10,$sp
+	addi		r10,r10,32
+	lvx		v29,r11,$sp
+	addi		r11,r11,32
+	lvx		v30,r10,$sp
+	lvx		v31,r11,$sp
+	addi		$sp,$sp,$FRAME
+	blr
+	.long		0
+	.byte		0,12,0x04,0,0x80,0,4,0
+	.long		0
+___
+}
+$code.=<<___;
+.size	.gcm_ghash_p8,.-.gcm_ghash_p8
+
+.asciz  "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+.align  2
+___
+
+foreach (split("\n",$code)) {
+	s/\`([^\`]*)\`/eval $1/geo;
+
+	if ($flavour =~ /le$/o) {	# little-endian
+	    s/le\?//o		or
+	    s/be\?/#be#/o;
+	} else {
+	    s/le\?/#le#/o	or
+	    s/be\?//o;
+	}
+	print $_,"\n";
+}
+
+close STDOUT or die "error closing STDOUT: $!"; # enforce flush
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/gcm.c
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/modes/gcm.c
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/gcm.c
@@ -228,6 +228,13 @@ void CRYPTO_ghash_init(gmult_func *out_m
     *out_hash = gcm_ghash_neon;
     return;
   }
+#elif defined(GHASH_ASM_PPC64LE)
+  if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
+    gcm_init_p8(out_table, H);
+    *out_mult = gcm_gmult_p8;
+    *out_hash = gcm_ghash_p8;
+    return;
+  }
 #endif
 
   gcm_init_nohw(out_table, H);
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/gcm_test.cc
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/modes/gcm_test.cc
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/gcm_test.cc
@@ -215,5 +215,15 @@ TEST(GCMTest, ABI) {
     }
   }
 #endif
+
+#if defined(GHASH_ASM_PPC64LE)
+  if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
+    CHECK_ABI(gcm_init_p8, Htable, kH);
+    CHECK_ABI(gcm_gmult_p8, X, Htable);
+    for (size_t blocks : kBlockCounts) {
+      CHECK_ABI(gcm_ghash_p8, X, Htable, buf, 16 * blocks);
+    }
+  }
+#endif  // GHASH_ASM_PPC64LE
 }
 #endif  // SUPPORTS_ABI_TEST && !OPENSSL_NO_ASM
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/internal.h
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/modes/internal.h
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/modes/internal.h
@@ -325,6 +325,13 @@ void aes_gcm_dec_kernel(const uint8_t *i
                         const u128 Htable[16]);
 #endif
 
+#elif defined(OPENSSL_PPC64LE)
+#define GHASH_ASM_PPC64LE
+#define GCM_FUNCREF
+void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_p8(uint8_t Xi[16], const u128 Htable[16]);
+void gcm_ghash_p8(uint8_t Xi[16], const u128 Htable[16], const uint8_t *inp,
+                  size_t len);
 #endif
 #endif  // OPENSSL_NO_ASM
 
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/rand/getrandom_fillin.h
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/rand/getrandom_fillin.h
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/rand/getrandom_fillin.h
@@ -30,6 +30,8 @@
 #define EXPECTED_NR_getrandom 278
 #elif defined(OPENSSL_ARM)
 #define EXPECTED_NR_getrandom 384
+#elif defined(OPENSSL_PPC64LE)
+#define EXPECTED_NR_getrandom 359
 #elif defined(OPENSSL_RISCV64)
 #define EXPECTED_NR_getrandom 278
 #endif
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/rand/rand.c
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/rand/rand.c
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/rand/rand.c
@@ -431,6 +431,11 @@ void RAND_bytes_with_additional_data(uin
     // Take a read lock around accesses to |state->drbg|. This is needed to
     // avoid returning bad entropy if we race with
     // |rand_thread_state_clear_all|.
+    //
+    // This lock must be taken after any calls to |CRYPTO_sysrand| to avoid a
+    // bug on ppc64le. glibc may implement pthread locks by wrapping user code
+    // in a hardware transaction, but, on some older versions of glibc and the
+    // kernel, syscalls made with |syscall| did not abort the transaction.
     CRYPTO_MUTEX_lock_read(&state->clear_drbg_lock);
 #endif
     if (!CTR_DRBG_reseed(&state->drbg, seed, reseed_additional_data,
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/sha/internal.h
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/sha/internal.h
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/sha/internal.h
@@ -23,6 +23,16 @@
 extern "C" {
 #endif
 
+#if defined(OPENSSL_PPC64LE)
+// POWER has an intrinsics-based implementation of SHA-1 and thus the functions
+// normally defined in assembly are available even with |OPENSSL_NO_ASM| in
+// this case.
+#define SHA1_ASM_PPC64
+void sha1_block_data_order_ppc64(uint32_t *state, const uint8_t *in,
+                           size_t num_blocks);
+#endif
+
+
 // Define SHA{n}[_{variant}]_ASM if sha{n}_block_data_order[_{variant}] is
 // defined in assembly.
 
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/sha/sha1-altivec.c
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/sha/sha1-altivec.c
@@ -0,0 +1,361 @@
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to.  The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    "This product includes cryptographic software written by
+ *     Eric Young (eay@cryptsoft.com)"
+ *    The word 'cryptographic' can be left out if the rouines from the library
+ *    being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ *    the apps directory (application code) you must include an acknowledgement:
+ *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed.  i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.] */
+
+// Altivec-optimized SHA1 in C. This is tested on ppc64le only.
+//
+// References:
+// https://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
+// http://arctic.org/~dean/crypto/sha1.html
+//
+// This code used the generic SHA-1 from OpenSSL as a basis and AltiVec
+// optimisations were added on top.
+
+#include <openssl/sha.h>
+
+#if defined(OPENSSL_PPC64LE)
+
+#include <altivec.h>
+
+void sha1_block_data_order_ppc64(uint32_t *state, const uint8_t *data, size_t num);
+
+static uint32_t rotate(uint32_t a, int n) { return (a << n) | (a >> (32 - n)); }
+
+typedef vector unsigned int vec_uint32_t;
+typedef vector unsigned char vec_uint8_t;
+
+// Vector constants
+static const vec_uint8_t k_swap_endianness = {3,  2,  1, 0, 7,  6,  5,  4,
+                                              11, 10, 9, 8, 15, 14, 13, 12};
+
+// Shift amounts for byte and bit shifts and rotations
+static const vec_uint8_t k_4_bytes = {32, 32, 32, 32, 32, 32, 32, 32,
+                                      32, 32, 32, 32, 32, 32, 32, 32};
+static const vec_uint8_t k_12_bytes = {96, 96, 96, 96, 96, 96, 96, 96,
+                                       96, 96, 96, 96, 96, 96, 96, 96};
+
+#define K_00_19 0x5a827999UL
+#define K_20_39 0x6ed9eba1UL
+#define K_40_59 0x8f1bbcdcUL
+#define K_60_79 0xca62c1d6UL
+
+// Vector versions of the above.
+static const vec_uint32_t K_00_19_x_4 = {K_00_19, K_00_19, K_00_19, K_00_19};
+static const vec_uint32_t K_20_39_x_4 = {K_20_39, K_20_39, K_20_39, K_20_39};
+static const vec_uint32_t K_40_59_x_4 = {K_40_59, K_40_59, K_40_59, K_40_59};
+static const vec_uint32_t K_60_79_x_4 = {K_60_79, K_60_79, K_60_79, K_60_79};
+
+// vector message scheduling: compute message schedule for round i..i+3 where i
+// is divisible by 4. We return the schedule w[i..i+3] as a vector. In
+// addition, we also precompute sum w[i..+3] and an additive constant K. This
+// is done to offload some computation of f() in the integer execution units.
+//
+// Byte shifting code below may not be correct for big-endian systems.
+static vec_uint32_t sched_00_15(vec_uint32_t *pre_added, const void *data,
+                                vec_uint32_t k) {
+  const vector unsigned char unaligned_data =
+    vec_vsx_ld(0, (const unsigned char*) data);
+  const vec_uint32_t v = (vec_uint32_t) unaligned_data;
+  const vec_uint32_t w = vec_perm(v, v, k_swap_endianness);
+  vec_st(w + k, 0, pre_added);
+  return w;
+}
+
+// Compute w[i..i+3] using these steps for i in [16, 20, 24, 28]
+//
+// w'[i  ]  = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) <<< 1
+// w'[i+1]  = (w[i-2] ^ w[i-7] ^ w[i-13] ^ w[i-15]) <<< 1
+// w'[i+2]  = (w[i-1] ^ w[i-6] ^ w[i-12] ^ w[i-14]) <<< 1
+// w'[i+3]  = (     0 ^ w[i-5] ^ w[i-11] ^ w[i-13]) <<< 1
+//
+// w[  i] = w'[  i]
+// w[i+1] = w'[i+1]
+// w[i+2] = w'[i+2]
+// w[i+3] = w'[i+3] ^ (w'[i] <<< 1)
+static vec_uint32_t sched_16_31(vec_uint32_t *pre_added, vec_uint32_t minus_4,
+                                vec_uint32_t minus_8, vec_uint32_t minus_12,
+                                vec_uint32_t minus_16, vec_uint32_t k) {
+  const vec_uint32_t minus_3 = vec_sro(minus_4, k_4_bytes);
+  const vec_uint32_t minus_14 = vec_sld((minus_12), (minus_16), 8);
+  const vec_uint32_t k_1_bit = vec_splat_u32(1);
+  const vec_uint32_t w_prime =
+      vec_rl(minus_3 ^ minus_8 ^ minus_14 ^ minus_16, k_1_bit);
+  const vec_uint32_t w =
+      w_prime ^ vec_rl(vec_slo(w_prime, k_12_bytes), k_1_bit);
+  vec_st(w + k, 0, pre_added);
+  return w;
+}
+
+// Compute w[i..i+3] using this relation for i in [32, 36, 40 ... 76]
+// w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]), 2) <<< 2
+static vec_uint32_t sched_32_79(vec_uint32_t *pre_added, vec_uint32_t minus_4,
+                                vec_uint32_t minus_8, vec_uint32_t minus_16,
+                                vec_uint32_t minus_28, vec_uint32_t minus_32,
+                                vec_uint32_t k) {
+  const vec_uint32_t minus_6 = vec_sld(minus_4, minus_8, 8);
+  const vec_uint32_t k_2_bits = vec_splat_u32(2);
+  const vec_uint32_t w =
+      vec_rl(minus_6 ^ minus_16 ^ minus_28 ^ minus_32, k_2_bits);
+  vec_st(w + k, 0, pre_added);
+  return w;
+}
+
+// As pointed out by Wei Dai <weidai@eskimo.com>, F() below can be simplified
+// to the code in F_00_19. Wei attributes these optimisations to Peter
+// Gutmann's SHS code, and he attributes it to Rich Schroeppel. #define
+// F(x,y,z) (((x) & (y))  |  ((~(x)) & (z))) I've just become aware of another
+// tweak to be made, again from Wei Dai, in F_40_59, (x&a)|(y&a) -> (x|y)&a
+#define F_00_19(b, c, d) ((((c) ^ (d)) & (b)) ^ (d))
+#define F_20_39(b, c, d) ((b) ^ (c) ^ (d))
+#define F_40_59(b, c, d) (((b) & (c)) | (((b) | (c)) & (d)))
+#define F_60_79(b, c, d) F_20_39(b, c, d)
+
+// We pre-added the K constants during message scheduling.
+#define BODY_00_19(i, a, b, c, d, e, f)                         \
+  do {                                                          \
+    (f) = w[i] + (e) + rotate((a), 5) + F_00_19((b), (c), (d)); \
+    (b) = rotate((b), 30);                                      \
+  } while (0)
+
+#define BODY_20_39(i, a, b, c, d, e, f)                         \
+  do {                                                          \
+    (f) = w[i] + (e) + rotate((a), 5) + F_20_39((b), (c), (d)); \
+    (b) = rotate((b), 30);                                      \
+  } while (0)
+
+#define BODY_40_59(i, a, b, c, d, e, f)                         \
+  do {                                                          \
+    (f) = w[i] + (e) + rotate((a), 5) + F_40_59((b), (c), (d)); \
+    (b) = rotate((b), 30);                                      \
+  } while (0)
+
+#define BODY_60_79(i, a, b, c, d, e, f)                         \
+  do {                                                          \
+    (f) = w[i] + (e) + rotate((a), 5) + F_60_79((b), (c), (d)); \
+    (b) = rotate((b), 30);                                      \
+  } while (0)
+
+void sha1_block_data_order_ppc64(uint32_t *state, const uint8_t *data, size_t num) {
+  uint32_t A, B, C, D, E, T;
+
+  A = state[0];
+  B = state[1];
+  C = state[2];
+  D = state[3];
+  E = state[4];
+
+  for (;;) {
+    vec_uint32_t vw[20];
+    const uint32_t *w = (const uint32_t *)&vw;
+
+    vec_uint32_t k = K_00_19_x_4;
+    const vec_uint32_t w0 = sched_00_15(vw + 0, data + 0, k);
+    BODY_00_19(0, A, B, C, D, E, T);
+    BODY_00_19(1, T, A, B, C, D, E);
+    BODY_00_19(2, E, T, A, B, C, D);
+    BODY_00_19(3, D, E, T, A, B, C);
+
+    const vec_uint32_t w4 = sched_00_15(vw + 1, data + 16, k);
+    BODY_00_19(4, C, D, E, T, A, B);
+    BODY_00_19(5, B, C, D, E, T, A);
+    BODY_00_19(6, A, B, C, D, E, T);
+    BODY_00_19(7, T, A, B, C, D, E);
+
+    const vec_uint32_t w8 = sched_00_15(vw + 2, data + 32, k);
+    BODY_00_19(8, E, T, A, B, C, D);
+    BODY_00_19(9, D, E, T, A, B, C);
+    BODY_00_19(10, C, D, E, T, A, B);
+    BODY_00_19(11, B, C, D, E, T, A);
+
+    const vec_uint32_t w12 = sched_00_15(vw + 3, data + 48, k);
+    BODY_00_19(12, A, B, C, D, E, T);
+    BODY_00_19(13, T, A, B, C, D, E);
+    BODY_00_19(14, E, T, A, B, C, D);
+    BODY_00_19(15, D, E, T, A, B, C);
+
+    const vec_uint32_t w16 = sched_16_31(vw + 4, w12, w8, w4, w0, k);
+    BODY_00_19(16, C, D, E, T, A, B);
+    BODY_00_19(17, B, C, D, E, T, A);
+    BODY_00_19(18, A, B, C, D, E, T);
+    BODY_00_19(19, T, A, B, C, D, E);
+
+    k = K_20_39_x_4;
+    const vec_uint32_t w20 = sched_16_31(vw + 5, w16, w12, w8, w4, k);
+    BODY_20_39(20, E, T, A, B, C, D);
+    BODY_20_39(21, D, E, T, A, B, C);
+    BODY_20_39(22, C, D, E, T, A, B);
+    BODY_20_39(23, B, C, D, E, T, A);
+
+    const vec_uint32_t w24 = sched_16_31(vw + 6, w20, w16, w12, w8, k);
+    BODY_20_39(24, A, B, C, D, E, T);
+    BODY_20_39(25, T, A, B, C, D, E);
+    BODY_20_39(26, E, T, A, B, C, D);
+    BODY_20_39(27, D, E, T, A, B, C);
+
+    const vec_uint32_t w28 = sched_16_31(vw + 7, w24, w20, w16, w12, k);
+    BODY_20_39(28, C, D, E, T, A, B);
+    BODY_20_39(29, B, C, D, E, T, A);
+    BODY_20_39(30, A, B, C, D, E, T);
+    BODY_20_39(31, T, A, B, C, D, E);
+
+    const vec_uint32_t w32 = sched_32_79(vw + 8, w28, w24, w16, w4, w0, k);
+    BODY_20_39(32, E, T, A, B, C, D);
+    BODY_20_39(33, D, E, T, A, B, C);
+    BODY_20_39(34, C, D, E, T, A, B);
+    BODY_20_39(35, B, C, D, E, T, A);
+
+    const vec_uint32_t w36 = sched_32_79(vw + 9, w32, w28, w20, w8, w4, k);
+    BODY_20_39(36, A, B, C, D, E, T);
+    BODY_20_39(37, T, A, B, C, D, E);
+    BODY_20_39(38, E, T, A, B, C, D);
+    BODY_20_39(39, D, E, T, A, B, C);
+
+    k = K_40_59_x_4;
+    const vec_uint32_t w40 = sched_32_79(vw + 10, w36, w32, w24, w12, w8, k);
+    BODY_40_59(40, C, D, E, T, A, B);
+    BODY_40_59(41, B, C, D, E, T, A);
+    BODY_40_59(42, A, B, C, D, E, T);
+    BODY_40_59(43, T, A, B, C, D, E);
+
+    const vec_uint32_t w44 = sched_32_79(vw + 11, w40, w36, w28, w16, w12, k);
+    BODY_40_59(44, E, T, A, B, C, D);
+    BODY_40_59(45, D, E, T, A, B, C);
+    BODY_40_59(46, C, D, E, T, A, B);
+    BODY_40_59(47, B, C, D, E, T, A);
+
+    const vec_uint32_t w48 = sched_32_79(vw + 12, w44, w40, w32, w20, w16, k);
+    BODY_40_59(48, A, B, C, D, E, T);
+    BODY_40_59(49, T, A, B, C, D, E);
+    BODY_40_59(50, E, T, A, B, C, D);
+    BODY_40_59(51, D, E, T, A, B, C);
+
+    const vec_uint32_t w52 = sched_32_79(vw + 13, w48, w44, w36, w24, w20, k);
+    BODY_40_59(52, C, D, E, T, A, B);
+    BODY_40_59(53, B, C, D, E, T, A);
+    BODY_40_59(54, A, B, C, D, E, T);
+    BODY_40_59(55, T, A, B, C, D, E);
+
+    const vec_uint32_t w56 = sched_32_79(vw + 14, w52, w48, w40, w28, w24, k);
+    BODY_40_59(56, E, T, A, B, C, D);
+    BODY_40_59(57, D, E, T, A, B, C);
+    BODY_40_59(58, C, D, E, T, A, B);
+    BODY_40_59(59, B, C, D, E, T, A);
+
+    k = K_60_79_x_4;
+    const vec_uint32_t w60 = sched_32_79(vw + 15, w56, w52, w44, w32, w28, k);
+    BODY_60_79(60, A, B, C, D, E, T);
+    BODY_60_79(61, T, A, B, C, D, E);
+    BODY_60_79(62, E, T, A, B, C, D);
+    BODY_60_79(63, D, E, T, A, B, C);
+
+    const vec_uint32_t w64 = sched_32_79(vw + 16, w60, w56, w48, w36, w32, k);
+    BODY_60_79(64, C, D, E, T, A, B);
+    BODY_60_79(65, B, C, D, E, T, A);
+    BODY_60_79(66, A, B, C, D, E, T);
+    BODY_60_79(67, T, A, B, C, D, E);
+
+    const vec_uint32_t w68 = sched_32_79(vw + 17, w64, w60, w52, w40, w36, k);
+    BODY_60_79(68, E, T, A, B, C, D);
+    BODY_60_79(69, D, E, T, A, B, C);
+    BODY_60_79(70, C, D, E, T, A, B);
+    BODY_60_79(71, B, C, D, E, T, A);
+
+    const vec_uint32_t w72 = sched_32_79(vw + 18, w68, w64, w56, w44, w40, k);
+    BODY_60_79(72, A, B, C, D, E, T);
+    BODY_60_79(73, T, A, B, C, D, E);
+    BODY_60_79(74, E, T, A, B, C, D);
+    BODY_60_79(75, D, E, T, A, B, C);
+
+    // We don't use the last value
+    (void)sched_32_79(vw + 19, w72, w68, w60, w48, w44, k);
+    BODY_60_79(76, C, D, E, T, A, B);
+    BODY_60_79(77, B, C, D, E, T, A);
+    BODY_60_79(78, A, B, C, D, E, T);
+    BODY_60_79(79, T, A, B, C, D, E);
+
+    const uint32_t mask = 0xffffffffUL;
+    state[0] = (state[0] + E) & mask;
+    state[1] = (state[1] + T) & mask;
+    state[2] = (state[2] + A) & mask;
+    state[3] = (state[3] + B) & mask;
+    state[4] = (state[4] + C) & mask;
+
+    data += 64;
+    if (--num == 0) {
+      break;
+    }
+
+    A = state[0];
+    B = state[1];
+    C = state[2];
+    D = state[3];
+    E = state[4];
+  }
+}
+
+#endif  // OPENSSL_PPC64LE
+
+#undef K_00_19
+#undef K_20_39
+#undef K_40_59
+#undef K_60_79
+#undef F_00_19
+#undef F_20_39
+#undef F_40_59
+#undef F_60_79
+#undef BODY_00_19
+#undef BODY_20_39
+#undef BODY_40_59
+#undef BODY_60_79
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/internal.h
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/internal.h
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/internal.h
@@ -181,7 +181,7 @@ extern "C" {
 
 
 #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM) || \
-    defined(OPENSSL_AARCH64)
+    defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
 // OPENSSL_cpuid_setup initializes the platform-specific feature cache.
 void OPENSSL_cpuid_setup(void);
 #endif
@@ -1638,6 +1638,16 @@ OPENSSL_INLINE int CRYPTO_is_ARMv8_SHA51
 
 #endif  // OPENSSL_ARM || OPENSSL_AARCH64
 
+#if defined(OPENSSL_PPC64LE)
+
+// CRYPTO_is_PPC64LE_vcrypto_capable returns true iff the current CPU supports
+// the Vector.AES category of instructions.
+int CRYPTO_is_PPC64LE_vcrypto_capable(void);
+
+extern unsigned long OPENSSL_ppc64le_hwcap2;
+
+#endif  // OPENSSL_PPC64LE
+
 #if defined(BORINGSSL_DISPATCH_TEST)
 // Runtime CPU dispatch testing support
 
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/perlasm/ppc-xlate.pl
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/perlasm/ppc-xlate.pl
@@ -0,0 +1,320 @@
+#! /usr/bin/env perl
+# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+my $flavour = shift;
+my $output = shift;
+open STDOUT,">$output" || die "can't open $output: $!";
+
+my %GLOBALS;
+my %TYPES;
+my $dotinlocallabels=($flavour=~/linux/)?1:0;
+
+################################################################
+# directives which need special treatment on different platforms
+################################################################
+my $type = sub {
+    my ($dir,$name,$type) = @_;
+
+    $TYPES{$name} = $type;
+    if ($flavour =~ /linux/) {
+	$name =~ s|^\.||;
+	".type	$name,$type";
+    } else {
+	"";
+    }
+};
+my $globl = sub {
+    my $junk = shift;
+    my $name = shift;
+    my $global = \$GLOBALS{$name};
+    my $type = \$TYPES{$name};
+    my $ret;
+
+    $name =~ s|^\.||;
+
+    SWITCH: for ($flavour) {
+	/aix/		&& do { if (!$$type) {
+				    $$type = "\@function";
+				}
+				if ($$type =~ /function/) {
+				    $name = ".$name";
+				}
+				last;
+			      };
+	/osx/		&& do { $name = "_$name";
+				last;
+			      };
+	/linux.*(32|64le)/
+			&& do {	$ret .= ".globl	$name";
+				if (!$$type) {
+				    $ret .= "\n.type	$name,\@function";
+				    $$type = "\@function";
+				}
+				last;
+			      };
+	/linux.*64/	&& do {	$ret .= ".globl	$name";
+				if (!$$type) {
+				    $ret .= "\n.type	$name,\@function";
+				    $$type = "\@function";
+				}
+				if ($$type =~ /function/) {
+				    $ret .= "\n.section	\".opd\",\"aw\"";
+				    $ret .= "\n.align	3";
+				    $ret .= "\n$name:";
+				    $ret .= "\n.quad	.$name,.TOC.\@tocbase,0";
+				    $ret .= "\n.previous";
+				    $name = ".$name";
+				}
+				last;
+			      };
+    }
+
+    $ret = ".globl	$name" if (!$ret);
+    $$global = $name;
+    $ret;
+};
+my $text = sub {
+    my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
+    $ret = ".abiversion	2\n".$ret	if ($flavour =~ /linux.*64le/);
+    $ret;
+};
+my $machine = sub {
+    my $junk = shift;
+    my $arch = shift;
+    if ($flavour =~ /osx/)
+    {	$arch =~ s/\"//g;
+	$arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
+    }
+    ".machine	$arch";
+};
+my $size = sub {
+    if ($flavour =~ /linux/)
+    {	shift;
+	my $name = shift;
+	my $real = $GLOBALS{$name} ? \$GLOBALS{$name} : \$name;
+	my $ret  = ".size	$$real,.-$$real";
+	$name =~ s|^\.||;
+	if ($$real ne $name) {
+	    $ret .= "\n.size	$name,.-$$real";
+	}
+	$ret;
+    }
+    else
+    {	"";	}
+};
+my $asciz = sub {
+    shift;
+    my $line = join(",",@_);
+    if ($line =~ /^"(.*)"$/)
+    {	".byte	" . join(",",unpack("C*",$1),0) . "\n.align	2";	}
+    else
+    {	"";	}
+};
+my $quad = sub {
+    shift;
+    my @ret;
+    my ($hi,$lo);
+    for (@_) {
+	if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
+	{  $hi=$1?"0x$1":"0"; $lo="0x$2";  }
+	elsif (/^([0-9]+)$/o)
+	{  $hi=$1>>32; $lo=$1&0xffffffff;  } # error-prone with 32-bit perl
+	else
+	{  $hi=undef; $lo=$_; }
+
+	if (defined($hi))
+	{  push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo");  }
+	else
+	{  push(@ret,".quad	$lo");  }
+    }
+    join("\n",@ret);
+};
+
+################################################################
+# simplified mnemonics not handled by at least one assembler
+################################################################
+my $cmplw = sub {
+    my $f = shift;
+    my $cr = 0; $cr = shift if ($#_>1);
+    # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
+    ($flavour =~ /linux.*32/) ?
+	"	.long	".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
+	"	cmplw	".join(',',$cr,@_);
+};
+my $bdnz = sub {
+    my $f = shift;
+    my $bo = $f=~/[\+\-]/ ? 16+9 : 16;	# optional "to be taken" hint
+    "	bc	$bo,0,".shift;
+} if ($flavour!~/linux/);
+my $bltlr = sub {
+    my $f = shift;
+    my $bo = $f=~/\-/ ? 12+2 : 12;	# optional "not to be taken" hint
+    ($flavour =~ /linux/) ?		# GNU as doesn't allow most recent hints
+	"	.long	".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
+	"	bclr	$bo,0";
+};
+my $bnelr = sub {
+    my $f = shift;
+    my $bo = $f=~/\-/ ? 4+2 : 4;	# optional "not to be taken" hint
+    ($flavour =~ /linux/) ?		# GNU as doesn't allow most recent hints
+	"	.long	".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
+	"	bclr	$bo,2";
+};
+my $beqlr = sub {
+    my $f = shift;
+    my $bo = $f=~/-/ ? 12+2 : 12;	# optional "not to be taken" hint
+    ($flavour =~ /linux/) ?		# GNU as doesn't allow most recent hints
+	"	.long	".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
+	"	bclr	$bo,2";
+};
+# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
+# arguments is 64, with "operand out of range" error.
+my $extrdi = sub {
+    my ($f,$ra,$rs,$n,$b) = @_;
+    $b = ($b+$n)&63; $n = 64-$n;
+    "	rldicl	$ra,$rs,$b,$n";
+};
+my $vmr = sub {
+    my ($f,$vx,$vy) = @_;
+    "	vor	$vx,$vy,$vy";
+};
+
+# Some ABIs specify vrsave, special-purpose register #256, as reserved
+# for system use.
+my $no_vrsave = ($flavour =~ /aix|linux64le/);
+my $mtspr = sub {
+    my ($f,$idx,$ra) = @_;
+    if ($idx == 256 && $no_vrsave) {
+	"	or	$ra,$ra,$ra";
+    } else {
+	"	mtspr	$idx,$ra";
+    }
+};
+my $mfspr = sub {
+    my ($f,$rd,$idx) = @_;
+    if ($idx == 256 && $no_vrsave) {
+	"	li	$rd,-1";
+    } else {
+	"	mfspr	$rd,$idx";
+    }
+};
+
+# PowerISA 2.06 stuff
+sub vsxmem_op {
+    my ($f, $vrt, $ra, $rb, $op) = @_;
+    "	.long	".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
+}
+# made-up unaligned memory reference AltiVec/VMX instructions
+my $lvx_u	= sub {	vsxmem_op(@_, 844); };	# lxvd2x
+my $stvx_u	= sub {	vsxmem_op(@_, 972); };	# stxvd2x
+my $lvdx_u	= sub {	vsxmem_op(@_, 588); };	# lxsdx
+my $stvdx_u	= sub {	vsxmem_op(@_, 716); };	# stxsdx
+my $lvx_4w	= sub { vsxmem_op(@_, 780); };	# lxvw4x
+my $stvx_4w	= sub { vsxmem_op(@_, 908); };	# stxvw4x
+
+# PowerISA 2.07 stuff
+sub vcrypto_op {
+    my ($f, $vrt, $vra, $vrb, $op) = @_;
+    "	.long	".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
+}
+my $vcipher	= sub { vcrypto_op(@_, 1288); };
+my $vcipherlast	= sub { vcrypto_op(@_, 1289); };
+my $vncipher	= sub { vcrypto_op(@_, 1352); };
+my $vncipherlast= sub { vcrypto_op(@_, 1353); };
+my $vsbox	= sub { vcrypto_op(@_, 0, 1480); };
+my $vshasigmad	= sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
+my $vshasigmaw	= sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
+my $vpmsumb	= sub { vcrypto_op(@_, 1032); };
+my $vpmsumd	= sub { vcrypto_op(@_, 1224); };
+my $vpmsubh	= sub { vcrypto_op(@_, 1096); };
+my $vpmsumw	= sub { vcrypto_op(@_, 1160); };
+my $vaddudm	= sub { vcrypto_op(@_, 192);  };
+
+my $mtsle	= sub {
+    my ($f, $arg) = @_;
+    "	.long	".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
+};
+
+# PowerISA 3.0 stuff
+my $maddhdu = sub {
+    my ($f, $rt, $ra, $rb, $rc) = @_;
+    "	.long	".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|49;
+};
+my $maddld = sub {
+    my ($f, $rt, $ra, $rb, $rc) = @_;
+    "	.long	".sprintf "0x%X",(4<<26)|($rt<<21)|($ra<<16)|($rb<<11)|($rc<<6)|51;
+};
+
+my $darn = sub {
+    my ($f, $rt, $l) = @_;
+    "	.long	".sprintf "0x%X",(31<<26)|($rt<<21)|($l<<16)|(755<<1);
+};
+
+print <<___;
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+#endif
+
+#if !defined(OPENSSL_NO_ASM) && defined(__powerpc64__) && defined(__ELF__)
+___
+
+while($line=<>) {
+
+    $line =~ s|[#!;].*$||;	# get rid of asm-style comments...
+    $line =~ s|/\*.*\*/||;	# ... and C-style comments...
+    $line =~ s|^\s+||;		# ... and skip white spaces in beginning...
+    $line =~ s|\s+$||;		# ... and at the end
+
+    {
+	$line =~ s|\.L(\w+)|L$1|g;	# common denominator for Locallabel
+	$line =~ s|\bL(\w+)|\.L$1|g	if ($dotinlocallabels);
+    }
+
+    {
+	$line =~ s|(^[\.\w]+)\:\s*||;
+	my $label = $1;
+	if ($label) {
+	    my $xlated = ($GLOBALS{$label} or $label);
+	    print "$xlated:";
+	    if ($flavour =~ /linux.*64le/) {
+		if ($TYPES{$label} =~ /function/) {
+		    printf "\n.localentry	%s,0\n",$xlated;
+		}
+	    }
+	}
+    }
+
+    {
+	$line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
+	my $c = $1; $c = "\t" if ($c eq "");
+	my $mnemonic = $2;
+	my $f = $3;
+	my $opcode = eval("\$$mnemonic");
+	$line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
+	if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
+	elsif ($mnemonic)           { $line = $c.$mnemonic.$f."\t".$line; }
+    }
+
+    print $line if ($line);
+    print "\n";
+}
+
+print <<___;
+#endif  // !OPENSSL_NO_ASM && __powerpc64__ && __ELF__
+#if defined(__ELF__)
+// See https://www.airs.com/blog/archives/518.
+.section .note.GNU-stack,"",\%progbits
+#endif
+___
+
+close STDOUT or die "error closing STDOUT: $!";
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/test/abi_test.h
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/test/abi_test.h
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/test/abi_test.h
@@ -179,7 +179,78 @@ struct alignas(16) Reg128 {
   CALLER_STATE_REGISTER(uint64_t, x28)                               \
   CALLER_STATE_REGISTER(uint64_t, x29)
 
-#endif  // X86_64 || X86 || ARM || AARCH64
+#elif defined(OPENSSL_PPC64LE)
+
+// CRReg only compares the CR2-CR4 bits of a CR register.
+struct CRReg {
+  uint32_t masked() const { return value & 0x00fff000; }
+  bool operator==(CRReg r) const { return masked() == r.masked(); }
+  bool operator!=(CRReg r) const { return masked() != r.masked(); }
+  uint32_t value;
+};
+
+// References:
+// ELFv2: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
+//
+// Note vector and floating-point registers on POWER have two different names.
+// Originally, there were 32 floating-point registers and 32 vector registers,
+// labelled f0-f31 and v0-v31 respectively. Later, VSX (Vector Scalar Extension)
+// unified them into 64 registers vs0-vs63. f0-f31 map to the lower halves of
+// vs0-vs31. v0-v31 map to vs32-vs63. The ABI was defined in terms of pre-VSX
+// names, so we use those names here. In particular, f14-f31 are
+// callee-saved, but the upper halves of vs14-vs31 are not.
+#define LOOP_CALLER_STATE_REGISTERS()  \
+  CALLER_STATE_REGISTER(Reg128, v20)   \
+  CALLER_STATE_REGISTER(Reg128, v21)   \
+  CALLER_STATE_REGISTER(Reg128, v22)   \
+  CALLER_STATE_REGISTER(Reg128, v23)   \
+  CALLER_STATE_REGISTER(Reg128, v24)   \
+  CALLER_STATE_REGISTER(Reg128, v25)   \
+  CALLER_STATE_REGISTER(Reg128, v26)   \
+  CALLER_STATE_REGISTER(Reg128, v27)   \
+  CALLER_STATE_REGISTER(Reg128, v28)   \
+  CALLER_STATE_REGISTER(Reg128, v29)   \
+  CALLER_STATE_REGISTER(Reg128, v30)   \
+  CALLER_STATE_REGISTER(Reg128, v31)   \
+  CALLER_STATE_REGISTER(uint64_t, r14) \
+  CALLER_STATE_REGISTER(uint64_t, r15) \
+  CALLER_STATE_REGISTER(uint64_t, r16) \
+  CALLER_STATE_REGISTER(uint64_t, r17) \
+  CALLER_STATE_REGISTER(uint64_t, r18) \
+  CALLER_STATE_REGISTER(uint64_t, r19) \
+  CALLER_STATE_REGISTER(uint64_t, r20) \
+  CALLER_STATE_REGISTER(uint64_t, r21) \
+  CALLER_STATE_REGISTER(uint64_t, r22) \
+  CALLER_STATE_REGISTER(uint64_t, r23) \
+  CALLER_STATE_REGISTER(uint64_t, r24) \
+  CALLER_STATE_REGISTER(uint64_t, r25) \
+  CALLER_STATE_REGISTER(uint64_t, r26) \
+  CALLER_STATE_REGISTER(uint64_t, r27) \
+  CALLER_STATE_REGISTER(uint64_t, r28) \
+  CALLER_STATE_REGISTER(uint64_t, r29) \
+  CALLER_STATE_REGISTER(uint64_t, r30) \
+  CALLER_STATE_REGISTER(uint64_t, r31) \
+  CALLER_STATE_REGISTER(uint64_t, f14) \
+  CALLER_STATE_REGISTER(uint64_t, f15) \
+  CALLER_STATE_REGISTER(uint64_t, f16) \
+  CALLER_STATE_REGISTER(uint64_t, f17) \
+  CALLER_STATE_REGISTER(uint64_t, f18) \
+  CALLER_STATE_REGISTER(uint64_t, f19) \
+  CALLER_STATE_REGISTER(uint64_t, f20) \
+  CALLER_STATE_REGISTER(uint64_t, f21) \
+  CALLER_STATE_REGISTER(uint64_t, f22) \
+  CALLER_STATE_REGISTER(uint64_t, f23) \
+  CALLER_STATE_REGISTER(uint64_t, f24) \
+  CALLER_STATE_REGISTER(uint64_t, f25) \
+  CALLER_STATE_REGISTER(uint64_t, f26) \
+  CALLER_STATE_REGISTER(uint64_t, f27) \
+  CALLER_STATE_REGISTER(uint64_t, f28) \
+  CALLER_STATE_REGISTER(uint64_t, f29) \
+  CALLER_STATE_REGISTER(uint64_t, f30) \
+  CALLER_STATE_REGISTER(uint64_t, f31) \
+  CALLER_STATE_REGISTER(CRReg, cr)
+
+#endif  // X86_64 || X86 || ARM || AARCH64 || PPC64LE
 
 // Enable ABI testing if all of the following are true.
 //
@@ -231,6 +302,12 @@ inline crypto_word_t ToWord(T t) {
   // on 32-bit architectures for simplicity.
   static_assert(sizeof(T) == 4, "parameter types must be word-sized");
   return (crypto_word_t)t;
+#elif defined(OPENSSL_PPC64LE)
+  // ELFv2, section 2.2.2.3 says the parameter save area sign- or zero-extends
+  // parameters passed in memory. Section 2.2.3 is unclear on how to handle
+  // register parameters, but section 2.2.2.3 additionally says that the memory
+  // copy of a parameter is identical to the register one.
+  return (crypto_word_t)t;
 #elif defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)
   // AAPCS64, section 5.4.2, clauses C.7 and C.14 says any remaining bits in
   // aarch are unspecified. iOS64 contradicts this and says the callee extends
@@ -285,9 +362,9 @@ inline crypto_word_t ToWord(T t) {
 template <typename R, typename... Args>
 inline crypto_word_t CheckImpl(Result *out, bool unwind, R (*func)(Args...),
                                typename DeductionGuard<Args>::Type... args) {
-  // We only support up to 8 arguments, so all arguments on aarch64 are passed
-  // in registers. This is simpler and avoids the iOS discrepancy around packing
-  // small arguments on the stack. (See the iOS64 reference.)
+  // We only support up to 8 arguments, so all arguments on aarch64 and ppc64le
+  // are passed in registers. This is simpler and avoids the iOS discrepancy
+  // around packing small arguments on the stack. (See the iOS64 reference.)
   static_assert(sizeof...(args) <= 8,
                 "too many arguments for abi_test_trampoline");
 
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/test/asm/trampoline-ppc.pl
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/test/asm/trampoline-ppc.pl
@@ -0,0 +1,262 @@
+#!/usr/bin/env perl
+# Copyright (c) 2019, Google Inc.
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# This file defines helper functions for crypto/test/abi_test.h on ppc64le. See
+# that header for details on how to use this.
+#
+# For convenience, this file is linked into libcrypto, where consuming builds
+# already support architecture-specific sources. The static linker should drop
+# this code in non-test binaries. This includes a shared library build of
+# libcrypto, provided --gc-sections or equivalent is used.
+#
+# References:
+#
+# ELFv2: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf
+
+use strict;
+
+my $flavour = shift;
+my $output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/;
+my $dir = $1;
+my $xlate;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\"";
+*STDOUT = *OUT;
+
+unless ($flavour =~ /linux.*64le/) {
+    die "This file only supports the ELFv2 ABI, used by ppc64le";
+}
+
+my $code = "";
+
+sub load_or_store_regs {
+  # $op is "l" or "st".
+  my ($op, $base_reg, $base_offset) = @_;
+  # Vector registers.
+  foreach (20..31) {
+    my $offset = $base_offset + ($_ - 20) * 16;
+    # Vector registers only support indexed register addressing.
+    $code .= "\tli\tr11, $offset\n";
+    $code .= "\t${op}vx\tv$_, r11, $base_reg\n";
+  }
+  # Save general registers.
+  foreach (14..31) {
+    my $offset = $base_offset + 192 + ($_ - 14) * 8;
+    $code .= "\t${op}d\tr$_, $offset($base_reg)\n";
+  }
+  # Save floating point registers.
+  foreach (14..31) {
+    my $offset = $base_offset + 336 + ($_ - 14) * 8;
+    $code .= "\t${op}fd\tf$_, $offset($base_reg)\n";
+  }
+}
+
+sub load_regs {
+  my ($base_reg, $base_offset) = @_;
+  load_or_store_regs("l", $base_reg, $base_offset);
+}
+
+sub store_regs {
+  my ($base_reg, $base_offset) = @_;
+  load_or_store_regs("st", $base_reg, $base_offset);
+}
+
+my ($func, $state, $argv, $argc) = ("r3", "r4", "r5", "r6");
+$code .= <<____;
+.machine	"any"
+.text
+
+# abi_test_trampoline loads callee-saved registers from |state|, calls |func|
+# with |argv|, then saves the callee-saved registers into |state|. It returns
+# the result of |func|. The |unwind| argument is unused.
+# uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
+#                              const uint64_t *argv, size_t argc,
+#                              uint64_t unwind);
+.globl	abi_test_trampoline
+.align	5
+abi_test_trampoline:
+	# LR is saved into the caller's stack frame.
+	mflr	r0
+	std	r0, 16(r1)
+
+	# Allocate 66*8 = 528 bytes of stack frame. From the top of the stack
+	# to the bottom, the stack frame is:
+	#
+	#     0(r1) - Back chain pointer
+	#     8(r1) - CR save area
+	#    16(r1) - LR save area (for |func|)
+	#    24(r1) - TOC pointer save area
+	#    32(r1) - Saved copy of |state|
+	#    40(r1) - Padding
+	#    48(r1) - Vector register save area (v20-v31, 12 registers)
+	#   240(r1) - General register save area (r14-r31, 18 registers)
+	#   384(r1) - Floating point register save area (f14-f31, 18 registers)
+	#
+	# Note the layouts of the register save areas and CallerState match.
+	#
+	# In the ELFv2 ABI, the parameter save area is optional if the function
+	# is non-variadic and all parameters fit in registers. We only support
+	# such functions, so we omit it to test that |func| does not rely on it.
+	stdu	r1, -528(r1)
+
+	mfcr	r0
+	std	r0, 8(r1)	# Save CR
+	std	r2, 24(r1)	# Save TOC
+	std	$state, 32(r1)	# Save |state|
+____
+# Save registers to the stack.
+store_regs("r1", 48);
+# Load registers from the caller.
+load_regs($state, 0);
+$code .= <<____;
+	# Load CR from |state|.
+	ld	r0, 480($state)
+	mtcr	r0
+
+	# Move parameters into temporary registers so they are not clobbered.
+	addi	r11, $argv, -8	# Adjust for ldu below
+	mr	r12, $func
+
+	# Load parameters into registers.
+	cmpdi	$argc, 0
+	beq	.Largs_done
+	mtctr	$argc
+	ldu	r3, 8(r11)
+	bdz	.Largs_done
+	ldu	r4, 8(r11)
+	bdz	.Largs_done
+	ldu	r5, 8(r11)
+	bdz	.Largs_done
+	ldu	r6, 8(r11)
+	bdz	.Largs_done
+	ldu	r7, 8(r11)
+	bdz	.Largs_done
+	ldu	r8, 8(r11)
+	bdz	.Largs_done
+	ldu	r9, 8(r11)
+	bdz	.Largs_done
+	ldu	r10, 8(r11)
+
+.Largs_done:
+	li	r2, 0		# Clear TOC to test |func|'s global entry point
+	mtctr	r12
+	bctrl
+	ld	r2, 24(r1)	# Restore TOC
+
+	ld	$state, 32(r1)	# Reload |state|
+____
+# Output resulting registers to the caller.
+store_regs($state, 0);
+# Restore registers from the stack.
+load_regs("r1", 48);
+$code .= <<____;
+	mfcr	r0
+	std	r0, 480($state)	# Output CR to caller
+	ld	r0, 8(r1)
+	mtcrf	0b00111000, r0	# Restore CR2-CR4
+	addi	r1, r1, 528
+	ld	r0, 16(r1)	# Restore LR
+	mtlr	r0
+	blr
+.size	abi_test_trampoline,.-abi_test_trampoline
+____
+
+# abi_test_clobber_* clobbers the corresponding register. These are used to test
+# the ABI-testing framework.
+foreach (0..31) {
+  # r1 is the stack pointer. r13 is the thread pointer.
+  next if ($_ == 1 || $_ == 13);
+  $code .= <<____;
+.globl	abi_test_clobber_r$_
+.align	5
+abi_test_clobber_r$_:
+	li	r$_, 0
+	blr
+.size	abi_test_clobber_r$_,.-abi_test_clobber_r$_
+____
+}
+
+foreach (0..31) {
+  $code .= <<____;
+.globl	abi_test_clobber_f$_
+.align	4
+abi_test_clobber_f$_:
+	li	r0, 0
+	# Use the red zone.
+	std	r0, -8(r1)
+	lfd	f$_, -8(r1)
+	blr
+.size	abi_test_clobber_f$_,.-abi_test_clobber_f$_
+____
+}
+
+foreach (0..31) {
+  $code .= <<____;
+.globl	abi_test_clobber_v$_
+.align	4
+abi_test_clobber_v$_:
+	vxor	v$_, v$_, v$_
+	blr
+.size	abi_test_clobber_v$_,.-abi_test_clobber_v$_
+____
+}
+
+foreach (0..7) {
+  # PPC orders CR fields in big-endian, so the mask is reversed from what one
+  # would expect.
+  my $mask = 1 << (7 - $_);
+  $code .= <<____;
+.globl	abi_test_clobber_cr$_
+.align	4
+abi_test_clobber_cr$_:
+	# Flip the bits on cr$_ rather than setting to zero. With a four-bit
+	# register, zeroing it will do nothing 1 in 16 times.
+	mfcr	r0
+	not	r0, r0
+	mtcrf	$mask, r0
+	blr
+.size	abi_test_clobber_cr$_,.-abi_test_clobber_cr$_
+____
+}
+
+$code .= <<____;
+.globl	abi_test_clobber_ctr
+.align	4
+abi_test_clobber_ctr:
+	li	r0, 0
+	mtctr	r0
+	blr
+.size	abi_test_clobber_ctr,.-abi_test_clobber_ctr
+
+.globl	abi_test_clobber_lr
+.align	4
+abi_test_clobber_lr:
+	mflr	r0
+	mtctr	r0
+	li	r0, 0
+	mtlr	r0
+	bctr
+.size	abi_test_clobber_lr,.-abi_test_clobber_lr
+
+____
+
+print $code;
+close STDOUT or die "error closing STDOUT: $!";
Index: chromium-125.0.6422.41/third_party/boringssl/src/include/openssl/target.h
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/include/openssl/target.h
+++ chromium-125.0.6422.41/third_party/boringssl/src/include/openssl/target.h
@@ -34,6 +34,9 @@
 #elif defined(__ARMEL__) || defined(_M_ARM)
 #define OPENSSL_32_BIT
 #define OPENSSL_ARM
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && defined(_LITTLE_ENDIAN)
+#define OPENSSL_64_BIT
+#define OPENSSL_PPC64LE
 #elif defined(__MIPSEL__) && !defined(__LP64__)
 #define OPENSSL_32_BIT
 #define OPENSSL_MIPS
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/acvp/modulewrapper/main.cc
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/util/fipstools/acvp/modulewrapper/main.cc
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/acvp/modulewrapper/main.cc
@@ -37,6 +37,8 @@ int main(int argc, char **argv) {
     puts("ARM (32-bit)");
 #elif defined(OPENSSL_AARCH64)
     puts("aarch64 (64-bit)");
+#elif defined(OPENSSL_PPC64LE)
+    puts("PPC64LE (64-bit)");
 #else
 #error "FIPS build not supported on this architecture"
 #endif
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/delocate.go
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/util/fipstools/delocate/delocate.go
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/delocate.go
@@ -54,7 +54,8 @@ type stringWriter interface {
 type processorType int
 
 const (
-	x86_64 processorType = iota + 1
+	ppc64le processorType = iota + 1
+	x86_64
 	aarch64
 )
 
@@ -67,6 +68,8 @@ type delocation struct {
 
 	// symbols is the set of symbols defined in the module.
 	symbols map[string]struct{}
+	// localEntrySymbols is the set of symbols with .localentry directives.
+	localEntrySymbols map[string]struct{}
 	// redirectors maps from out-call symbol name to the name of a
 	// redirector function for that symbol. E.g. “memcpy” ->
 	// “bcm_redirector_memcpy”.
@@ -75,6 +78,9 @@ type delocation struct {
 	// should be used to reference it. E.g. “P384_data_storage” ->
 	// “P384_data_storage”.
 	bssAccessorsNeeded map[string]string
+	// tocLoaders is a set of symbol names for which TOC helper functions
+	// are required. (ppc64le only.)
+	tocLoaders map[string]struct{}
 	// gotExternalsNeeded is a set of symbol names for which we need
 	// “delta” symbols: symbols that contain the offset from their location
 	// to the memory in question.
@@ -151,6 +157,8 @@ func (d *delocation) processInput(input
 			switch d.processor {
 			case x86_64:
 				statement, err = d.processIntelInstruction(statement, node.up)
+			case ppc64le:
+				statement, err = d.processPPCInstruction(statement, node.up)
 			case aarch64:
 				statement, err = d.processAarch64Instruction(statement, node.up)
 			default:
@@ -247,7 +255,7 @@ func (d *delocation) processDirective(st
 			d.writeNode(statement)
 			break
 
-		case ".debug", ".note":
+		case ".debug", ".note", ".toc":
 			d.writeNode(statement)
 			break
 
@@ -336,6 +344,10 @@ func (d *delocation) processLabelContain
 		d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n")
 	}
 
+	if name == ".localentry" {
+		d.output.WriteString(localEntryName(args[0]) + ":\n")
+	}
+
 	return statement, nil
 }
 
@@ -646,6 +658,191 @@ func (d *delocation) processAarch64Instr
 	return statement, nil
 }
 
+/* ppc64le
+
+[PABI]: “64-Bit ELF V2 ABI Specification. Power Architecture.” March 21st,
+        2017
+
+(Also useful is “Power ISA Version 2.07 B”. Note that version three of that
+document is /not/ good as that's POWER9 specific.)
+
+ppc64le doesn't have IP-relative addressing and does a lot to work around this.
+Rather than reference a PLT and GOT direction, it has a single structure called
+the TOC (Table Of Contents). Within the TOC is the contents of .rodata, .data,
+.got, .plt, .bss, etc sections [PABI;3.3].
+
+A pointer to the TOC is maintained in r2 and the following pattern is used to
+load the address of an element into a register:
+
+  addis <address register>, 2, foo@toc@ha
+  addi <address register>, <address register>, foo@toc@l
+
+The “addis” instruction shifts a signed constant left 16 bits and adds the
+result to its second argument, saving the result in the first argument. The
+“addi” instruction does the same, but without shifting. Thus the “@toc@ha"
+suffix on a symbol means “the top 16 bits of the TOC offset” and “@toc@l” means
+“the bottom 16 bits of the offset”. However, note that both values are signed,
+thus offsets in the top half of a 64KB chunk will have an @ha value that's one
+greater than expected and a negative @l value.
+
+The TOC is specific to a “module” (basically an executable or shared object).
+This means that there's not a single TOC in a process and that r2 needs to
+change as control moves between modules. Thus functions have two entry points:
+the “global” entry point and the “local” entry point. Jumps from within the
+same module can use the local entry while jumps from other modules must use the
+global entry. The global entry establishes the correct value of r2 before
+running the function and the local entry skips that code.
+
+The global entry point for a function is defined by its label. The local entry
+is a power-of-two number of bytes from the global entry, set by the
+“.localentry” directive. (ppc64le instructions are always 32 bits, so an offset
+of 1 or 2 bytes is treated as an offset of zero.)
+
+In order to help the global entry code set r2 to point to the local TOC, r12 is
+set to the address of the global entry point when called [PABI;2.2.1.1]. Thus
+the global entry will typically use an addis+addi pair to add a known offset to
+r12 and store it in r2. For example:
+
+foo:
+  addis 2, 12, .TOC. - foo@ha
+  addi  2, 2,  .TOC. - foo@l
+
+(It's worth noting that the '@' operator binds very loosely, so the 3rd
+arguments parse as (.TOC. - foo)@ha and (.TOC. - foo)@l.)
+
+When calling a function, the compiler doesn't know whether that function is in
+the same module or not. Thus it doesn't know whether r12 needs to be set nor
+whether r2 will be clobbered on return. Rather than always assume the worst,
+the linker fixes stuff up once it knows that a call is going out of module:
+
+Firstly, calling, say, memcpy (which we assume to be in a different module)
+won't actually jump directly to memcpy, or even a PLT resolution function.
+It'll call a synthesised function that:
+  a) saves r2 in the caller's stack frame
+  b) loads the address of memcpy@PLT into r12
+  c) jumps to r12.
+
+As this synthesised function loads memcpy@PLT, a call to memcpy from the
+compiled code just references “memcpy” directly, not “memcpy@PLT”.
+
+Since it jumps directly to memcpy@PLT, it can't restore r2 on return. Thus
+calls must be followed by a nop. If the call ends up going out-of-module, the
+linker will rewrite that nop to load r2 from the stack.
+
+Speaking of the stack, the stack pointer is kept in r1 and there's a 288-byte
+red-zone. The format of the stack frame is defined [PABI;2.2.2] and must be
+followed as called functions will write into their parent's stack frame. For
+example, the synthesised out-of-module trampolines will save r2 24 bytes into
+the caller's frame and all non-leaf functions save the return address 16 bytes
+into the caller's frame.
+
+A final point worth noting: some RISC ISAs have r0 wired to zero: all reads
+result in zero and all writes are discarded. POWER does something a little like
+that, but r0 is only special in certain argument positions for certain
+instructions. You just have to read the manual to know which they are.
+
+
+Delocation is easier than Intel because there's just TOC references, but it's
+also harder because there's no IP-relative addressing.
+
+Jumps are IP-relative however, and have a 24-bit immediate value. So we can
+jump to functions that set a register to the needed value. (r3 is the
+return-value register and so that's what is generally used here.) */
+
+// isPPC64LEAPair recognises an addis+addi pair that's adding the offset of
+// source to relative and writing the result to target.
+func (d *delocation) isPPC64LEAPair(statement *node32) (target, source, relative string, ok bool) {
+	instruction := skipWS(statement.up).up
+	assertNodeType(instruction, ruleInstructionName)
+	name1 := d.contents(instruction)
+	args1 := instructionArgs(instruction.next)
+
+	statement = statement.next
+	instruction = skipWS(statement.up).up
+	assertNodeType(instruction, ruleInstructionName)
+	name2 := d.contents(instruction)
+	args2 := instructionArgs(instruction.next)
+
+	if name1 != "addis" ||
+		len(args1) != 3 ||
+		name2 != "addi" ||
+		len(args2) != 3 {
+		return "", "", "", false
+	}
+
+	target = d.contents(args1[0])
+	relative = d.contents(args1[1])
+	source1 := d.contents(args1[2])
+	source2 := d.contents(args2[2])
+
+	if !strings.HasSuffix(source1, "@ha") ||
+		!strings.HasSuffix(source2, "@l") ||
+		source1[:len(source1)-3] != source2[:len(source2)-2] ||
+		d.contents(args2[0]) != target ||
+		d.contents(args2[1]) != target {
+		return "", "", "", false
+	}
+
+	source = source1[:len(source1)-3]
+	ok = true
+	return
+}
+
+// establishTOC writes the global entry prelude for a function. The standard
+// prelude involves relocations so this version moves the relocation outside
+// the integrity-checked area.
+func establishTOC(w stringWriter) {
+	w.WriteString("999:\n")
+	w.WriteString("\taddis 2, 12, .LBORINGSSL_external_toc-999b@ha\n")
+	w.WriteString("\taddi 2, 2, .LBORINGSSL_external_toc-999b@l\n")
+	w.WriteString("\tld 12, 0(2)\n")
+	w.WriteString("\tadd 2, 2, 12\n")
+}
+
+// loadTOCFuncName returns the name of a synthesized function that sets r3 to
+// the value of “symbol+offset”.
+func loadTOCFuncName(symbol, offset string) string {
+	symbol = strings.Replace(symbol, ".", "_dot_", -1)
+	ret := ".Lbcm_loadtoc_" + symbol
+	if len(offset) != 0 {
+		offset = strings.Replace(offset, "+", "_plus_", -1)
+		offset = strings.Replace(offset, "-", "_minus_", -1)
+		ret += "_" + offset
+	}
+	return ret
+}
+
+func (d *delocation) loadFromTOC(w stringWriter, symbol, offset, dest string) wrapperFunc {
+	d.tocLoaders[symbol+"\x00"+offset] = struct{}{}
+
+	return func(k func()) {
+		w.WriteString("\taddi 1, 1, -288\n")   // Clear the red zone.
+		w.WriteString("\tmflr " + dest + "\n") // Stash the link register.
+		w.WriteString("\tstd " + dest + ", -8(1)\n")
+		// The TOC loader will use r3, so stash it if necessary.
+		if dest != "3" {
+			w.WriteString("\tstd 3, -16(1)\n")
+		}
+
+		// Because loadTOCFuncName returns a “.L” name, we don't need a
+		// nop after this call.
+		w.WriteString("\tbl " + loadTOCFuncName(symbol, offset) + "\n")
+
+		// Cycle registers around. We need r3 -> destReg, -8(1) ->
+		// lr and, optionally, -16(1) -> r3.
+		w.WriteString("\tstd 3, -24(1)\n")
+		w.WriteString("\tld 3, -8(1)\n")
+		w.WriteString("\tmtlr 3\n")
+		w.WriteString("\tld " + dest + ", -24(1)\n")
+		if dest != "3" {
+			w.WriteString("\tld 3, -16(1)\n")
+		}
+		w.WriteString("\taddi 1, 1, 288\n")
+
+		k()
+	}
+}
+
 func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
 	for symRef != nil && symRef.pegRule == ruleOffset {
 		offset := d.contents(symRef)
@@ -700,6 +897,215 @@ func (d *delocation) parseMemRef(memRef
 	return
 }
 
+func (d *delocation) processPPCInstruction(statement, instruction *node32) (*node32, error) {
+	assertNodeType(instruction, ruleInstructionName)
+	instructionName := d.contents(instruction)
+	isBranch := instructionName[0] == 'b'
+
+	argNodes := instructionArgs(instruction.next)
+
+	var wrappers wrapperStack
+	var args []string
+	changed := false
+
+Args:
+	for i, arg := range argNodes {
+		fullArg := arg
+		isIndirect := false
+
+		if arg.pegRule == ruleIndirectionIndicator {
+			arg = arg.next
+			isIndirect = true
+		}
+
+		switch arg.pegRule {
+		case ruleRegisterOrConstant, ruleLocalLabelRef:
+			args = append(args, d.contents(fullArg))
+
+		case ruleTOCRefLow:
+			return nil, errors.New("Found low TOC reference outside preamble pattern")
+
+		case ruleTOCRefHigh:
+			target, _, relative, ok := d.isPPC64LEAPair(statement)
+			if !ok {
+				return nil, errors.New("Found high TOC reference outside preamble pattern")
+			}
+
+			if relative != "12" {
+				return nil, fmt.Errorf("preamble is relative to %q, not r12", relative)
+			}
+
+			if target != "2" {
+				return nil, fmt.Errorf("preamble is setting %q, not r2", target)
+			}
+
+			statement = statement.next
+			establishTOC(d.output)
+			instructionName = ""
+			changed = true
+			break Args
+
+		case ruleMemoryRef:
+			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
+			changed = didChange
+
+			if len(symbol) > 0 {
+				if _, localEntrySymbol := d.localEntrySymbols[symbol]; localEntrySymbol && isBranch {
+					symbol = localEntryName(symbol)
+					changed = true
+				} else if _, knownSymbol := d.symbols[symbol]; knownSymbol {
+					symbol = localTargetName(symbol)
+					changed = true
+				} else if !symbolIsLocal && !isSynthesized(symbol) && len(section) == 0 {
+					changed = true
+					d.redirectors[symbol] = redirectorName(symbol)
+					symbol = redirectorName(symbol)
+					// TODO(davidben): This should sanity-check the next
+					// instruction is a nop and ideally remove it.
+					wrappers = append(wrappers, func(k func()) {
+						k()
+						// Like the linker's PLT stubs, redirector functions
+						// expect callers to restore r2.
+						d.output.WriteString("\tld 2, 24(1)\n")
+					})
+				}
+			}
+
+			switch section {
+			case "":
+
+			case "tls":
+				// This section identifier just tells the
+				// assembler to use r13, the pointer to the
+				// thread-local data [PABI;3.7.3.3].
+
+			case "toc@ha":
+				// Delete toc@ha instructions. Per
+				// [PABI;3.6.3], the linker is allowed to erase
+				// toc@ha instructions. We take advantage of
+				// this by unconditionally erasing the toc@ha
+				// instructions and doing the full lookup when
+				// processing toc@l.
+				//
+				// Note that any offset here applies before @ha
+				// and @l. That is, 42+foo@toc@ha is
+				// #ha(42+foo-.TOC.), not 42+#ha(foo-.TOC.). Any
+				// corresponding toc@l references are required
+				// by the ABI to have the same offset. The
+				// offset will be incorporated in full when
+				// those are processed.
+				if instructionName != "addis" || len(argNodes) != 3 || i != 2 || args[1] != "2" {
+					return nil, errors.New("can't process toc@ha reference")
+				}
+				changed = true
+				instructionName = ""
+				break Args
+
+			case "toc@l":
+				// Per [PAB;3.6.3], this instruction must take
+				// as input a register which was the output of
+				// a toc@ha computation and compute the actual
+				// address of some symbol. The toc@ha
+				// computation was elided, so we ignore that
+				// input register and compute the address
+				// directly.
+				changed = true
+
+				// For all supported toc@l instructions, the
+				// destination register is the first argument.
+				destReg := args[0]
+
+				wrappers = append(wrappers, d.loadFromTOC(d.output, symbol, offset, destReg))
+				switch instructionName {
+				case "addi":
+					// The original instruction was:
+					//   addi destReg, tocHaReg, offset+symbol@toc@l
+					instructionName = ""
+
+				case "ld", "lhz", "lwz":
+					// The original instruction was:
+					//   l?? destReg, offset+symbol@toc@l(tocHaReg)
+					//
+					// We transform that into the
+					// equivalent dereference of destReg:
+					//   l?? destReg, 0(destReg)
+					origInstructionName := instructionName
+					instructionName = ""
+
+					assertNodeType(memRef, ruleBaseIndexScale)
+					assertNodeType(memRef.up, ruleRegisterOrConstant)
+					if memRef.next != nil || memRef.up.next != nil {
+						return nil, errors.New("expected single register in BaseIndexScale for ld argument")
+					}
+
+					baseReg := destReg
+					if baseReg == "0" {
+						// Register zero is special as the base register for a load.
+						// Avoid it by spilling and using r3 instead.
+						baseReg = "3"
+						wrappers = append(wrappers, func(k func()) {
+							d.output.WriteString("\taddi 1, 1, -288\n") // Clear the red zone.
+							d.output.WriteString("\tstd " + baseReg + ", -8(1)\n")
+							d.output.WriteString("\tmr " + baseReg + ", " + destReg + "\n")
+							k()
+							d.output.WriteString("\tld " + baseReg + ", -8(1)\n")
+							d.output.WriteString("\taddi 1, 1, 288\n") // Clear the red zone.
+						})
+					}
+
+					wrappers = append(wrappers, func(k func()) {
+						d.output.WriteString("\t" + origInstructionName + " " + destReg + ", 0(" + baseReg + ")\n")
+					})
+				default:
+					return nil, fmt.Errorf("can't process TOC argument to %q", instructionName)
+				}
+
+			default:
+				return nil, fmt.Errorf("Unknown section type %q", section)
+			}
+
+			argStr := ""
+			if isIndirect {
+				argStr += "*"
+			}
+			argStr += symbol
+			if len(offset) > 0 {
+				argStr += offset
+			}
+			if len(section) > 0 {
+				argStr += "@"
+				argStr += section
+			}
+
+			for ; memRef != nil; memRef = memRef.next {
+				argStr += d.contents(memRef)
+			}
+
+			args = append(args, argStr)
+
+		default:
+			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
+		}
+	}
+
+	if changed {
+		d.writeCommentedNode(statement)
+
+		var replacement string
+		if len(instructionName) > 0 {
+			replacement = "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
+		}
+
+		wrappers.do(func() {
+			d.output.WriteString(replacement)
+		})
+	} else {
+		d.writeNode(statement)
+	}
+
+	return statement, nil
+}
+
 /* Intel */
 
 type instructionType int
@@ -1323,6 +1729,8 @@ func writeAarch64Function(w stringWriter
 func transform(w stringWriter, inputs []inputFile) error {
 	// symbols contains all defined symbols.
 	symbols := make(map[string]struct{})
+	// localEntrySymbols contains all symbols with a .localentry directive.
+	localEntrySymbols := make(map[string]struct{})
 	// fileNumbers is the set of IDs seen in .file directives.
 	fileNumbers := make(map[int]struct{})
 	// maxObservedFileNumber contains the largest seen file number in a
@@ -1346,6 +1754,25 @@ func transform(w stringWriter, inputs []
 		}, ruleStatement, ruleLabel, ruleSymbolName)
 
 		forEachPath(input.ast.up, func(node *node32) {
+			node = node.up
+			assertNodeType(node, ruleLabelContainingDirectiveName)
+			directive := input.contents[node.begin:node.end]
+			if directive != ".localentry" {
+				return
+			}
+			// Extract the first argument.
+			node = skipWS(node.next)
+			assertNodeType(node, ruleSymbolArgs)
+			node = node.up
+			assertNodeType(node, ruleSymbolArg)
+			symbol := input.contents[node.begin:node.end]
+			if _, ok := localEntrySymbols[symbol]; ok {
+				panic(fmt.Sprintf("Duplicate .localentry directive found: %q in %q", symbol, input.path))
+			}
+			localEntrySymbols[symbol] = struct{}{}
+		}, ruleStatement, ruleLabelContainingDirective)
+
+		forEachPath(input.ast.up, func(node *node32) {
 			assertNodeType(node, ruleLocationDirective)
 			directive := input.contents[node.begin:node.end]
 			if !strings.HasPrefix(directive, ".file") {
@@ -1393,11 +1820,13 @@ func transform(w stringWriter, inputs []
 
 	d := &delocation{
 		symbols:             symbols,
+		localEntrySymbols:   localEntrySymbols,
 		processor:           processor,
 		commentIndicator:    commentIndicator,
 		output:              w,
 		redirectors:         make(map[string]string),
 		bssAccessorsNeeded:  make(map[string]string),
+		tocLoaders:          make(map[string]struct{}),
 		gotExternalsNeeded:  make(map[string]struct{}),
 		gotOffsetsNeeded:    make(map[string]struct{}),
 		gotOffOffsetsNeeded: make(map[string]struct{}),
@@ -1432,6 +1861,22 @@ func transform(w stringWriter, inputs []
 	for _, name := range redirectorNames {
 		redirector := d.redirectors[name]
 		switch d.processor {
+		case ppc64le:
+			w.WriteString(".section \".toc\", \"aw\"\n")
+			w.WriteString(".Lredirector_toc_" + name + ":\n")
+			w.WriteString(".quad " + name + "\n")
+			w.WriteString(".text\n")
+			w.WriteString(".type " + redirector + ", @function\n")
+			w.WriteString(redirector + ":\n")
+			// |name| will clobber r2, so save it. This is matched by a restore in
+			// redirector calls.
+			w.WriteString("\tstd 2, 24(1)\n")
+			// Load and call |name|'s global entry point.
+			w.WriteString("\taddis 12, 2, .Lredirector_toc_" + name + "@toc@ha\n")
+			w.WriteString("\tld 12, .Lredirector_toc_" + name + "@toc@l(12)\n")
+			w.WriteString("\tmtctr 12\n")
+			w.WriteString("\tbctr\n")
+
 		case aarch64:
 			writeAarch64Function(w, redirector, func(w stringWriter) {
 				w.WriteString("\tb " + name + "\n")
@@ -1456,6 +1901,13 @@ func transform(w stringWriter, inputs []
 		target := d.bssAccessorsNeeded[name]
 
 		switch d.processor {
+		case ppc64le:
+			w.WriteString(".type " + funcName + ", @function\n")
+			w.WriteString(funcName + ":\n")
+			w.WriteString("\taddis 3, 2, " + target + "@toc@ha\n")
+			w.WriteString("\taddi 3, 3, " + target + "@toc@l\n")
+			w.WriteString("\tblr\n")
+
 		case x86_64:
 			w.WriteString(".type " + funcName + ", @function\n")
 			w.WriteString(funcName + ":\n")
@@ -1471,6 +1923,26 @@ func transform(w stringWriter, inputs []
 	}
 
 	switch d.processor {
+	case ppc64le:
+		loadTOCNames := sortedSet(d.tocLoaders)
+		for _, symbolAndOffset := range loadTOCNames {
+			parts := strings.SplitN(symbolAndOffset, "\x00", 2)
+			symbol, offset := parts[0], parts[1]
+
+			funcName := loadTOCFuncName(symbol, offset)
+			ref := symbol + offset
+
+			w.WriteString(".type " + funcName[2:] + ", @function\n")
+			w.WriteString(funcName[2:] + ":\n")
+			w.WriteString(funcName + ":\n")
+			w.WriteString("\taddis 3, 2, " + ref + "@toc@ha\n")
+			w.WriteString("\taddi 3, 3, " + ref + "@toc@l\n")
+			w.WriteString("\tblr\n")
+		}
+
+		w.WriteString(".LBORINGSSL_external_toc:\n")
+		w.WriteString(".quad .TOC.-.LBORINGSSL_external_toc\n")
+
 	case aarch64:
 		externalNames := sortedSet(d.gotExternalsNeeded)
 		for _, symbol := range externalNames {
@@ -1781,6 +2253,10 @@ func localTargetName(name string) string
 	return ".L" + name + "_local_target"
 }
 
+func localEntryName(name string) string {
+	return ".L" + name + "_local_entry"
+}
+
 func isSynthesized(symbol string) bool {
 	return strings.HasSuffix(symbol, "_bss_get") ||
 		symbol == "OPENSSL_ia32cap_get" ||
@@ -1836,6 +2312,8 @@ func detectProcessor(input inputFile) pr
 		switch instructionName {
 		case "movq", "call", "leaq":
 			return x86_64
+		case "addis", "addi", "mflr":
+			return ppc64le
 		case "str", "bl", "ldr", "st1":
 			return aarch64
 		}
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/delocate.peg
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/util/fipstools/delocate/delocate.peg
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/delocate.peg
@@ -12,7 +12,7 @@
 # OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
 
-# This is a rough parser for x86-64 and aarch64 assembly designed to work with
+# This is a rough parser for x86-64 and ppc64le assembly designed to work with
 # https://github.com/pointlander/peg. delocate.go has a go:generate line for
 # rebuilding delocate.peg.go from this file.
 
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/delocate_test.go
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/util/fipstools/delocate/delocate_test.go
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/delocate_test.go
@@ -39,6 +39,11 @@ func (test *delocateTest) Path(file stri
 
 var delocateTests = []delocateTest{
 	{"generic-FileDirectives", []string{"in.s"}, "out.s"},
+	{"ppc64le-GlobalEntry", []string{"in.s"}, "out.s"},
+	{"ppc64le-LoadToR0", []string{"in.s"}, "out.s"},
+	{"ppc64le-Sample2", []string{"in.s"}, "out.s"},
+	{"ppc64le-Sample", []string{"in.s"}, "out.s"},
+	{"ppc64le-TOCWithOffset", []string{"in.s"}, "out.s"},
 	{"x86_64-Basic", []string{"in.s"}, "out.s"},
 	{"x86_64-BSS", []string{"in.s"}, "out.s"},
 	{"x86_64-GOTRewrite", []string{"in.s"}, "out.s"},
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-GlobalEntry/in.s
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-GlobalEntry/in.s
@@ -0,0 +1,9 @@
+	.text
+foo:
+.LCF0:
+0:
+	addis 2,12,.TOC.-.LCF0@ha
+	addi 2,2,.TOC.-.LCF0@l
+	.localentry foo,.-foo
+.LVL0:
+	bl
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-GlobalEntry/out.s
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-GlobalEntry/out.s
@@ -0,0 +1,62 @@
+.text
+.file 1 "inserted_by_delocate.c"
+.loc 1 1 0
+BORINGSSL_bcm_text_start:
+	.text
+.Lfoo_local_target:
+foo:
+.LCF0:
+
+0:
+
+999:
+	addis 2, 12, .LBORINGSSL_external_toc-999b@ha
+	addi 2, 2, .LBORINGSSL_external_toc-999b@l
+	ld 12, 0(2)
+	add 2, 2, 12
+# WAS addi 2,2,.TOC.-.LCF0@l
+	.localentry foo,.-foo
+.Lfoo_local_entry:
+.LVL0:
+
+	bl
+.text
+.loc 1 2 0
+BORINGSSL_bcm_text_end:
+.LBORINGSSL_external_toc:
+.quad .TOC.-.LBORINGSSL_external_toc
+.type BORINGSSL_bcm_text_hash, @object
+.size BORINGSSL_bcm_text_hash, 32
+BORINGSSL_bcm_text_hash:
+.byte 0xae
+.byte 0x2c
+.byte 0xea
+.byte 0x2a
+.byte 0xbd
+.byte 0xa6
+.byte 0xf3
+.byte 0xec
+.byte 0x97
+.byte 0x7f
+.byte 0x9b
+.byte 0xf6
+.byte 0x94
+.byte 0x9a
+.byte 0xfc
+.byte 0x83
+.byte 0x68
+.byte 0x27
+.byte 0xcb
+.byte 0xa0
+.byte 0xa0
+.byte 0x9f
+.byte 0x6b
+.byte 0x6f
+.byte 0xde
+.byte 0x52
+.byte 0xcd
+.byte 0xe2
+.byte 0xcd
+.byte 0xff
+.byte 0x31
+.byte 0x80
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-LoadToR0/in.s
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-LoadToR0/in.s
@@ -0,0 +1,4 @@
+	.text
+foo:
+	addis 22,2,bar@toc@ha
+	ld 0,bar@toc@l(22)
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-LoadToR0/out.s
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-LoadToR0/out.s
@@ -0,0 +1,72 @@
+.text
+.file 1 "inserted_by_delocate.c"
+.loc 1 1 0
+BORINGSSL_bcm_text_start:
+	.text
+.Lfoo_local_target:
+foo:
+# WAS addis 22,2,bar@toc@ha
+# WAS ld 0,bar@toc@l(22)
+	addi 1, 1, -288
+	mflr 0
+	std 0, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc_bar
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 0, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	addi 1, 1, -288
+	std 3, -8(1)
+	mr 3, 0
+	ld 0, 0(3)
+	ld 3, -8(1)
+	addi 1, 1, 288
+.text
+.loc 1 2 0
+BORINGSSL_bcm_text_end:
+.type bcm_loadtoc_bar, @function
+bcm_loadtoc_bar:
+.Lbcm_loadtoc_bar:
+	addis 3, 2, bar@toc@ha
+	addi 3, 3, bar@toc@l
+	blr
+.LBORINGSSL_external_toc:
+.quad .TOC.-.LBORINGSSL_external_toc
+.type BORINGSSL_bcm_text_hash, @object
+.size BORINGSSL_bcm_text_hash, 32
+BORINGSSL_bcm_text_hash:
+.byte 0xae
+.byte 0x2c
+.byte 0xea
+.byte 0x2a
+.byte 0xbd
+.byte 0xa6
+.byte 0xf3
+.byte 0xec
+.byte 0x97
+.byte 0x7f
+.byte 0x9b
+.byte 0xf6
+.byte 0x94
+.byte 0x9a
+.byte 0xfc
+.byte 0x83
+.byte 0x68
+.byte 0x27
+.byte 0xcb
+.byte 0xa0
+.byte 0xa0
+.byte 0x9f
+.byte 0x6b
+.byte 0x6f
+.byte 0xde
+.byte 0x52
+.byte 0xcd
+.byte 0xe2
+.byte 0xcd
+.byte 0xff
+.byte 0x31
+.byte 0x80
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample/in.s
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample/in.s
@@ -0,0 +1,161 @@
+	.file	"foo.c"
+	.abiversion 2
+	.section	".toc","aw"
+	.section	".text"
+	.section	.rodata
+	.align 3
+	.type	kString, @object
+	.size	kString, 12
+kString:
+	.string	"hello world"
+	.globl kExportedString
+	.align 3
+	.type	kExportedString, @object
+	.size	kExportedString, 26
+kExportedString:
+	.string	"hello world, more visibly"
+	.align 2
+	.type	kGiantArray, @object
+	.size	kGiantArray, 400000
+kGiantArray:
+	.long	1
+	.long	0
+	.zero	399992
+	.lcomm	bss,20,4
+	.type	bss, @object
+	.align 3
+.LC1:
+	.string	"kString is %p\n"
+	.align 3
+.LC2:
+	.string	"kExportedString is %p\n"
+	.align 3
+.LC4:
+	.string	"function is %p\n"
+	.align 3
+.LC5:
+	.string	"exported_function is %p\n"
+	.align 3
+.LC7:
+	.string	"&kString[5] is %p\n"
+	.align 3
+.LC9:
+	.string	"&kGiantArray[0x12345] is %p\n"
+	.section	".toc","aw"
+.LC0:
+	.quad	stderr
+.LC3:
+	.quad	kExportedString
+.LC6:
+	.quad	exported_function
+.LC8:
+	.quad	kString+5
+.LC10:
+	.quad	kGiantArray+298260
+	.section	".text"
+	.align 2
+	.type	function, @function
+function:
+0:	addis 2,12,.TOC.-0b@ha
+	addi 2,2,.TOC.-0b@l
+	.localentry	function,.-function
+	mflr 0
+	std 0,16(1)
+	std 31,-8(1)
+	stdu 1,-112(1)
+	mr 31,1
+	addis 10,2,.LC0@toc@ha
+	ld 9,.LC0@toc@l(10)
+	ld 9,0(9)
+	mr 3,9
+	addis 4,2,.LC1@toc@ha
+	addi 4,4,.LC1@toc@l
+	addis 5,2,kString@toc@ha
+	addi 5,5,kString@toc@l
+	bl fprintf
+	nop
+	addis 10,2,.LC0@toc@ha
+	ld 9,.LC0@toc@l(10)
+	ld 9,0(9)
+	mr 3,9
+	addis 4,2,.LC2@toc@ha
+	addi 4,4,.LC2@toc@l
+	addis 9,2,.LC3@toc@ha
+	ld 5,.LC3@toc@l(9)
+	bl fprintf
+	nop
+	addis 10,2,.LC0@toc@ha
+	ld 9,.LC0@toc@l(10)
+	ld 9,0(9)
+	mr 3,9
+	addis 4,2,.LC4@toc@ha
+	addi 4,4,.LC4@toc@l
+	addis 5,2,function@toc@ha
+	addi 5,5,function@toc@l
+	bl fprintf
+	nop
+	addis 10,2,.LC0@toc@ha
+	ld 9,.LC0@toc@l(10)
+	ld 9,0(9)
+	mr 3,9
+	addis 4,2,.LC5@toc@ha
+	addi 4,4,.LC5@toc@l
+	addis 9,2,.LC6@toc@ha
+	ld 5,.LC6@toc@l(9)
+	bl fprintf
+	nop
+	addis 10,2,.LC0@toc@ha
+	ld 9,.LC0@toc@l(10)
+	ld 9,0(9)
+	mr 3,9
+	addis 4,2,.LC7@toc@ha
+	addi 4,4,.LC7@toc@l
+	addis 9,2,.LC8@toc@ha
+	ld 5,.LC8@toc@l(9)
+	bl fprintf
+	nop
+	addis 10,2,.LC0@toc@ha
+	ld 9,.LC0@toc@l(10)
+	ld 9,0(9)
+	mr 3,9
+	addis 4,2,.LC9@toc@ha
+	addi 4,4,.LC9@toc@l
+	addis 9,2,.LC10@toc@ha
+	ld 5,.LC10@toc@l(9)
+	bl fprintf
+	nop
+	bl exported_function
+	nop
+	mr 3,9
+	addi 1,31,112
+	ld 0,16(1)
+	mtlr 0
+	ld 31,-8(1)
+	blr
+	.long 0
+	.byte 0,0,0,1,128,1,0,1
+	.size	function,.-function
+	.align 2
+	.globl exported_function
+	.type	exported_function, @function
+exported_function:
+0:	addis 2,12,.TOC.-0b@ha
+	addi 2,2,.TOC.-0b@l
+	.localentry	exported_function,.-exported_function
+	mflr 0
+	std 0,16(1)
+	std 31,-8(1)
+	stdu 1,-48(1)
+	mr 31,1
+	bl function
+	mr 3,9
+	addi 1,31,48
+	ld 0,16(1)
+	mtlr 0
+	ld 31,-8(1)
+	blr
+	.long 0
+	.byte 0,0,0,1,128,1,0,1
+	.size	exported_function,.-exported_function
+	.ident	"GCC: (Ubuntu 4.9.2-10ubuntu13) 4.9.2"
+	.section	.note.GNU-stack,"",@progbits
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample/out.s
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample/out.s
@@ -0,0 +1,552 @@
+.text
+.file 1 "inserted_by_delocate.c"
+.loc 1 1 0
+BORINGSSL_bcm_text_start:
+	.file	"foo.c"
+	.abiversion 2
+	.section	".toc","aw"
+# WAS .section	".text"
+.text
+# WAS .section	.rodata
+.text
+	.align 3
+	.type	kString, @object
+	.size	kString, 12
+.LkString_local_target:
+kString:
+	.string	"hello world"
+	.globl kExportedString
+	.align 3
+	.type	kExportedString, @object
+	.size	kExportedString, 26
+.LkExportedString_local_target:
+kExportedString:
+	.string	"hello world, more visibly"
+	.align 2
+	.type	kGiantArray, @object
+	.size	kGiantArray, 400000
+.LkGiantArray_local_target:
+kGiantArray:
+	.long	1
+	.long	0
+	.zero	399992
+	.lcomm	bss,20,4
+	.type	bss, @object
+	.align 3
+.LC1:
+
+	.string	"kString is %p\n"
+	.align 3
+.LC2:
+
+	.string	"kExportedString is %p\n"
+	.align 3
+.LC4:
+
+	.string	"function is %p\n"
+	.align 3
+.LC5:
+
+	.string	"exported_function is %p\n"
+	.align 3
+.LC7:
+
+	.string	"&kString[5] is %p\n"
+	.align 3
+.LC9:
+
+	.string	"&kGiantArray[0x12345] is %p\n"
+	.section	".toc","aw"
+.LC0:
+
+	.quad	stderr
+.LC3:
+
+	.quad	kExportedString
+.LC6:
+
+	.quad	exported_function
+.LC8:
+
+	.quad	kString+5
+.LC10:
+
+	.quad	kGiantArray+298260
+# WAS .section	".text"
+.text
+	.align 2
+	.type	function, @function
+.Lfunction_local_target:
+function:
+0:
+999:
+	addis 2, 12, .LBORINGSSL_external_toc-999b@ha
+	addi 2, 2, .LBORINGSSL_external_toc-999b@l
+	ld 12, 0(2)
+	add 2, 2, 12
+# WAS addi 2,2,.TOC.-0b@l
+	.localentry	function,.-function
+.Lfunction_local_entry:
+	mflr 0
+	std 0,16(1)
+	std 31,-8(1)
+	stdu 1,-112(1)
+	mr 31,1
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+	addi 1, 1, -288
+	mflr 9
+	std 9, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC0
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 9, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 9, 0(9)
+	ld 9,0(9)
+	mr 3,9
+# WAS addis 4,2,.LC1@toc@ha
+# WAS addi 4,4,.LC1@toc@l
+	addi 1, 1, -288
+	mflr 4
+	std 4, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC1
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 4, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS addis 5,2,kString@toc@ha
+# WAS addi 5,5,kString@toc@l
+	addi 1, 1, -288
+	mflr 5
+	std 5, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LkString_local_target
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 5, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS bl fprintf
+	bl	bcm_redirector_fprintf
+	ld 2, 24(1)
+	nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+	addi 1, 1, -288
+	mflr 9
+	std 9, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC0
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 9, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 9, 0(9)
+	ld 9,0(9)
+	mr 3,9
+# WAS addis 4,2,.LC2@toc@ha
+# WAS addi 4,4,.LC2@toc@l
+	addi 1, 1, -288
+	mflr 4
+	std 4, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC2
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 4, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS addis 9,2,.LC3@toc@ha
+# WAS ld 5,.LC3@toc@l(9)
+	addi 1, 1, -288
+	mflr 5
+	std 5, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC3
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 5, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 5, 0(5)
+# WAS bl fprintf
+	bl	bcm_redirector_fprintf
+	ld 2, 24(1)
+	nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+	addi 1, 1, -288
+	mflr 9
+	std 9, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC0
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 9, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 9, 0(9)
+	ld 9,0(9)
+	mr 3,9
+# WAS addis 4,2,.LC4@toc@ha
+# WAS addi 4,4,.LC4@toc@l
+	addi 1, 1, -288
+	mflr 4
+	std 4, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC4
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 4, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS addis 5,2,function@toc@ha
+# WAS addi 5,5,function@toc@l
+	addi 1, 1, -288
+	mflr 5
+	std 5, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_Lfunction_local_target
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 5, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS bl fprintf
+	bl	bcm_redirector_fprintf
+	ld 2, 24(1)
+	nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+	addi 1, 1, -288
+	mflr 9
+	std 9, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC0
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 9, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 9, 0(9)
+	ld 9,0(9)
+	mr 3,9
+# WAS addis 4,2,.LC5@toc@ha
+# WAS addi 4,4,.LC5@toc@l
+	addi 1, 1, -288
+	mflr 4
+	std 4, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC5
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 4, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS addis 9,2,.LC6@toc@ha
+# WAS ld 5,.LC6@toc@l(9)
+	addi 1, 1, -288
+	mflr 5
+	std 5, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC6
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 5, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 5, 0(5)
+# WAS bl fprintf
+	bl	bcm_redirector_fprintf
+	ld 2, 24(1)
+	nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+	addi 1, 1, -288
+	mflr 9
+	std 9, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC0
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 9, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 9, 0(9)
+	ld 9,0(9)
+	mr 3,9
+# WAS addis 4,2,.LC7@toc@ha
+# WAS addi 4,4,.LC7@toc@l
+	addi 1, 1, -288
+	mflr 4
+	std 4, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC7
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 4, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS addis 9,2,.LC8@toc@ha
+# WAS ld 5,.LC8@toc@l(9)
+	addi 1, 1, -288
+	mflr 5
+	std 5, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC8
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 5, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 5, 0(5)
+# WAS bl fprintf
+	bl	bcm_redirector_fprintf
+	ld 2, 24(1)
+	nop
+# WAS addis 10,2,.LC0@toc@ha
+# WAS ld 9,.LC0@toc@l(10)
+	addi 1, 1, -288
+	mflr 9
+	std 9, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC0
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 9, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 9, 0(9)
+	ld 9,0(9)
+	mr 3,9
+# WAS addis 4,2,.LC9@toc@ha
+# WAS addi 4,4,.LC9@toc@l
+	addi 1, 1, -288
+	mflr 4
+	std 4, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC9
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 4, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS addis 9,2,.LC10@toc@ha
+# WAS ld 5,.LC10@toc@l(9)
+	addi 1, 1, -288
+	mflr 5
+	std 5, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC10
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 5, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 5, 0(5)
+# WAS bl fprintf
+	bl	bcm_redirector_fprintf
+	ld 2, 24(1)
+	nop
+# WAS bl exported_function
+	bl	.Lexported_function_local_entry
+	nop
+	mr 3,9
+	addi 1,31,112
+	ld 0,16(1)
+	mtlr 0
+	ld 31,-8(1)
+	blr
+	.long 0
+	.byte 0,0,0,1,128,1,0,1
+	.size	function,.-function
+	.align 2
+	.globl exported_function
+	.type	exported_function, @function
+.Lexported_function_local_target:
+exported_function:
+0:
+999:
+	addis 2, 12, .LBORINGSSL_external_toc-999b@ha
+	addi 2, 2, .LBORINGSSL_external_toc-999b@l
+	ld 12, 0(2)
+	add 2, 2, 12
+# WAS addi 2,2,.TOC.-0b@l
+	.localentry	exported_function,.-exported_function
+.Lexported_function_local_entry:
+	mflr 0
+	std 0,16(1)
+	std 31,-8(1)
+	stdu 1,-48(1)
+	mr 31,1
+# WAS bl function
+	bl	.Lfunction_local_entry
+	mr 3,9
+	addi 1,31,48
+	ld 0,16(1)
+	mtlr 0
+	ld 31,-8(1)
+	blr
+	.long 0
+	.byte 0,0,0,1,128,1,0,1
+	.size	exported_function,.-exported_function
+	.ident	"GCC: (Ubuntu 4.9.2-10ubuntu13) 4.9.2"
+	.section	.note.GNU-stack,"",@progbits
+.text
+.loc 1 2 0
+BORINGSSL_bcm_text_end:
+.section ".toc", "aw"
+.Lredirector_toc_fprintf:
+.quad fprintf
+.text
+.type bcm_redirector_fprintf, @function
+bcm_redirector_fprintf:
+	std 2, 24(1)
+	addis 12, 2, .Lredirector_toc_fprintf@toc@ha
+	ld 12, .Lredirector_toc_fprintf@toc@l(12)
+	mtctr 12
+	bctr
+.type bss_bss_get, @function
+bss_bss_get:
+	addis 3, 2, bss@toc@ha
+	addi 3, 3, bss@toc@l
+	blr
+.type bcm_loadtoc__dot_LC0, @function
+bcm_loadtoc__dot_LC0:
+.Lbcm_loadtoc__dot_LC0:
+	addis 3, 2, .LC0@toc@ha
+	addi 3, 3, .LC0@toc@l
+	blr
+.type bcm_loadtoc__dot_LC1, @function
+bcm_loadtoc__dot_LC1:
+.Lbcm_loadtoc__dot_LC1:
+	addis 3, 2, .LC1@toc@ha
+	addi 3, 3, .LC1@toc@l
+	blr
+.type bcm_loadtoc__dot_LC10, @function
+bcm_loadtoc__dot_LC10:
+.Lbcm_loadtoc__dot_LC10:
+	addis 3, 2, .LC10@toc@ha
+	addi 3, 3, .LC10@toc@l
+	blr
+.type bcm_loadtoc__dot_LC2, @function
+bcm_loadtoc__dot_LC2:
+.Lbcm_loadtoc__dot_LC2:
+	addis 3, 2, .LC2@toc@ha
+	addi 3, 3, .LC2@toc@l
+	blr
+.type bcm_loadtoc__dot_LC3, @function
+bcm_loadtoc__dot_LC3:
+.Lbcm_loadtoc__dot_LC3:
+	addis 3, 2, .LC3@toc@ha
+	addi 3, 3, .LC3@toc@l
+	blr
+.type bcm_loadtoc__dot_LC4, @function
+bcm_loadtoc__dot_LC4:
+.Lbcm_loadtoc__dot_LC4:
+	addis 3, 2, .LC4@toc@ha
+	addi 3, 3, .LC4@toc@l
+	blr
+.type bcm_loadtoc__dot_LC5, @function
+bcm_loadtoc__dot_LC5:
+.Lbcm_loadtoc__dot_LC5:
+	addis 3, 2, .LC5@toc@ha
+	addi 3, 3, .LC5@toc@l
+	blr
+.type bcm_loadtoc__dot_LC6, @function
+bcm_loadtoc__dot_LC6:
+.Lbcm_loadtoc__dot_LC6:
+	addis 3, 2, .LC6@toc@ha
+	addi 3, 3, .LC6@toc@l
+	blr
+.type bcm_loadtoc__dot_LC7, @function
+bcm_loadtoc__dot_LC7:
+.Lbcm_loadtoc__dot_LC7:
+	addis 3, 2, .LC7@toc@ha
+	addi 3, 3, .LC7@toc@l
+	blr
+.type bcm_loadtoc__dot_LC8, @function
+bcm_loadtoc__dot_LC8:
+.Lbcm_loadtoc__dot_LC8:
+	addis 3, 2, .LC8@toc@ha
+	addi 3, 3, .LC8@toc@l
+	blr
+.type bcm_loadtoc__dot_LC9, @function
+bcm_loadtoc__dot_LC9:
+.Lbcm_loadtoc__dot_LC9:
+	addis 3, 2, .LC9@toc@ha
+	addi 3, 3, .LC9@toc@l
+	blr
+.type bcm_loadtoc__dot_Lfunction_local_target, @function
+bcm_loadtoc__dot_Lfunction_local_target:
+.Lbcm_loadtoc__dot_Lfunction_local_target:
+	addis 3, 2, .Lfunction_local_target@toc@ha
+	addi 3, 3, .Lfunction_local_target@toc@l
+	blr
+.type bcm_loadtoc__dot_LkString_local_target, @function
+bcm_loadtoc__dot_LkString_local_target:
+.Lbcm_loadtoc__dot_LkString_local_target:
+	addis 3, 2, .LkString_local_target@toc@ha
+	addi 3, 3, .LkString_local_target@toc@l
+	blr
+.LBORINGSSL_external_toc:
+.quad .TOC.-.LBORINGSSL_external_toc
+.type BORINGSSL_bcm_text_hash, @object
+.size BORINGSSL_bcm_text_hash, 32
+BORINGSSL_bcm_text_hash:
+.byte 0xae
+.byte 0x2c
+.byte 0xea
+.byte 0x2a
+.byte 0xbd
+.byte 0xa6
+.byte 0xf3
+.byte 0xec
+.byte 0x97
+.byte 0x7f
+.byte 0x9b
+.byte 0xf6
+.byte 0x94
+.byte 0x9a
+.byte 0xfc
+.byte 0x83
+.byte 0x68
+.byte 0x27
+.byte 0xcb
+.byte 0xa0
+.byte 0xa0
+.byte 0x9f
+.byte 0x6b
+.byte 0x6f
+.byte 0xde
+.byte 0x52
+.byte 0xcd
+.byte 0xe2
+.byte 0xcd
+.byte 0xff
+.byte 0x31
+.byte 0x80
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample2/in.s
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample2/in.s
@@ -0,0 +1,226 @@
+	.file	"foo.c"
+	.abiversion 2
+	.section	".toc","aw"
+	.section	".text"
+	.section	".toc","aw"
+.LC0:
+	.quad	stderr
+.LC3:
+	.quad	kExportedString
+.LC6:
+	.quad	exported_function
+	.section	".text"
+	.align 2
+	.p2align 4,,15
+	.globl exported_function
+	.type	exported_function, @function
+exported_function:
+0:	addis 2,12,.TOC.-0b@ha
+	addi 2,2,.TOC.-0b@l
+	.localentry	exported_function,.-exported_function
+	mflr 0
+	std 19,-104(1)
+	std 20,-96(1)
+	std 21,-88(1)
+	std 22,-80(1)
+	addis 21,2,.LC1@toc@ha
+	addis 22,2,.LC2@toc@ha
+	std 23,-72(1)
+	std 24,-64(1)
+	addis 23,2,.LC4@toc@ha
+	addis 24,2,function@toc@ha
+	std 25,-56(1)
+	std 26,-48(1)
+	addis 25,2,.LC5@toc@ha
+	addis 26,2,.LC7@toc@ha
+	std 27,-40(1)
+	std 28,-32(1)
+	addis 28,2,.LC8@toc@ha
+	addi 21,21,.LC1@toc@l
+	std 29,-24(1)
+	std 30,-16(1)
+	addis 29,2,.LANCHOR0@toc@ha
+	addi 22,22,.LC2@toc@l
+	std 31,-8(1)
+	std 0,16(1)
+	addi 29,29,.LANCHOR0@toc@l
+	addi 23,23,.LC4@toc@l
+	stdu 1,-208(1)
+	addis 31,2,.LC0@toc@ha		# gpr load fusion, type long
+	ld 31,.LC0@toc@l(31)
+	addis 19,2,.LC3@toc@ha		# gpr load fusion, type long
+	ld 19,.LC3@toc@l(19)
+	addis 30,29,0x5
+	addi 24,24,function@toc@l
+	addis 20,2,.LC6@toc@ha		# gpr load fusion, type long
+	ld 20,.LC6@toc@l(20)
+	addi 25,25,.LC5@toc@l
+	addi 26,26,.LC7@toc@l
+	addi 27,29,5
+	addi 28,28,.LC8@toc@l
+	addi 30,30,-29404
+	.p2align 4,,15
+.L2:
+	ld 3,0(31)
+	mr 5,21
+	mr 6,29
+	li 4,1
+	bl __fprintf_chk
+	nop
+	ld 3,0(31)
+	mr 5,22
+	mr 6,19
+	li 4,1
+	bl __fprintf_chk
+	nop
+	ld 3,0(31)
+	mr 5,23
+	mr 6,24
+	li 4,1
+	bl __fprintf_chk
+	nop
+	ld 3,0(31)
+	mr 5,25
+	mr 6,20
+	li 4,1
+	bl __fprintf_chk
+	nop
+	ld 3,0(31)
+	mr 5,26
+	mr 6,27
+	li 4,1
+	bl __fprintf_chk
+	nop
+	ld 3,0(31)
+	li 4,1
+	mr 5,28
+	mr 6,30
+	bl __fprintf_chk
+	nop
+	b .L2
+	.long 0
+	.byte 0,0,0,1,128,13,0,0
+	.size	exported_function,.-exported_function
+	.section	".toc","aw"
+	.set .LC11,.LC0
+	.set .LC12,.LC3
+	.set .LC13,.LC6
+	.section	".text"
+	.align 2
+	.p2align 4,,15
+	.type	function, @function
+function:
+0:	addis 2,12,.TOC.-0b@ha
+	addi 2,2,.TOC.-0b@l
+	.localentry	function,.-function
+	mflr 0
+	std 31,-8(1)
+	addis 31,2,.LC11@toc@ha		# gpr load fusion, type long
+	ld 31,.LC11@toc@l(31)
+	addis 5,2,.LC1@toc@ha
+	std 30,-16(1)
+	addis 30,2,.LANCHOR0@toc@ha
+	addi 5,5,.LC1@toc@l
+	addi 30,30,.LANCHOR0@toc@l
+	li 4,1
+	mr 6,30
+	std 0,16(1)
+	stdu 1,-112(1)
+	ld 3,0(31)
+	bl __fprintf_chk
+	nop
+	addis 6,2,.LC12@toc@ha		# gpr load fusion, type long
+	ld 6,.LC12@toc@l(6)
+	ld 3,0(31)
+	addis 5,2,.LC2@toc@ha
+	li 4,1
+	addi 5,5,.LC2@toc@l
+	bl __fprintf_chk
+	nop
+	ld 3,0(31)
+	addis 5,2,.LC4@toc@ha
+	addis 6,2,function@toc@ha
+	addi 5,5,.LC4@toc@l
+	addi 6,6,function@toc@l
+	li 4,1
+	bl __fprintf_chk
+	nop
+	addis 6,2,.LC13@toc@ha		# gpr load fusion, type long
+	ld 6,.LC13@toc@l(6)
+	ld 3,0(31)
+	addis 5,2,.LC5@toc@ha
+	li 4,1
+	addi 5,5,.LC5@toc@l
+	bl __fprintf_chk
+	nop
+	ld 3,0(31)
+	addis 5,2,.LC7@toc@ha
+	addi 6,30,5
+	addi 5,5,.LC7@toc@l
+	li 4,1
+	bl __fprintf_chk
+	nop
+	ld 3,0(31)
+	addis 6,30,0x5
+	addis 5,2,.LC8@toc@ha
+	li 4,1
+	addi 5,5,.LC8@toc@l
+	addi 6,6,-29404
+	bl __fprintf_chk
+	nop
+	bl exported_function
+	nop
+	addi 1,1,112
+	ld 0,16(1)
+	ld 30,-16(1)
+	ld 31,-8(1)
+	mtlr 0
+	blr
+	.long 0
+	.byte 0,0,0,1,128,2,0,0
+	.size	function,.-function
+	.globl kExportedString
+	.section	.rodata
+	.align 4
+	.set	.LANCHOR0,. + 0
+	.type	kString, @object
+	.size	kString, 12
+kString:
+	.string	"hello world"
+	.zero	4
+	.type	kGiantArray, @object
+	.size	kGiantArray, 400000
+kGiantArray:
+	.long	1
+	.long	0
+	.zero	399992
+	.type	kExportedString, @object
+	.size	kExportedString, 26
+kExportedString:
+	.string	"hello world, more visibly"
+	.section	.rodata.str1.8,"aMS",@progbits,1
+	.align 3
+.LC1:
+	.string	"kString is %p\n"
+	.zero	1
+.LC2:
+	.string	"kExportedString is %p\n"
+	.zero	1
+.LC4:
+	.string	"function is %p\n"
+.LC5:
+	.string	"exported_function is %p\n"
+	.zero	7
+.LC7:
+	.string	"&kString[5] is %p\n"
+	.zero	5
+.LC8:
+	.string	"&kGiantArray[0x12345] is %p\n"
+	.section	".bss"
+	.align 2
+	.type	bss, @object
+	.size	bss, 20
+bss:
+	.zero	20
+	.ident	"GCC: (Ubuntu 4.9.2-10ubuntu13) 4.9.2"
+	.section	.note.GNU-stack,"",@progbits
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample2/out.s
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-Sample2/out.s
@@ -0,0 +1,677 @@
+.text
+.file 1 "inserted_by_delocate.c"
+.loc 1 1 0
+BORINGSSL_bcm_text_start:
+	.file	"foo.c"
+	.abiversion 2
+	.section	".toc","aw"
+# WAS .section	".text"
+.text
+	.section	".toc","aw"
+.LC0:
+
+	.quad	stderr
+.LC3:
+
+	.quad	kExportedString
+.LC6:
+
+	.quad	exported_function
+# WAS .section	".text"
+.text
+	.align 2
+	.p2align 4,,15
+	.globl exported_function
+	.type	exported_function, @function
+.Lexported_function_local_target:
+exported_function:
+0:
+999:
+	addis 2, 12, .LBORINGSSL_external_toc-999b@ha
+	addi 2, 2, .LBORINGSSL_external_toc-999b@l
+	ld 12, 0(2)
+	add 2, 2, 12
+# WAS addi 2,2,.TOC.-0b@l
+	.localentry	exported_function,.-exported_function
+.Lexported_function_local_entry:
+	mflr 0
+	std 19,-104(1)
+	std 20,-96(1)
+	std 21,-88(1)
+	std 22,-80(1)
+# WAS addis 21,2,.LC1@toc@ha
+# WAS addis 22,2,.LC2@toc@ha
+	std 23,-72(1)
+	std 24,-64(1)
+# WAS addis 23,2,.LC4@toc@ha
+# WAS addis 24,2,function@toc@ha
+	std 25,-56(1)
+	std 26,-48(1)
+# WAS addis 25,2,.LC5@toc@ha
+# WAS addis 26,2,.LC7@toc@ha
+	std 27,-40(1)
+	std 28,-32(1)
+# WAS addis 28,2,.LC8@toc@ha
+# WAS addi 21,21,.LC1@toc@l
+	addi 1, 1, -288
+	mflr 21
+	std 21, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC1
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 21, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	std 29,-24(1)
+	std 30,-16(1)
+# WAS addis 29,2,.LANCHOR0@toc@ha
+# WAS addi 22,22,.LC2@toc@l
+	addi 1, 1, -288
+	mflr 22
+	std 22, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC2
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 22, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	std 31,-8(1)
+	std 0,16(1)
+# WAS addi 29,29,.LANCHOR0@toc@l
+	addi 1, 1, -288
+	mflr 29
+	std 29, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LANCHOR0
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 29, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS addi 23,23,.LC4@toc@l
+	addi 1, 1, -288
+	mflr 23
+	std 23, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC4
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 23, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	stdu 1,-208(1)
+# WAS addis 31,2,.LC0@toc@ha		# gpr load fusion, type long
+# WAS ld 31,.LC0@toc@l(31)
+	addi 1, 1, -288
+	mflr 31
+	std 31, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC0
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 31, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 31, 0(31)
+# WAS addis 19,2,.LC3@toc@ha		# gpr load fusion, type long
+# WAS ld 19,.LC3@toc@l(19)
+	addi 1, 1, -288
+	mflr 19
+	std 19, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC3
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 19, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 19, 0(19)
+	addis 30,29,0x5
+# WAS addi 24,24,function@toc@l
+	addi 1, 1, -288
+	mflr 24
+	std 24, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_Lfunction_local_target
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 24, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS addis 20,2,.LC6@toc@ha		# gpr load fusion, type long
+# WAS ld 20,.LC6@toc@l(20)
+	addi 1, 1, -288
+	mflr 20
+	std 20, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC6
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 20, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 20, 0(20)
+# WAS addi 25,25,.LC5@toc@l
+	addi 1, 1, -288
+	mflr 25
+	std 25, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC5
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 25, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS addi 26,26,.LC7@toc@l
+	addi 1, 1, -288
+	mflr 26
+	std 26, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC7
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 26, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	addi 27,29,5
+# WAS addi 28,28,.LC8@toc@l
+	addi 1, 1, -288
+	mflr 28
+	std 28, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC8
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 28, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	addi 30,30,-29404
+	.p2align 4,,15
+.L2:
+
+	ld 3,0(31)
+	mr 5,21
+	mr 6,29
+	li 4,1
+# WAS bl __fprintf_chk
+	bl	bcm_redirector___fprintf_chk
+	ld 2, 24(1)
+	nop
+	ld 3,0(31)
+	mr 5,22
+	mr 6,19
+	li 4,1
+# WAS bl __fprintf_chk
+	bl	bcm_redirector___fprintf_chk
+	ld 2, 24(1)
+	nop
+	ld 3,0(31)
+	mr 5,23
+	mr 6,24
+	li 4,1
+# WAS bl __fprintf_chk
+	bl	bcm_redirector___fprintf_chk
+	ld 2, 24(1)
+	nop
+	ld 3,0(31)
+	mr 5,25
+	mr 6,20
+	li 4,1
+# WAS bl __fprintf_chk
+	bl	bcm_redirector___fprintf_chk
+	ld 2, 24(1)
+	nop
+	ld 3,0(31)
+	mr 5,26
+	mr 6,27
+	li 4,1
+# WAS bl __fprintf_chk
+	bl	bcm_redirector___fprintf_chk
+	ld 2, 24(1)
+	nop
+	ld 3,0(31)
+	li 4,1
+	mr 5,28
+	mr 6,30
+# WAS bl __fprintf_chk
+	bl	bcm_redirector___fprintf_chk
+	ld 2, 24(1)
+	nop
+	b .L2
+	.long 0
+	.byte 0,0,0,1,128,13,0,0
+	.size	exported_function,.-exported_function
+	.section	".toc","aw"
+	.set .LC11,.LC0
+	.set .LC12,.LC3
+	.set .LC13,.LC6
+# WAS .section	".text"
+.text
+	.align 2
+	.p2align 4,,15
+	.type	function, @function
+.Lfunction_local_target:
+function:
+0:
+999:
+	addis 2, 12, .LBORINGSSL_external_toc-999b@ha
+	addi 2, 2, .LBORINGSSL_external_toc-999b@l
+	ld 12, 0(2)
+	add 2, 2, 12
+# WAS addi 2,2,.TOC.-0b@l
+	.localentry	function,.-function
+.Lfunction_local_entry:
+	mflr 0
+	std 31,-8(1)
+# WAS addis 31,2,.LC11@toc@ha		# gpr load fusion, type long
+# WAS ld 31,.LC11@toc@l(31)
+	addi 1, 1, -288
+	mflr 31
+	std 31, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC11
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 31, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 31, 0(31)
+# WAS addis 5,2,.LC1@toc@ha
+	std 30,-16(1)
+# WAS addis 30,2,.LANCHOR0@toc@ha
+# WAS addi 5,5,.LC1@toc@l
+	addi 1, 1, -288
+	mflr 5
+	std 5, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC1
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 5, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS addi 30,30,.LANCHOR0@toc@l
+	addi 1, 1, -288
+	mflr 30
+	std 30, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LANCHOR0
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 30, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	li 4,1
+	mr 6,30
+	std 0,16(1)
+	stdu 1,-112(1)
+	ld 3,0(31)
+# WAS bl __fprintf_chk
+	bl	bcm_redirector___fprintf_chk
+	ld 2, 24(1)
+	nop
+# WAS addis 6,2,.LC12@toc@ha		# gpr load fusion, type long
+# WAS ld 6,.LC12@toc@l(6)
+	addi 1, 1, -288
+	mflr 6
+	std 6, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC12
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 6, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 6, 0(6)
+	ld 3,0(31)
+# WAS addis 5,2,.LC2@toc@ha
+	li 4,1
+# WAS addi 5,5,.LC2@toc@l
+	addi 1, 1, -288
+	mflr 5
+	std 5, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC2
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 5, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS bl __fprintf_chk
+	bl	bcm_redirector___fprintf_chk
+	ld 2, 24(1)
+	nop
+	ld 3,0(31)
+# WAS addis 5,2,.LC4@toc@ha
+# WAS addis 6,2,function@toc@ha
+# WAS addi 5,5,.LC4@toc@l
+	addi 1, 1, -288
+	mflr 5
+	std 5, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC4
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 5, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS addi 6,6,function@toc@l
+	addi 1, 1, -288
+	mflr 6
+	std 6, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_Lfunction_local_target
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 6, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	li 4,1
+# WAS bl __fprintf_chk
+	bl	bcm_redirector___fprintf_chk
+	ld 2, 24(1)
+	nop
+# WAS addis 6,2,.LC13@toc@ha		# gpr load fusion, type long
+# WAS ld 6,.LC13@toc@l(6)
+	addi 1, 1, -288
+	mflr 6
+	std 6, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC13
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 6, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 6, 0(6)
+	ld 3,0(31)
+# WAS addis 5,2,.LC5@toc@ha
+	li 4,1
+# WAS addi 5,5,.LC5@toc@l
+	addi 1, 1, -288
+	mflr 5
+	std 5, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC5
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 5, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+# WAS bl __fprintf_chk
+	bl	bcm_redirector___fprintf_chk
+	ld 2, 24(1)
+	nop
+	ld 3,0(31)
+# WAS addis 5,2,.LC7@toc@ha
+	addi 6,30,5
+# WAS addi 5,5,.LC7@toc@l
+	addi 1, 1, -288
+	mflr 5
+	std 5, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC7
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 5, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	li 4,1
+# WAS bl __fprintf_chk
+	bl	bcm_redirector___fprintf_chk
+	ld 2, 24(1)
+	nop
+	ld 3,0(31)
+	addis 6,30,0x5
+# WAS addis 5,2,.LC8@toc@ha
+	li 4,1
+# WAS addi 5,5,.LC8@toc@l
+	addi 1, 1, -288
+	mflr 5
+	std 5, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_LC8
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 5, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	addi 6,6,-29404
+# WAS bl __fprintf_chk
+	bl	bcm_redirector___fprintf_chk
+	ld 2, 24(1)
+	nop
+# WAS bl exported_function
+	bl	.Lexported_function_local_entry
+	nop
+	addi 1,1,112
+	ld 0,16(1)
+	ld 30,-16(1)
+	ld 31,-8(1)
+	mtlr 0
+	blr
+	.long 0
+	.byte 0,0,0,1,128,2,0,0
+	.size	function,.-function
+	.globl kExportedString
+# WAS .section	.rodata
+.text
+	.align 4
+	.set	.LANCHOR0,. + 0
+	.type	kString, @object
+	.size	kString, 12
+.LkString_local_target:
+kString:
+	.string	"hello world"
+	.zero	4
+	.type	kGiantArray, @object
+	.size	kGiantArray, 400000
+.LkGiantArray_local_target:
+kGiantArray:
+	.long	1
+	.long	0
+	.zero	399992
+	.type	kExportedString, @object
+	.size	kExportedString, 26
+.LkExportedString_local_target:
+kExportedString:
+	.string	"hello world, more visibly"
+# WAS .section	.rodata.str1.8,"aMS",@progbits,1
+.text
+	.align 3
+.LC1:
+
+	.string	"kString is %p\n"
+	.zero	1
+.LC2:
+
+	.string	"kExportedString is %p\n"
+	.zero	1
+.LC4:
+
+	.string	"function is %p\n"
+.LC5:
+
+	.string	"exported_function is %p\n"
+	.zero	7
+.LC7:
+
+	.string	"&kString[5] is %p\n"
+	.zero	5
+.LC8:
+
+	.string	"&kGiantArray[0x12345] is %p\n"
+	.section	".bss"
+	.align 2
+	.type	bss, @object
+	.size	bss, 20
+bss:
+.Lbss_local_target:
+
+	.zero	20
+	.ident	"GCC: (Ubuntu 4.9.2-10ubuntu13) 4.9.2"
+	.section	.note.GNU-stack,"",@progbits
+.text
+.loc 1 2 0
+BORINGSSL_bcm_text_end:
+.section ".toc", "aw"
+.Lredirector_toc___fprintf_chk:
+.quad __fprintf_chk
+.text
+.type bcm_redirector___fprintf_chk, @function
+bcm_redirector___fprintf_chk:
+	std 2, 24(1)
+	addis 12, 2, .Lredirector_toc___fprintf_chk@toc@ha
+	ld 12, .Lredirector_toc___fprintf_chk@toc@l(12)
+	mtctr 12
+	bctr
+.type bss_bss_get, @function
+bss_bss_get:
+	addis 3, 2, .Lbss_local_target@toc@ha
+	addi 3, 3, .Lbss_local_target@toc@l
+	blr
+.type bcm_loadtoc__dot_LANCHOR0, @function
+bcm_loadtoc__dot_LANCHOR0:
+.Lbcm_loadtoc__dot_LANCHOR0:
+	addis 3, 2, .LANCHOR0@toc@ha
+	addi 3, 3, .LANCHOR0@toc@l
+	blr
+.type bcm_loadtoc__dot_LC0, @function
+bcm_loadtoc__dot_LC0:
+.Lbcm_loadtoc__dot_LC0:
+	addis 3, 2, .LC0@toc@ha
+	addi 3, 3, .LC0@toc@l
+	blr
+.type bcm_loadtoc__dot_LC1, @function
+bcm_loadtoc__dot_LC1:
+.Lbcm_loadtoc__dot_LC1:
+	addis 3, 2, .LC1@toc@ha
+	addi 3, 3, .LC1@toc@l
+	blr
+.type bcm_loadtoc__dot_LC11, @function
+bcm_loadtoc__dot_LC11:
+.Lbcm_loadtoc__dot_LC11:
+	addis 3, 2, .LC11@toc@ha
+	addi 3, 3, .LC11@toc@l
+	blr
+.type bcm_loadtoc__dot_LC12, @function
+bcm_loadtoc__dot_LC12:
+.Lbcm_loadtoc__dot_LC12:
+	addis 3, 2, .LC12@toc@ha
+	addi 3, 3, .LC12@toc@l
+	blr
+.type bcm_loadtoc__dot_LC13, @function
+bcm_loadtoc__dot_LC13:
+.Lbcm_loadtoc__dot_LC13:
+	addis 3, 2, .LC13@toc@ha
+	addi 3, 3, .LC13@toc@l
+	blr
+.type bcm_loadtoc__dot_LC2, @function
+bcm_loadtoc__dot_LC2:
+.Lbcm_loadtoc__dot_LC2:
+	addis 3, 2, .LC2@toc@ha
+	addi 3, 3, .LC2@toc@l
+	blr
+.type bcm_loadtoc__dot_LC3, @function
+bcm_loadtoc__dot_LC3:
+.Lbcm_loadtoc__dot_LC3:
+	addis 3, 2, .LC3@toc@ha
+	addi 3, 3, .LC3@toc@l
+	blr
+.type bcm_loadtoc__dot_LC4, @function
+bcm_loadtoc__dot_LC4:
+.Lbcm_loadtoc__dot_LC4:
+	addis 3, 2, .LC4@toc@ha
+	addi 3, 3, .LC4@toc@l
+	blr
+.type bcm_loadtoc__dot_LC5, @function
+bcm_loadtoc__dot_LC5:
+.Lbcm_loadtoc__dot_LC5:
+	addis 3, 2, .LC5@toc@ha
+	addi 3, 3, .LC5@toc@l
+	blr
+.type bcm_loadtoc__dot_LC6, @function
+bcm_loadtoc__dot_LC6:
+.Lbcm_loadtoc__dot_LC6:
+	addis 3, 2, .LC6@toc@ha
+	addi 3, 3, .LC6@toc@l
+	blr
+.type bcm_loadtoc__dot_LC7, @function
+bcm_loadtoc__dot_LC7:
+.Lbcm_loadtoc__dot_LC7:
+	addis 3, 2, .LC7@toc@ha
+	addi 3, 3, .LC7@toc@l
+	blr
+.type bcm_loadtoc__dot_LC8, @function
+bcm_loadtoc__dot_LC8:
+.Lbcm_loadtoc__dot_LC8:
+	addis 3, 2, .LC8@toc@ha
+	addi 3, 3, .LC8@toc@l
+	blr
+.type bcm_loadtoc__dot_Lfunction_local_target, @function
+bcm_loadtoc__dot_Lfunction_local_target:
+.Lbcm_loadtoc__dot_Lfunction_local_target:
+	addis 3, 2, .Lfunction_local_target@toc@ha
+	addi 3, 3, .Lfunction_local_target@toc@l
+	blr
+.LBORINGSSL_external_toc:
+.quad .TOC.-.LBORINGSSL_external_toc
+.type BORINGSSL_bcm_text_hash, @object
+.size BORINGSSL_bcm_text_hash, 32
+BORINGSSL_bcm_text_hash:
+.byte 0xae
+.byte 0x2c
+.byte 0xea
+.byte 0x2a
+.byte 0xbd
+.byte 0xa6
+.byte 0xf3
+.byte 0xec
+.byte 0x97
+.byte 0x7f
+.byte 0x9b
+.byte 0xf6
+.byte 0x94
+.byte 0x9a
+.byte 0xfc
+.byte 0x83
+.byte 0x68
+.byte 0x27
+.byte 0xcb
+.byte 0xa0
+.byte 0xa0
+.byte 0x9f
+.byte 0x6b
+.byte 0x6f
+.byte 0xde
+.byte 0x52
+.byte 0xcd
+.byte 0xe2
+.byte 0xcd
+.byte 0xff
+.byte 0x31
+.byte 0x80
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-TOCWithOffset/in.s
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-TOCWithOffset/in.s
@@ -0,0 +1,23 @@
+	.text
+foo:
+	# TOC references may have offsets.
+	addis 3, 2, 5+foo@toc@ha
+	addi 3, 3, 10+foo@toc@l
+
+	addis 3, 2, 15+foo@toc@ha
+	addi 3, 3, 20+foo@toc@l
+
+	addis 4, 2, foo@toc@ha
+	addi 4, 4, foo@toc@l
+
+	addis 5, 2, 5+foo@toc@ha
+	ld 5, 10+foo@toc@l(5)
+
+	addis 4, 2, foo-10@toc@ha
+	addi 4, 4, foo-10@toc@l
+
+	addis 4, 2, foo@toc@ha+25
+	addi 4, 4, foo@toc@l+25
+
+	addis 4, 2, 1+foo-2@toc@ha+3
+	addi 4, 4, 1+foo-2@toc@l+3
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-TOCWithOffset/out.s
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/fipstools/delocate/testdata/ppc64le-TOCWithOffset/out.s
@@ -0,0 +1,178 @@
+.text
+.file 1 "inserted_by_delocate.c"
+.loc 1 1 0
+BORINGSSL_bcm_text_start:
+	.text
+.Lfoo_local_target:
+foo:
+	# TOC references may have offsets.
+# WAS addis 3, 2, 5+foo@toc@ha
+# WAS addi 3, 3, 10+foo@toc@l
+	addi 1, 1, -288
+	mflr 3
+	std 3, -8(1)
+	bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_10
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 3, -24(1)
+	addi 1, 1, 288
+
+# WAS addis 3, 2, 15+foo@toc@ha
+# WAS addi 3, 3, 20+foo@toc@l
+	addi 1, 1, -288
+	mflr 3
+	std 3, -8(1)
+	bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_20
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 3, -24(1)
+	addi 1, 1, 288
+
+# WAS addis 4, 2, foo@toc@ha
+# WAS addi 4, 4, foo@toc@l
+	addi 1, 1, -288
+	mflr 4
+	std 4, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_Lfoo_local_target
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 4, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+
+# WAS addis 5, 2, 5+foo@toc@ha
+# WAS ld 5, 10+foo@toc@l(5)
+	addi 1, 1, -288
+	mflr 5
+	std 5, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_10
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 5, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	ld 5, 0(5)
+
+# WAS addis 4, 2, foo-10@toc@ha
+# WAS addi 4, 4, foo-10@toc@l
+	addi 1, 1, -288
+	mflr 4
+	std 4, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_Lfoo_local_target__minus_10
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 4, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+
+# WAS addis 4, 2, foo@toc@ha+25
+# WAS addi 4, 4, foo@toc@l+25
+	addi 1, 1, -288
+	mflr 4
+	std 4, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_25
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 4, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+
+# WAS addis 4, 2, 1+foo-2@toc@ha+3
+# WAS addi 4, 4, 1+foo-2@toc@l+3
+	addi 1, 1, -288
+	mflr 4
+	std 4, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc__dot_Lfoo_local_target__plus_1_minus_2_plus_3
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 4, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+.text
+.loc 1 2 0
+BORINGSSL_bcm_text_end:
+.type bcm_loadtoc__dot_Lfoo_local_target, @function
+bcm_loadtoc__dot_Lfoo_local_target:
+.Lbcm_loadtoc__dot_Lfoo_local_target:
+	addis 3, 2, .Lfoo_local_target@toc@ha
+	addi 3, 3, .Lfoo_local_target@toc@l
+	blr
+.type bcm_loadtoc__dot_Lfoo_local_target__plus_1_minus_2_plus_3, @function
+bcm_loadtoc__dot_Lfoo_local_target__plus_1_minus_2_plus_3:
+.Lbcm_loadtoc__dot_Lfoo_local_target__plus_1_minus_2_plus_3:
+	addis 3, 2, .Lfoo_local_target+1-2+3@toc@ha
+	addi 3, 3, .Lfoo_local_target+1-2+3@toc@l
+	blr
+.type bcm_loadtoc__dot_Lfoo_local_target__plus_10, @function
+bcm_loadtoc__dot_Lfoo_local_target__plus_10:
+.Lbcm_loadtoc__dot_Lfoo_local_target__plus_10:
+	addis 3, 2, .Lfoo_local_target+10@toc@ha
+	addi 3, 3, .Lfoo_local_target+10@toc@l
+	blr
+.type bcm_loadtoc__dot_Lfoo_local_target__plus_20, @function
+bcm_loadtoc__dot_Lfoo_local_target__plus_20:
+.Lbcm_loadtoc__dot_Lfoo_local_target__plus_20:
+	addis 3, 2, .Lfoo_local_target+20@toc@ha
+	addi 3, 3, .Lfoo_local_target+20@toc@l
+	blr
+.type bcm_loadtoc__dot_Lfoo_local_target__plus_25, @function
+bcm_loadtoc__dot_Lfoo_local_target__plus_25:
+.Lbcm_loadtoc__dot_Lfoo_local_target__plus_25:
+	addis 3, 2, .Lfoo_local_target+25@toc@ha
+	addi 3, 3, .Lfoo_local_target+25@toc@l
+	blr
+.type bcm_loadtoc__dot_Lfoo_local_target__minus_10, @function
+bcm_loadtoc__dot_Lfoo_local_target__minus_10:
+.Lbcm_loadtoc__dot_Lfoo_local_target__minus_10:
+	addis 3, 2, .Lfoo_local_target-10@toc@ha
+	addi 3, 3, .Lfoo_local_target-10@toc@l
+	blr
+.LBORINGSSL_external_toc:
+.quad .TOC.-.LBORINGSSL_external_toc
+.type BORINGSSL_bcm_text_hash, @object
+.size BORINGSSL_bcm_text_hash, 32
+BORINGSSL_bcm_text_hash:
+.byte 0xae
+.byte 0x2c
+.byte 0xea
+.byte 0x2a
+.byte 0xbd
+.byte 0xa6
+.byte 0xf3
+.byte 0xec
+.byte 0x97
+.byte 0x7f
+.byte 0x9b
+.byte 0xf6
+.byte 0x94
+.byte 0x9a
+.byte 0xfc
+.byte 0x83
+.byte 0x68
+.byte 0x27
+.byte 0xcb
+.byte 0xa0
+.byte 0xa0
+.byte 0x9f
+.byte 0x6b
+.byte 0x6f
+.byte 0xde
+.byte 0x52
+.byte 0xcd
+.byte 0xe2
+.byte 0xcd
+.byte 0xff
+.byte 0x31
+.byte 0x80
Index: chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/sha/sha1.c
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/crypto/fipsmodule/sha/sha1.c
+++ chromium-125.0.6422.41/third_party/boringssl/src/crypto/fipsmodule/sha/sha1.c
@@ -416,6 +416,10 @@ static void sha1_block_data_order(uint32
     return;
   }
 #endif
+#if defined(SHA1_ASM_PPC64)
+  sha1_block_data_order_ppc64(state, data, num);
+  return;
+#endif
   sha1_block_data_order_nohw(state, data, num);
 }
 
Index: chromium-125.0.6422.41/third_party/boringssl/src/build.json
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/build.json
+++ chromium-125.0.6422.41/third_party/boringssl/src/build.json
@@ -118,6 +118,10 @@
             {"src": "crypto/fipsmodule/sha/asm/sha512-armv4.pl"},
             {"src": "crypto/fipsmodule/aes/asm/vpaes-armv7.pl"}
         ],
+        "perlasm_ppc64le": [
+            {"src": "crypto/fipsmodule/aes/asm/aesp8-ppc.pl"},
+            {"src": "crypto/fipsmodule/modes/asm/ghashp8-ppc.pl"}
+        ],
         "perlasm_x86": [
             {"src": "crypto/fipsmodule/aes/asm/aesni-x86.pl"},
             {"src": "crypto/fipsmodule/bn/asm/bn-586.pl"},
@@ -221,6 +225,7 @@
             "crypto/cpu_arm_freebsd.c",
             "crypto/cpu_arm_linux.c",
             "crypto/cpu_intel.c",
+            "crypto/cpu_ppc64le.c",
             "crypto/crypto.c",
             "crypto/curve25519/curve25519.c",
             "crypto/curve25519/curve25519_64_adx.c",
@@ -765,6 +770,9 @@
         "perlasm_arm": [
             {"src": "crypto/test/asm/trampoline-armv4.pl"}
         ],
+        "perlasm_ppc64le": [
+            {"src": "crypto/test/asm/trampoline-ppc.pl"}
+        ],
         "perlasm_x86": [
             {"src": "crypto/test/asm/trampoline-x86.pl"}
         ],
Index: chromium-125.0.6422.41/third_party/boringssl/src/util/pregenerate/build.go
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/src/util/pregenerate/build.go
+++ chromium-125.0.6422.41/third_party/boringssl/src/util/pregenerate/build.go
@@ -38,6 +38,7 @@ type InputTarget struct {
 	// architecture.
 	PerlasmAarch64 []PerlasmSource `json:"perlasm_aarch64,omitempty"`
 	PerlasmArm     []PerlasmSource `json:"perlasm_arm,omitempty"`
+	PerlasmPPC64LE []PerlasmSource `json:"perlasm_ppc64le,omitempty"`
 	PerlasmX86     []PerlasmSource `json:"perlasm_x86,omitempty"`
 	PerlasmX86_64  []PerlasmSource `json:"perlasm_x86_64,omitempty"`
 }
@@ -116,6 +117,9 @@ func (in *InputTarget) Pregenerate(name
 	for _, p := range in.PerlasmArm {
 		addPerlasmTask(&out.Asm, &p, "-linux.S", []string{"linux32"})
 	}
+	for _, p := range in.PerlasmPPC64LE {
+		addPerlasmTask(&out.Asm, &p, "-linux.S", []string{"linux64le"})
+	}
 	for _, p := range in.PerlasmX86 {
 		addPerlasmTask(&out.Asm, &p, "-apple.S", []string{"macosx", "-fPIC", "-DOPENSSL_IA32_SSE2"})
 		addPerlasmTask(&out.Asm, &p, "-linux.S", []string{"elf", "-fPIC", "-DOPENSSL_IA32_SSE2"})
Index: chromium-125.0.6422.41/third_party/boringssl/BUILD.generated.gni
===================================================================
--- chromium-125.0.6422.41.orig/third_party/boringssl/BUILD.generated.gni
+++ chromium-125.0.6422.41/third_party/boringssl/BUILD.generated.gni
@@ -93,6 +93,7 @@ crypto_sources = [
   "src/crypto/cpu_arm_linux.c",
   "src/crypto/cpu_arm_linux.h",
   "src/crypto/cpu_intel.c",
+  "src/crypto/cpu_ppc64le.c",
   "src/crypto/crypto.c",
   "src/crypto/curve25519/curve25519.c",
   "src/crypto/curve25519/curve25519_64_adx.c",
@@ -334,6 +335,7 @@ crypto_sources_asm = [
   "src/gen/bcm/aesv8-gcm-armv8-apple.S",
   "src/gen/bcm/aesv8-gcm-armv8-linux.S",
   "src/gen/bcm/aesv8-gcm-armv8-win.S",
+  "src/gen/bcm/aesp8-ppc-linux.S",
   "src/gen/bcm/armv4-mont-linux.S",
   "src/gen/bcm/armv8-mont-apple.S",
   "src/gen/bcm/armv8-mont-linux.S",
@@ -350,6 +352,7 @@ crypto_sources_asm = [
   "src/gen/bcm/ghash-neon-armv8-apple.S",
   "src/gen/bcm/ghash-neon-armv8-linux.S",
   "src/gen/bcm/ghash-neon-armv8-win.S",
+  "src/gen/bcm/ghashp8-ppc-linux.S",
   "src/gen/bcm/ghash-ssse3-x86-apple.S",
   "src/gen/bcm/ghash-ssse3-x86-linux.S",
   "src/gen/bcm/ghash-ssse3-x86_64-apple.S",
Index: chromium-125.0.6422.41/third_party/boringssl/README.ppc64le
===================================================================
--- /dev/null
+++ chromium-125.0.6422.41/third_party/boringssl/README.ppc64le
@@ -0,0 +1,8 @@
+==============================================================
+To recreate boringssl pregenerated files patch for ppc64le:
+
+cd third_party/boringssl/src
+cp -Rp gen gen.orig
+go run ./util/pregenerate
+cd ../../../../
+diff -urN chromium-*/third_party/boringssl/src/gen.orig chromium-*/third_party/boringssl/src/gen