95 lines
4.5 KiB
95 lines
4.5 KiB
2 years ago
|
From df728d2e2fe8284316ed083c5a210bca1e50986e Mon Sep 17 00:00:00 2001
|
||
|
From: Jose Dapena Paz <jdapena@igalia.com>
|
||
|
Date: Mon, 16 Jan 2023 12:53:21 +0100
|
||
|
Subject: [PATCH] GCC: fix distributed point functions build in ARM64 with SVE
|
||
|
|
||
|
SVE build is broken in ARM64, because distributed_point_functions
|
||
|
is trying to use operator& and operator|.
|
||
|
|
||
|
Bug:819294
|
||
|
Change-Id: I465efa475ed59031a2cfaf4172ef9fbd8bb05050
|
||
|
---
|
||
|
dpf/internal/evaluate_prg_hwy.cc | 34 ++++++++++++++------------------
|
||
|
1 file changed, 15 insertions(+), 19 deletions(-)
|
||
|
|
||
|
diff --git a/dpf/internal/evaluate_prg_hwy.cc b/dpf/internal/evaluate_prg_hwy.cc
|
||
|
index 3d743f4..a1dd8e0 100644
|
||
|
--- a//third_party/distributed_point_functions/code/dpf/internal/evaluate_prg_hwy.cc
|
||
|
+++ b//third_party/distributed_point_functions/code/dpf/internal/evaluate_prg_hwy.cc
|
||
|
@@ -124,14 +124,14 @@ auto IsBitSet(D d, const V input, int index) {
|
||
|
|
||
|
// Compute input AND index_64 on 64-bit integers.
|
||
|
auto input_64 = hn::BitCast(d64, input);
|
||
|
- input_64 &= index_64;
|
||
|
+ input_64 = hn::And(input_64, index_64);
|
||
|
|
||
|
// Take the OR of every two adjacent 64-bit integers. This ensures that each
|
||
|
// half of an 128-bit block is nonzero iff at least one half was nonzero.
|
||
|
- input_64 |= hn::Shuffle01(input_64);
|
||
|
+ input_64 = hn::Or(input_64, hn::Shuffle01(input_64));
|
||
|
|
||
|
// Compute a 64-bit mask that checks which integers are nonzero.
|
||
|
- return input_64 != hn::Zero(d64);
|
||
|
+ return hn::Ne(input_64, hn::Zero(d64));
|
||
|
}
|
||
|
|
||
|
// Dummy struct to get HWY_ALIGN as a number, for testing if an array of
|
||
|
@@ -236,24 +236,20 @@ absl::Status EvaluateSeedsHwy(
|
||
|
// Apply correction.
|
||
|
const auto correction_seed = hn::LoadDup128(
|
||
|
d64, reinterpret_cast<const uint64_t*>(correction_seeds + j));
|
||
|
- vec_0 ^=
|
||
|
- hn::BitCast(d8, hn::IfThenElseZero(control_mask_0, correction_seed));
|
||
|
- vec_1 ^=
|
||
|
- hn::BitCast(d8, hn::IfThenElseZero(control_mask_1, correction_seed));
|
||
|
- vec_2 ^=
|
||
|
- hn::BitCast(d8, hn::IfThenElseZero(control_mask_2, correction_seed));
|
||
|
- vec_3 ^=
|
||
|
- hn::BitCast(d8, hn::IfThenElseZero(control_mask_3, correction_seed));
|
||
|
+ vec_0 = hn::Xor(vec_0, hn::BitCast(d8, hn::IfThenElseZero(control_mask_0, correction_seed)));
|
||
|
+ vec_1 = hn::Xor(vec_1, hn::BitCast(d8, hn::IfThenElseZero(control_mask_1, correction_seed)));
|
||
|
+ vec_2 = hn::Xor(vec_2, hn::BitCast(d8, hn::IfThenElseZero(control_mask_2, correction_seed)));
|
||
|
+ vec_3 = hn::Xor(vec_3, hn::BitCast(d8, hn::IfThenElseZero(control_mask_3, correction_seed)));
|
||
|
|
||
|
// Extract control bit for next level.
|
||
|
const auto next_control_mask_0 = IsBitSet(d8, vec_0, 0);
|
||
|
const auto next_control_mask_1 = IsBitSet(d8, vec_1, 0);
|
||
|
const auto next_control_mask_2 = IsBitSet(d8, vec_2, 0);
|
||
|
const auto next_control_mask_3 = IsBitSet(d8, vec_3, 0);
|
||
|
- vec_0 &= clear_lowest_bit;
|
||
|
- vec_1 &= clear_lowest_bit;
|
||
|
- vec_2 &= clear_lowest_bit;
|
||
|
- vec_3 &= clear_lowest_bit;
|
||
|
+ vec_0 = hn::And(vec_0, clear_lowest_bit);
|
||
|
+ vec_1 = hn::And(vec_1, clear_lowest_bit);
|
||
|
+ vec_2 = hn::And(vec_2, clear_lowest_bit);
|
||
|
+ vec_3 = hn::And(vec_3, clear_lowest_bit);
|
||
|
|
||
|
// Perform control bit correction.
|
||
|
const auto correction_control_mask_left =
|
||
|
@@ -321,10 +317,10 @@ absl::Status EvaluateSeedsHwy(
|
||
|
// Apply correction.
|
||
|
const auto correction_seed = hn::LoadDup128(
|
||
|
d64, reinterpret_cast<const uint64_t*>(correction_seeds + j));
|
||
|
- vec ^= hn::BitCast(d8, hn::IfThenElseZero(control_mask, correction_seed));
|
||
|
+ vec = hn::Xor(vec, hn::BitCast(d8, hn::IfThenElseZero(control_mask, correction_seed)));
|
||
|
// Extract control bit for next level.
|
||
|
const auto next_control_mask = IsBitSet(d8, vec, 0);
|
||
|
- vec &= clear_lowest_bit;
|
||
|
+ vec = hn::And(vec, clear_lowest_bit);
|
||
|
|
||
|
// Perform control bit correction.
|
||
|
const auto correction_control_mask_left =
|
||
|
@@ -376,9 +372,9 @@ absl::Status EvaluateSeedsHwy(
|
||
|
// Perform seed correction.
|
||
|
const auto correction_seed = hn::LoadDup128(
|
||
|
d64, reinterpret_cast<const uint64_t*>(correction_seeds + j));
|
||
|
- vec ^= hn::BitCast(d8, hn::IfThenElseZero(control_mask, correction_seed));
|
||
|
+ vec = hn::Xor(vec, hn::BitCast(d8, hn::IfThenElseZero(control_mask, correction_seed)));
|
||
|
const auto next_control_mask = IsBitSet(d8, vec, 0);
|
||
|
- vec &= clear_lowest_bit;
|
||
|
+ vec = hn::And(vec, clear_lowest_bit);
|
||
|
const auto correction_control_mask_left =
|
||
|
correction_controls_left[j] ? mask_all_one : mask_all_zero;
|
||
|
const auto correction_control_mask_right =
|