parent
9b54e1f438
commit
b53226344e
@ -0,0 +1,73 @@
|
|||||||
|
From 5eaae2af0defeca148c2a281873bb31a15246876 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ilya Leoshkevich <iii@linux.ibm.com>
|
||||||
|
Date: Thu, 2 Feb 2023 19:34:24 +0100
|
||||||
|
Subject: [PATCH] 2155328
|
||||||
|
|
||||||
|
---
|
||||||
|
contrib/s390/dfltcc.c | 21 +++++++++++++++------
|
||||||
|
1 file changed, 15 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/contrib/s390/dfltcc.c b/contrib/s390/dfltcc.c
|
||||||
|
index 72099e7..f8dc21c 100644
|
||||||
|
--- a/contrib/s390/dfltcc.c
|
||||||
|
+++ b/contrib/s390/dfltcc.c
|
||||||
|
@@ -456,7 +456,10 @@ again:
|
||||||
|
*strm->next_out = (Bytef)state->bi_buf;
|
||||||
|
/* Honor history and check value */
|
||||||
|
param->nt = 0;
|
||||||
|
- param->cv = state->wrap == 2 ? ZSWAP32(strm->adler) : strm->adler;
|
||||||
|
+ if (state->wrap == 1)
|
||||||
|
+ param->cv = strm->adler;
|
||||||
|
+ else if (state->wrap == 2)
|
||||||
|
+ param->cv = ZSWAP32(strm->adler);
|
||||||
|
|
||||||
|
/* When opening a block, choose a Huffman-Table Type */
|
||||||
|
if (!param->bcf) {
|
||||||
|
@@ -488,7 +491,10 @@ again:
|
||||||
|
state->bi_buf = 0; /* Avoid accessing next_out */
|
||||||
|
else
|
||||||
|
state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1);
|
||||||
|
- strm->adler = state->wrap == 2 ? ZSWAP32(param->cv) : param->cv;
|
||||||
|
+ if (state->wrap == 1)
|
||||||
|
+ strm->adler = param->cv;
|
||||||
|
+ else if (state->wrap == 2)
|
||||||
|
+ strm->adler = ZSWAP32(param->cv);
|
||||||
|
|
||||||
|
/* Unmask the input data */
|
||||||
|
strm->avail_in += masked_avail_in;
|
||||||
|
@@ -600,11 +606,12 @@ dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(strm, flush, ret)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Translate stream to parameter block */
|
||||||
|
- param->cvt = state->flags ? CVT_CRC32 : CVT_ADLER32;
|
||||||
|
+ param->cvt = ((state->wrap & 4) && state->flags) ? CVT_CRC32 : CVT_ADLER32;
|
||||||
|
param->sbb = state->bits;
|
||||||
|
if (param->hl)
|
||||||
|
param->nt = 0; /* Honor history for the first block */
|
||||||
|
- param->cv = state->flags ? ZSWAP32(state->check) : state->check;
|
||||||
|
+ if (state->wrap & 4)
|
||||||
|
+ param->cv = state->flags ? ZSWAP32(state->check) : state->check;
|
||||||
|
|
||||||
|
/* Inflate */
|
||||||
|
do {
|
||||||
|
@@ -615,7 +622,9 @@ dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(strm, flush, ret)
|
||||||
|
strm->msg = oesc_msg(dfltcc_state->msg, param->oesc);
|
||||||
|
state->last = cc == DFLTCC_CC_OK;
|
||||||
|
state->bits = param->sbb;
|
||||||
|
- strm->adler = state->check = state->flags ? ZSWAP32(param->cv) : param->cv;
|
||||||
|
+ if (state->wrap & 4)
|
||||||
|
+ strm->adler = state->check = state->flags ?
|
||||||
|
+ ZSWAP32(param->cv) : param->cv;
|
||||||
|
if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) {
|
||||||
|
/* Report an error if stream is corrupted */
|
||||||
|
state->mode = BAD;
|
||||||
|
@@ -1077,4 +1086,4 @@ int ZLIB_INTERNAL dfltcc_inflate_get_dictionary(strm, dictionary, dict_length)
|
||||||
|
if (dict_length)
|
||||||
|
*dict_length = param->hl;
|
||||||
|
return Z_OK;
|
||||||
|
-}
|
||||||
|
\ No newline at end of file
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.39.1
|
||||||
|
|
@ -0,0 +1,54 @@
|
|||||||
|
From 480b65cae6c20a41aa698a6c9d3b260f6f744004 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ilya Leoshkevich <iii@linux.ibm.com>
|
||||||
|
Date: Thu, 2 Feb 2023 19:41:32 +0100
|
||||||
|
Subject: [PATCH] 0003-PATCH-Fix-clang-s-behavior-on-versions-7.patch
|
||||||
|
|
||||||
|
---
|
||||||
|
contrib/power/clang_workaround.h | 15 ++++++++++-----
|
||||||
|
1 file changed, 10 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/contrib/power/clang_workaround.h b/contrib/power/clang_workaround.h
|
||||||
|
index b5e7dae..915f7e5 100644
|
||||||
|
--- a/contrib/power/clang_workaround.h
|
||||||
|
+++ b/contrib/power/clang_workaround.h
|
||||||
|
@@ -39,7 +39,12 @@ __vector unsigned long long __builtin_pack_vector (unsigned long __a,
|
||||||
|
return __v;
|
||||||
|
}
|
||||||
|
|
||||||
|
-#ifndef vec_xxpermdi
|
||||||
|
+/*
|
||||||
|
+ * Clang 7 changed the behavior of vec_xxpermdi in order to provide the same
|
||||||
|
+ * behavior as GCC. That means code adapted to Clang >= 7 does not work on
|
||||||
|
+ * Clang <= 6. So, fall back to __builtin_unpack_vector() on Clang <= 6.
|
||||||
|
+ */
|
||||||
|
+#if !defined vec_xxpermdi || __clang_major__ <= 6
|
||||||
|
|
||||||
|
static inline
|
||||||
|
unsigned long __builtin_unpack_vector (__vector unsigned long long __v,
|
||||||
|
@@ -62,9 +67,9 @@ static inline
|
||||||
|
unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v)
|
||||||
|
{
|
||||||
|
#if defined(__BIG_ENDIAN__)
|
||||||
|
- return vec_xxpermdi(__v, __v, 0x0)[1];
|
||||||
|
- #else
|
||||||
|
return vec_xxpermdi(__v, __v, 0x0)[0];
|
||||||
|
+ #else
|
||||||
|
+ return vec_xxpermdi(__v, __v, 0x3)[0];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -72,9 +77,9 @@ static inline
|
||||||
|
unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v)
|
||||||
|
{
|
||||||
|
#if defined(__BIG_ENDIAN__)
|
||||||
|
- return vec_xxpermdi(__v, __v, 0x3)[1];
|
||||||
|
- #else
|
||||||
|
return vec_xxpermdi(__v, __v, 0x3)[0];
|
||||||
|
+ #else
|
||||||
|
+ return vec_xxpermdi(__v, __v, 0x0)[0];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif /* vec_xxpermdi */
|
||||||
|
--
|
||||||
|
2.39.1
|
||||||
|
|
@ -0,0 +1,17 @@
|
|||||||
|
--- zlib-1.2.11/contrib/s390/dfltcc_deflate.h.old 2023-05-04 09:39:23.423753908 +0000
|
||||||
|
+++ zlib-1.2.11/contrib/s390/dfltcc_deflate.h 2023-05-04 09:39:48.087753908 +0000
|
||||||
|
@@ -45,11 +45,11 @@ int ZLIB_INTERNAL dfltcc_deflate_get_dic
|
||||||
|
#define DEFLATE_DONE dfltcc_deflate_done
|
||||||
|
#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \
|
||||||
|
do { \
|
||||||
|
- if (dfltcc_can_deflate((strm))) \
|
||||||
|
+ if (deflateStateCheck((strm)) || dfltcc_can_deflate((strm))) \
|
||||||
|
(complen) = DEFLATE_BOUND_COMPLEN(source_len); \
|
||||||
|
} while (0)
|
||||||
|
#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (dfltcc_can_deflate((strm)))
|
||||||
|
#define DEFLATE_HOOK dfltcc_deflate
|
||||||
|
#define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm)))
|
||||||
|
|
||||||
|
-#endif
|
||||||
|
\ No newline at end of file
|
||||||
|
+#endif
|
@ -0,0 +1,299 @@
|
|||||||
|
--- a/contrib/s390/dfltcc.c
|
||||||
|
+++ b/contrib/s390/dfltcc.c
|
||||||
|
@@ -539,10 +539,6 @@ int ZLIB_INTERNAL dfltcc_can_inflate(strm)
|
||||||
|
struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state;
|
||||||
|
struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state);
|
||||||
|
|
||||||
|
- /* Unsupported compression settings */
|
||||||
|
- if (state->wbits != HB_BITS)
|
||||||
|
- return 0;
|
||||||
|
-
|
||||||
|
/* Unsupported hardware */
|
||||||
|
return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) &&
|
||||||
|
is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0);
|
||||||
|
@@ -606,8 +602,6 @@ dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(strm, flush, ret)
|
||||||
|
/* Translate stream to parameter block */
|
||||||
|
param->cvt = state->flags ? CVT_CRC32 : CVT_ADLER32;
|
||||||
|
param->sbb = state->bits;
|
||||||
|
- param->hl = state->whave; /* Software and hardware history formats match */
|
||||||
|
- param->ho = (state->wnext - state->whave) & ((1 << HB_BITS) - 1);
|
||||||
|
if (param->hl)
|
||||||
|
param->nt = 0; /* Honor history for the first block */
|
||||||
|
param->cv = state->flags ? ZSWAP32(state->check) : state->check;
|
||||||
|
@@ -621,8 +615,6 @@ dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(strm, flush, ret)
|
||||||
|
strm->msg = oesc_msg(dfltcc_state->msg, param->oesc);
|
||||||
|
state->last = cc == DFLTCC_CC_OK;
|
||||||
|
state->bits = param->sbb;
|
||||||
|
- state->whave = param->hl;
|
||||||
|
- state->wnext = (param->ho + param->hl) & ((1 << HB_BITS) - 1);
|
||||||
|
strm->adler = state->check = state->flags ? ZSWAP32(param->cv) : param->cv;
|
||||||
|
if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) {
|
||||||
|
/* Report an error if stream is corrupted */
|
||||||
|
@@ -644,11 +636,52 @@ int ZLIB_INTERNAL dfltcc_was_inflate_used(strm)
|
||||||
|
return !param->nt;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/*
|
||||||
|
+ Rotates a circular buffer.
|
||||||
|
+ The implementation is based on https://cplusplus.com/reference/algorithm/rotate/
|
||||||
|
+ */
|
||||||
|
+local void rotate OF((Bytef *start, Bytef *pivot, Bytef *end));
|
||||||
|
+local void rotate(start, pivot, end)
|
||||||
|
+ Bytef *start;
|
||||||
|
+ Bytef *pivot;
|
||||||
|
+ Bytef *end;
|
||||||
|
+{
|
||||||
|
+ Bytef *p = pivot;
|
||||||
|
+ Bytef tmp;
|
||||||
|
+
|
||||||
|
+ while (p != start) {
|
||||||
|
+ tmp = *start;
|
||||||
|
+ *start = *p;
|
||||||
|
+ *p = tmp;
|
||||||
|
+
|
||||||
|
+ start++;
|
||||||
|
+ p++;
|
||||||
|
+
|
||||||
|
+ if (p == end)
|
||||||
|
+ p = pivot;
|
||||||
|
+ else if (start == pivot)
|
||||||
|
+ pivot = p;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#define MIN(x, y) ({ \
|
||||||
|
+ typeof(x) _x = (x); \
|
||||||
|
+ typeof(y) _y = (y); \
|
||||||
|
+ _x < _y ? _x : _y; \
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+#define MAX(x, y) ({ \
|
||||||
|
+ typeof(x) _x = (x); \
|
||||||
|
+ typeof(y) _y = (y); \
|
||||||
|
+ _x > _y ? _x : _y; \
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
int ZLIB_INTERNAL dfltcc_inflate_disable(strm)
|
||||||
|
z_streamp strm;
|
||||||
|
{
|
||||||
|
struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state;
|
||||||
|
struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state);
|
||||||
|
+ struct dfltcc_param_v0 *param = &dfltcc_state->param;
|
||||||
|
|
||||||
|
if (!dfltcc_can_inflate(strm))
|
||||||
|
return 0;
|
||||||
|
@@ -660,6 +693,9 @@ int ZLIB_INTERNAL dfltcc_inflate_disable(strm)
|
||||||
|
return 1;
|
||||||
|
/* DFLTCC was not used yet - decompress in software */
|
||||||
|
memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af));
|
||||||
|
+ /* Convert the window from the hardware to the software format */
|
||||||
|
+ rotate(state->window, state->window + param->ho, state->window + HB_SIZE);
|
||||||
|
+ state->whave = state->wnext = MIN(param->hl, state->wsize);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -830,9 +866,9 @@ voidpf ZLIB_INTERNAL dfltcc_alloc_window(strm, items, size)
|
||||||
|
voidpf p, w;
|
||||||
|
|
||||||
|
/* To simplify freeing, we store the pointer to the allocated buffer right
|
||||||
|
- * before the window.
|
||||||
|
+ * before the window. Note that DFLTCC always uses HB_SIZE bytes.
|
||||||
|
*/
|
||||||
|
- p = ZALLOC(strm, sizeof(voidpf) + items * size + PAGE_ALIGN,
|
||||||
|
+ p = ZALLOC(strm, sizeof(voidpf) + MAX(items * size, HB_SIZE) + PAGE_ALIGN,
|
||||||
|
sizeof(unsigned char));
|
||||||
|
if (p == NULL)
|
||||||
|
return NULL;
|
||||||
|
@@ -841,6 +877,14 @@ voidpf ZLIB_INTERNAL dfltcc_alloc_window(strm, items, size)
|
||||||
|
return w;
|
||||||
|
}
|
||||||
|
|
||||||
|
+void ZLIB_INTERNAL dfltcc_copy_window(dest, src, n)
|
||||||
|
+ void *dest;
|
||||||
|
+ const void *src;
|
||||||
|
+ size_t n;
|
||||||
|
+{
|
||||||
|
+ memcpy(dest, src, MAX(n, HB_SIZE));
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
void ZLIB_INTERNAL dfltcc_free_window(strm, w)
|
||||||
|
z_streamp strm;
|
||||||
|
voidpf w;
|
||||||
|
@@ -951,6 +995,24 @@ local void append_history(param, history, buf, count)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+local void get_history OF((struct dfltcc_param_v0 FAR *param,
|
||||||
|
+ const Bytef *history,
|
||||||
|
+ Bytef *buf));
|
||||||
|
+local void get_history(param, history, buf)
|
||||||
|
+ struct dfltcc_param_v0 FAR *param;
|
||||||
|
+ const Bytef *history;
|
||||||
|
+ Bytef *buf;
|
||||||
|
+{
|
||||||
|
+ if (param->ho + param->hl <= HB_SIZE)
|
||||||
|
+ /* Circular history buffer does not wrap - copy one chunk */
|
||||||
|
+ memcpy(buf, history + param->ho, param->hl);
|
||||||
|
+ else {
|
||||||
|
+ /* Circular history buffer wraps - copy two chunks */
|
||||||
|
+ memcpy(buf, history + param->ho, HB_SIZE - param->ho);
|
||||||
|
+ memcpy(buf + HB_SIZE - param->ho, history, param->ho + param->hl - HB_SIZE);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(strm, dictionary, dict_length)
|
||||||
|
z_streamp strm;
|
||||||
|
const Bytef *dictionary;
|
||||||
|
@@ -975,20 +1037,43 @@ int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(strm, dictionary, dict_length)
|
||||||
|
struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state);
|
||||||
|
struct dfltcc_param_v0 FAR *param = &dfltcc_state->param;
|
||||||
|
|
||||||
|
- if (dictionary) {
|
||||||
|
- if (param->ho + param->hl <= HB_SIZE)
|
||||||
|
- /* Circular history buffer does not wrap - copy one chunk */
|
||||||
|
- zmemcpy(dictionary, state->window + param->ho, param->hl);
|
||||||
|
- else {
|
||||||
|
- /* Circular history buffer wraps - copy two chunks */
|
||||||
|
- zmemcpy(dictionary,
|
||||||
|
- state->window + param->ho,
|
||||||
|
- HB_SIZE - param->ho);
|
||||||
|
- zmemcpy(dictionary + HB_SIZE - param->ho,
|
||||||
|
- state->window,
|
||||||
|
- param->ho + param->hl - HB_SIZE);
|
||||||
|
- }
|
||||||
|
+ if (dictionary)
|
||||||
|
+ get_history(param, state->window, dictionary);
|
||||||
|
+ if (dict_length)
|
||||||
|
+ *dict_length = param->hl;
|
||||||
|
+ return Z_OK;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int ZLIB_INTERNAL dfltcc_inflate_set_dictionary(strm, dictionary, dict_length)
|
||||||
|
+ z_streamp strm;
|
||||||
|
+ const Bytef *dictionary;
|
||||||
|
+ uInt dict_length;
|
||||||
|
+{
|
||||||
|
+ struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||||
|
+ struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
|
||||||
|
+ struct dfltcc_param_v0 *param = &dfltcc_state->param;
|
||||||
|
+
|
||||||
|
+ if (inflate_ensure_window(state)) {
|
||||||
|
+ state->mode = MEM;
|
||||||
|
+ return Z_MEM_ERROR;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ append_history(param, state->window, dictionary, dict_length);
|
||||||
|
+ state->havedict = 1;
|
||||||
|
+ return Z_OK;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int ZLIB_INTERNAL dfltcc_inflate_get_dictionary(strm, dictionary, dict_length)
|
||||||
|
+ z_streamp strm;
|
||||||
|
+ Bytef *dictionary;
|
||||||
|
+ uInt *dict_length;
|
||||||
|
+{
|
||||||
|
+ struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||||
|
+ struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
|
||||||
|
+ struct dfltcc_param_v0 *param = &dfltcc_state->param;
|
||||||
|
+
|
||||||
|
+ if (dictionary && state->window)
|
||||||
|
+ get_history(param, state->window, dictionary);
|
||||||
|
if (dict_length)
|
||||||
|
*dict_length = param->hl;
|
||||||
|
return Z_OK;
|
||||||
|
--- a/contrib/s390/dfltcc.h
|
||||||
|
+++ b/contrib/s390/dfltcc.h
|
||||||
|
@@ -11,6 +11,8 @@ void ZLIB_INTERNAL dfltcc_copy_state OF((voidpf dst, const voidpf src,
|
||||||
|
void ZLIB_INTERNAL dfltcc_reset OF((z_streamp strm, uInt size));
|
||||||
|
voidpf ZLIB_INTERNAL dfltcc_alloc_window OF((z_streamp strm, uInt items,
|
||||||
|
uInt size));
|
||||||
|
+void ZLIB_INTERNAL dfltcc_copy_window OF((void *dest, const void *src,
|
||||||
|
+ size_t n));
|
||||||
|
void ZLIB_INTERNAL dfltcc_free_window OF((z_streamp strm, voidpf w));
|
||||||
|
#define DFLTCC_BLOCK_HEADER_BITS 3
|
||||||
|
#define DFLTCC_HLITS_COUNT_BITS 5
|
||||||
|
@@ -44,11 +46,18 @@ dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate OF((z_streamp strm,
|
||||||
|
int flush, int *ret));
|
||||||
|
int ZLIB_INTERNAL dfltcc_was_inflate_used OF((z_streamp strm));
|
||||||
|
int ZLIB_INTERNAL dfltcc_inflate_disable OF((z_streamp strm));
|
||||||
|
+int ZLIB_INTERNAL dfltcc_inflate_set_dictionary OF((z_streamp strm,
|
||||||
|
+ const Bytef *dictionary,
|
||||||
|
+ uInt dict_length));
|
||||||
|
+int ZLIB_INTERNAL dfltcc_inflate_get_dictionary OF((z_streamp strm,
|
||||||
|
+ Bytef *dictionary,
|
||||||
|
+ uInt* dict_length));
|
||||||
|
|
||||||
|
#define ZALLOC_STATE dfltcc_alloc_state
|
||||||
|
#define ZFREE_STATE ZFREE
|
||||||
|
#define ZCOPY_STATE dfltcc_copy_state
|
||||||
|
#define ZALLOC_WINDOW dfltcc_alloc_window
|
||||||
|
+#define ZCOPY_WINDOW dfltcc_copy_window
|
||||||
|
#define ZFREE_WINDOW dfltcc_free_window
|
||||||
|
#define TRY_FREE_WINDOW dfltcc_free_window
|
||||||
|
#define INFLATE_RESET_KEEP_HOOK(strm) \
|
||||||
|
@@ -77,5 +86,15 @@ int ZLIB_INTERNAL dfltcc_inflate_disable OF((z_streamp strm));
|
||||||
|
do { \
|
||||||
|
if (dfltcc_was_inflate_used((strm))) return Z_STREAM_ERROR; \
|
||||||
|
} while (0)
|
||||||
|
+#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \
|
||||||
|
+ do { \
|
||||||
|
+ if (dfltcc_can_inflate(strm)) \
|
||||||
|
+ return dfltcc_inflate_set_dictionary(strm, dict, dict_len); \
|
||||||
|
+ } while (0)
|
||||||
|
+#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \
|
||||||
|
+ do { \
|
||||||
|
+ if (dfltcc_can_inflate(strm)) \
|
||||||
|
+ return dfltcc_inflate_get_dictionary(strm, dict, dict_len); \
|
||||||
|
+ } while (0)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
\ No newline at end of file
|
||||||
|
diff --git a/inflate.c b/inflate.c
|
||||||
|
index 3750152..a0e2169 100644
|
||||||
|
--- a/inflate.c
|
||||||
|
+++ b/inflate.c
|
||||||
|
@@ -93,6 +93,7 @@
|
||||||
|
#define ZFREE_STATE ZFREE
|
||||||
|
#define ZCOPY_STATE zmemcpy
|
||||||
|
#define ZALLOC_WINDOW ZALLOC
|
||||||
|
+#define ZCOPY_WINDOW zmemcpy
|
||||||
|
#define ZFREE_WINDOW ZFREE
|
||||||
|
#define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0)
|
||||||
|
#define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0)
|
||||||
|
@@ -101,6 +102,8 @@
|
||||||
|
#define INFLATE_NEED_UPDATEWINDOW(strm) 1
|
||||||
|
#define INFLATE_MARK_HOOK(strm) do {} while (0)
|
||||||
|
#define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0)
|
||||||
|
+#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
|
||||||
|
+#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef MAKEFIXED
|
||||||
|
@@ -1330,6 +1333,8 @@ uInt *dictLength;
|
||||||
|
if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
|
||||||
|
state = (struct inflate_state FAR *)strm->state;
|
||||||
|
|
||||||
|
+ INFLATE_GET_DICTIONARY_HOOK(strm, dictionary, dictLength);
|
||||||
|
+
|
||||||
|
/* copy dictionary */
|
||||||
|
if (state->whave && dictionary != Z_NULL) {
|
||||||
|
zmemcpy(dictionary, state->window + state->wnext,
|
||||||
|
@@ -1365,6 +1370,8 @@ uInt dictLength;
|
||||||
|
return Z_DATA_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ INFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength);
|
||||||
|
+
|
||||||
|
/* copy dictionary to window using updatewindow(), which will amend the
|
||||||
|
existing dictionary if appropriate */
|
||||||
|
ret = updatewindow(strm, dictionary + dictLength, dictLength);
|
||||||
|
@@ -1529,8 +1536,7 @@ z_streamp source;
|
||||||
|
}
|
||||||
|
copy->next = copy->codes + (state->next - state->codes);
|
||||||
|
if (window != Z_NULL) {
|
||||||
|
- wsize = 1U << state->wbits;
|
||||||
|
- zmemcpy(window, state->window, wsize);
|
||||||
|
+ ZCOPY_WINDOW(window, state->window, 1U << state->wbits);
|
||||||
|
}
|
||||||
|
copy->window = window;
|
||||||
|
dest->state = (struct internal_state FAR *)copy;
|
@ -0,0 +1,69 @@
|
|||||||
|
From 2d80d3f6b52f9fa454c26c89d2d6a1790e1cecb0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Mark Adler <madler@alumni.caltech.edu>
|
||||||
|
Date: Sat, 21 Jan 2017 01:50:26 -0800
|
||||||
|
Subject: [PATCH] Limit hash table inserts after switch from stored deflate.
|
||||||
|
|
||||||
|
This limits hash table inserts to the available data in the window
|
||||||
|
and to the sliding window size in deflate_stored(). The hash table
|
||||||
|
inserts are deferred until deflateParams() switches to a non-zero
|
||||||
|
compression level.
|
||||||
|
---
|
||||||
|
deflate.c | 10 +++++++++-
|
||||||
|
1 file changed, 9 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/deflate.c b/deflate.c
|
||||||
|
index 20bda4f..d368b25 100644
|
||||||
|
--- a/deflate.c
|
||||||
|
+++ b/deflate.c
|
||||||
|
@@ -1513,6 +1513,8 @@ local void fill_window(s)
|
||||||
|
s->match_start -= wsize;
|
||||||
|
s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
|
||||||
|
s->block_start -= (long) wsize;
|
||||||
|
+ if (s->insert > s->strstart)
|
||||||
|
+ s->insert = s->strstart;
|
||||||
|
slide_hash(s);
|
||||||
|
more += wsize;
|
||||||
|
}
|
||||||
|
@@ -1742,6 +1744,7 @@ local block_state deflate_stored(s, flush)
|
||||||
|
s->matches = 2; /* clear hash */
|
||||||
|
zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size);
|
||||||
|
s->strstart = s->w_size;
|
||||||
|
+ s->insert = s->strstart;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (s->window_size - s->strstart <= used) {
|
||||||
|
@@ -1750,12 +1753,14 @@ local block_state deflate_stored(s, flush)
|
||||||
|
zmemcpy(s->window, s->window + s->w_size, s->strstart);
|
||||||
|
if (s->matches < 2)
|
||||||
|
s->matches++; /* add a pending slide_hash() */
|
||||||
|
+ if (s->insert > s->strstart)
|
||||||
|
+ s->insert = s->strstart;
|
||||||
|
}
|
||||||
|
zmemcpy(s->window + s->strstart, s->strm->next_in - used, used);
|
||||||
|
s->strstart += used;
|
||||||
|
+ s->insert += MIN(used, s->w_size - s->insert);
|
||||||
|
}
|
||||||
|
s->block_start = s->strstart;
|
||||||
|
- s->insert += MIN(used, s->w_size - s->insert);
|
||||||
|
}
|
||||||
|
if (s->high_water < s->strstart)
|
||||||
|
s->high_water = s->strstart;
|
||||||
|
@@ -1779,12 +1784,15 @@ local block_state deflate_stored(s, flush)
|
||||||
|
if (s->matches < 2)
|
||||||
|
s->matches++; /* add a pending slide_hash() */
|
||||||
|
have += s->w_size; /* more space now */
|
||||||
|
+ if (s->insert > s->strstart)
|
||||||
|
+ s->insert = s->strstart;
|
||||||
|
}
|
||||||
|
if (have > s->strm->avail_in)
|
||||||
|
have = s->strm->avail_in;
|
||||||
|
if (have) {
|
||||||
|
read_buf(s->strm, s->window + s->strstart, have);
|
||||||
|
s->strstart += have;
|
||||||
|
+ s->insert += MIN(have, s->w_size - s->insert);
|
||||||
|
}
|
||||||
|
if (s->high_water < s->strstart)
|
||||||
|
s->high_water = s->strstart;
|
||||||
|
--
|
||||||
|
2.39.1
|
||||||
|
|
@ -0,0 +1,282 @@
|
|||||||
|
From 14730a26e830eb2b09d1f7097910616f23c1476e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ilya Leoshkevich <iii@linux.ibm.com>
|
||||||
|
Date: Thu, 2 Feb 2023 19:40:32 +0100
|
||||||
|
Subject: [PATCH] 0001-PATCH-Preparation-for-Power-optimizations.patch
|
||||||
|
|
||||||
|
---
|
||||||
|
CMakeLists.txt | 67 ++++++++++++++++++++++++++++++++++++++++++
|
||||||
|
configure | 66 +++++++++++++++++++++++++++++++++++++++++
|
||||||
|
contrib/README.contrib | 8 +++++
|
||||||
|
contrib/gcc/zifunc.h | 60 +++++++++++++++++++++++++++++++++++++
|
||||||
|
contrib/power/power.h | 4 +++
|
||||||
|
5 files changed, 205 insertions(+)
|
||||||
|
create mode 100644 contrib/gcc/zifunc.h
|
||||||
|
create mode 100644 contrib/power/power.h
|
||||||
|
|
||||||
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||||
|
index 0fe939d..e762023 100644
|
||||||
|
--- a/CMakeLists.txt
|
||||||
|
+++ b/CMakeLists.txt
|
||||||
|
@@ -7,6 +7,7 @@ set(VERSION "1.2.11")
|
||||||
|
|
||||||
|
option(ASM686 "Enable building i686 assembly implementation")
|
||||||
|
option(AMD64 "Enable building amd64 assembly implementation")
|
||||||
|
+option(POWER "Enable building power implementation")
|
||||||
|
|
||||||
|
set(INSTALL_BIN_DIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables")
|
||||||
|
set(INSTALL_LIB_DIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE PATH "Installation directory for libraries")
|
||||||
|
@@ -140,6 +141,72 @@ if(CMAKE_COMPILER_IS_GNUCC)
|
||||||
|
add_definitions(-DASMV)
|
||||||
|
set_source_files_properties(${ZLIB_ASMS} PROPERTIES LANGUAGE C COMPILE_FLAGS -DNO_UNDERLINE)
|
||||||
|
endif()
|
||||||
|
+
|
||||||
|
+ # test to see if we can use a GNU indirect function to detect and load optimized code at runtime
|
||||||
|
+ CHECK_C_SOURCE_COMPILES("
|
||||||
|
+ static int test_ifunc_native(void)
|
||||||
|
+ {
|
||||||
|
+ return 1;
|
||||||
|
+ }
|
||||||
|
+ static int (*(check_ifunc_native(void)))(void)
|
||||||
|
+ {
|
||||||
|
+ return test_ifunc_native;
|
||||||
|
+ }
|
||||||
|
+ int test_ifunc(void) __attribute__ ((ifunc (\"check_ifunc_native\")));
|
||||||
|
+ int main(void)
|
||||||
|
+ {
|
||||||
|
+ return 0;
|
||||||
|
+ }
|
||||||
|
+ " HAS_C_ATTR_IFUNC)
|
||||||
|
+
|
||||||
|
+ if(HAS_C_ATTR_IFUNC)
|
||||||
|
+ add_definitions(-DHAVE_IFUNC)
|
||||||
|
+ set(ZLIB_PRIVATE_HDRS ${ZLIB_PRIVATE_HDRS} contrib/gcc/zifunc.h)
|
||||||
|
+ endif()
|
||||||
|
+
|
||||||
|
+ if(POWER)
|
||||||
|
+ # Test to see if we can use the optimizations for Power
|
||||||
|
+ CHECK_C_SOURCE_COMPILES("
|
||||||
|
+ #ifndef _ARCH_PPC
|
||||||
|
+ #error \"Target is not Power\"
|
||||||
|
+ #endif
|
||||||
|
+ #ifndef __BUILTIN_CPU_SUPPORTS__
|
||||||
|
+ #error \"Target doesn't support __builtin_cpu_supports()\"
|
||||||
|
+ #endif
|
||||||
|
+ int main() { return 0; }
|
||||||
|
+ " HAS_POWER_SUPPORT)
|
||||||
|
+
|
||||||
|
+ if(HAS_POWER_SUPPORT AND HAS_C_ATTR_IFUNC)
|
||||||
|
+ add_definitions(-DZ_POWER_OPT)
|
||||||
|
+
|
||||||
|
+ set(CMAKE_REQUIRED_FLAGS -mcpu=power8)
|
||||||
|
+ CHECK_C_SOURCE_COMPILES("int main(void){return 0;}" POWER8)
|
||||||
|
+
|
||||||
|
+ if(POWER8)
|
||||||
|
+ add_definitions(-DZ_POWER8)
|
||||||
|
+ set(ZLIB_POWER8 )
|
||||||
|
+
|
||||||
|
+ set_source_files_properties(
|
||||||
|
+ ${ZLIB_POWER8}
|
||||||
|
+ PROPERTIES COMPILE_FLAGS -mcpu=power8)
|
||||||
|
+ endif()
|
||||||
|
+
|
||||||
|
+ set(CMAKE_REQUIRED_FLAGS -mcpu=power9)
|
||||||
|
+ CHECK_C_SOURCE_COMPILES("int main(void){return 0;}" POWER9)
|
||||||
|
+
|
||||||
|
+ if(POWER9)
|
||||||
|
+ add_definitions(-DZ_POWER9)
|
||||||
|
+ set(ZLIB_POWER9 )
|
||||||
|
+
|
||||||
|
+ set_source_files_properties(
|
||||||
|
+ ${ZLIB_POWER9}
|
||||||
|
+ PROPERTIES COMPILE_FLAGS -mcpu=power9)
|
||||||
|
+ endif()
|
||||||
|
+
|
||||||
|
+ set(ZLIB_PRIVATE_HDRS ${ZLIB_PRIVATE_HDRS} contrib/power/power.h)
|
||||||
|
+ set(ZLIB_SRCS ${ZLIB_SRCS} ${ZLIB_POWER8} ${ZLIB_POWER9})
|
||||||
|
+ endif()
|
||||||
|
+ endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MSVC)
|
||||||
|
diff --git a/configure b/configure
|
||||||
|
index d026b35..0538d58 100755
|
||||||
|
--- a/configure
|
||||||
|
+++ b/configure
|
||||||
|
@@ -846,6 +846,72 @@ else
|
||||||
|
echo "Checking for sys/sdt.h ... No." | tee -a configure.log
|
||||||
|
fi
|
||||||
|
|
||||||
|
+# test to see if we can use a GNU indirect function to detect and load optimized code at runtime
|
||||||
|
+echo >> configure.log
|
||||||
|
+cat > $test.c <<EOF
|
||||||
|
+static int test_ifunc_native(void)
|
||||||
|
+{
|
||||||
|
+ return 1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int (*(check_ifunc_native(void)))(void)
|
||||||
|
+{
|
||||||
|
+ return test_ifunc_native;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int test_ifunc(void) __attribute__ ((ifunc ("check_ifunc_native")));
|
||||||
|
+EOF
|
||||||
|
+
|
||||||
|
+if tryboth $CC -c $CFLAGS $test.c; then
|
||||||
|
+ SFLAGS="${SFLAGS} -DHAVE_IFUNC"
|
||||||
|
+ CFLAGS="${CFLAGS} -DHAVE_IFUNC"
|
||||||
|
+ echo "Checking for attribute(ifunc) support... Yes." | tee -a configure.log
|
||||||
|
+else
|
||||||
|
+ echo "Checking for attribute(ifunc) support... No." | tee -a configure.log
|
||||||
|
+fi
|
||||||
|
+
|
||||||
|
+# Test to see if we can use the optimizations for Power
|
||||||
|
+echo >> configure.log
|
||||||
|
+cat > $test.c <<EOF
|
||||||
|
+#ifndef _ARCH_PPC
|
||||||
|
+ #error "Target is not Power"
|
||||||
|
+#endif
|
||||||
|
+#ifndef HAVE_IFUNC
|
||||||
|
+ #error "Target doesn't support ifunc"
|
||||||
|
+#endif
|
||||||
|
+#ifndef __BUILTIN_CPU_SUPPORTS__
|
||||||
|
+ #error "Target doesn't support __builtin_cpu_supports()"
|
||||||
|
+#endif
|
||||||
|
+EOF
|
||||||
|
+
|
||||||
|
+if tryboth $CC -c $CFLAGS $test.c; then
|
||||||
|
+ echo "int main(void){return 0;}" > $test.c
|
||||||
|
+
|
||||||
|
+ if tryboth $CC -c $CFLAGS -mcpu=power8 $test.c; then
|
||||||
|
+ POWER8="-DZ_POWER8"
|
||||||
|
+ PIC_OBJC="${PIC_OBJC}"
|
||||||
|
+ OBJC="${OBJC}"
|
||||||
|
+ echo "Checking for -mcpu=power8 support... Yes." | tee -a configure.log
|
||||||
|
+ else
|
||||||
|
+ echo "Checking for -mcpu=power8 support... No." | tee -a configure.log
|
||||||
|
+ fi
|
||||||
|
+
|
||||||
|
+ if tryboth $CC -c $CFLAGS -mcpu=power9 $test.c; then
|
||||||
|
+ POWER9="-DZ_POWER9"
|
||||||
|
+ PIC_OBJC="${PIC_OBJC}"
|
||||||
|
+ OBJC="${OBJC}"
|
||||||
|
+ echo "Checking for -mcpu=power9 support... Yes." | tee -a configure.log
|
||||||
|
+ else
|
||||||
|
+ echo "Checking for -mcpu=power9 support... No." | tee -a configure.log
|
||||||
|
+ fi
|
||||||
|
+
|
||||||
|
+ SFLAGS="${SFLAGS} ${POWER8} ${POWER9} -DZ_POWER_OPT"
|
||||||
|
+ CFLAGS="${CFLAGS} ${POWER8} ${POWER9} -DZ_POWER_OPT"
|
||||||
|
+ echo "Checking for Power optimizations support... Yes." | tee -a configure.log
|
||||||
|
+else
|
||||||
|
+ echo "Checking for Power optimizations support... No." | tee -a configure.log
|
||||||
|
+fi
|
||||||
|
+
|
||||||
|
# show the results in the log
|
||||||
|
echo >> configure.log
|
||||||
|
echo ALL = $ALL >> configure.log
|
||||||
|
diff --git a/contrib/README.contrib b/contrib/README.contrib
|
||||||
|
index b4d3b18..2a53f90 100644
|
||||||
|
--- a/contrib/README.contrib
|
||||||
|
+++ b/contrib/README.contrib
|
||||||
|
@@ -19,6 +19,10 @@ asm686/ by Brian Raiter <breadbox@muppetlabs.com>
|
||||||
|
blast/ by Mark Adler <madler@alumni.caltech.edu>
|
||||||
|
Decompressor for output of PKWare Data Compression Library (DCL)
|
||||||
|
|
||||||
|
+gcc/ by Matheus Castanho <msc@linux.ibm.com>
|
||||||
|
+ and Rogerio Alves <rcardoso@linux.ibm.com>
|
||||||
|
+ Optimization helpers using GCC-specific extensions
|
||||||
|
+
|
||||||
|
delphi/ by Cosmin Truta <cosmint@cs.ubbcluj.ro>
|
||||||
|
Support for Delphi and C++ Builder
|
||||||
|
|
||||||
|
@@ -63,6 +67,10 @@ minizip/ by Gilles Vollant <info@winimage.com>
|
||||||
|
pascal/ by Bob Dellaca <bobdl@xtra.co.nz> et al.
|
||||||
|
Support for Pascal
|
||||||
|
|
||||||
|
+power/ by Matheus Castanho <msc@linux.ibm.com>
|
||||||
|
+ and Rogerio Alves <rcardoso@linux.ibm.com>
|
||||||
|
+ Optimized functions for Power processors
|
||||||
|
+
|
||||||
|
puff/ by Mark Adler <madler@alumni.caltech.edu>
|
||||||
|
Small, low memory usage inflate. Also serves to provide an
|
||||||
|
unambiguous description of the deflate format.
|
||||||
|
diff --git a/contrib/gcc/zifunc.h b/contrib/gcc/zifunc.h
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..daf4fe4
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/contrib/gcc/zifunc.h
|
||||||
|
@@ -0,0 +1,60 @@
|
||||||
|
+/* Copyright (C) 2019 Matheus Castanho <msc@linux.ibm.com>, IBM
|
||||||
|
+ * 2019 Rogerio Alves <rogerio.alves@ibm.com>, IBM
|
||||||
|
+ * For conditions of distribution and use, see copyright notice in zlib.h
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#ifndef Z_IFUNC_H_
|
||||||
|
+#define Z_IFUNC_H_
|
||||||
|
+
|
||||||
|
+/* Helpers for arch optimizations */
|
||||||
|
+
|
||||||
|
+#define Z_IFUNC(fname) \
|
||||||
|
+ typeof(fname) fname __attribute__ ((ifunc (#fname "_resolver"))); \
|
||||||
|
+ local typeof(fname) *fname##_resolver(void)
|
||||||
|
+/* This is a helper macro to declare a resolver for an indirect function
|
||||||
|
+ * (ifunc). Let's say you have function
|
||||||
|
+ *
|
||||||
|
+ * int foo (int a);
|
||||||
|
+ *
|
||||||
|
+ * for which you want to provide different implementations, for example:
|
||||||
|
+ *
|
||||||
|
+ * int foo_clever (int a) {
|
||||||
|
+ * ... clever things ...
|
||||||
|
+ * }
|
||||||
|
+ *
|
||||||
|
+ * int foo_smart (int a) {
|
||||||
|
+ * ... smart things ...
|
||||||
|
+ * }
|
||||||
|
+ *
|
||||||
|
+ * You will have to declare foo() as an indirect function and also provide a
|
||||||
|
+ * resolver for it, to choose between foo_clever() and foo_smart() based on
|
||||||
|
+ * some criteria you define (e.g. processor features).
|
||||||
|
+ *
|
||||||
|
+ * Since most likely foo() has a default implementation somewhere in zlib, you
|
||||||
|
+ * may have to rename it so the 'foo' symbol can be used by the ifunc without
|
||||||
|
+ * conflicts.
|
||||||
|
+ *
|
||||||
|
+ * #define foo foo_default
|
||||||
|
+ * int foo (int a) {
|
||||||
|
+ * ...
|
||||||
|
+ * }
|
||||||
|
+ * #undef foo
|
||||||
|
+ *
|
||||||
|
+ * Now you just have to provide a resolver function to choose which function
|
||||||
|
+ * should be used (decided at runtime on the first call to foo()):
|
||||||
|
+ *
|
||||||
|
+ * Z_IFUNC(foo) {
|
||||||
|
+ * if (... some condition ...)
|
||||||
|
+ * return foo_clever;
|
||||||
|
+ *
|
||||||
|
+ * if (... other condition ...)
|
||||||
|
+ * return foo_smart;
|
||||||
|
+ *
|
||||||
|
+ * return foo_default;
|
||||||
|
+ * }
|
||||||
|
+ *
|
||||||
|
+ * All calls to foo() throughout the code can remain untouched, all the magic
|
||||||
|
+ * will be done by the linker using the resolver function.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#endif /* Z_IFUNC_H_ */
|
||||||
|
diff --git a/contrib/power/power.h b/contrib/power/power.h
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..b42c7d6
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/contrib/power/power.h
|
||||||
|
@@ -0,0 +1,4 @@
|
||||||
|
+/* Copyright (C) 2019 Matheus Castanho <msc@linux.ibm.com>, IBM
|
||||||
|
+ * 2019 Rogerio Alves <rogerio.alves@ibm.com>, IBM
|
||||||
|
+ * For conditions of distribution and use, see copyright notice in zlib.h
|
||||||
|
+ */
|
||||||
|
--
|
||||||
|
2.39.1
|
||||||
|
|
@ -1,365 +0,0 @@
|
|||||||
From 27a84de4a30cd35f8565937397f6d1205b912818 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Ondrej Dubaj <odubaj@redhat.com>
|
|
||||||
Date: Thu, 5 Sep 2019 09:16:35 +0200
|
|
||||||
Subject: [PATCH 1/2] fix: power8 crc32 - return 0 with 0 ptr passed
|
|
||||||
|
|
||||||
---
|
|
||||||
contrib/power8-crc/vec_crc32.c | 1 +
|
|
||||||
1 file changed, 1 insertion(+)
|
|
||||||
|
|
||||||
diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c
|
|
||||||
index bb2204b..5ce9cd2 100644
|
|
||||||
--- a/contrib/power8-crc/vec_crc32.c
|
|
||||||
+++ b/contrib/power8-crc/vec_crc32.c
|
|
||||||
@@ -74,6 +74,7 @@ unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p,
|
|
||||||
unsigned int prealign;
|
|
||||||
unsigned int tail;
|
|
||||||
|
|
||||||
+ if (p == (const unsigned char *) 0x0) return 0;
|
|
||||||
#ifdef CRC_XOR
|
|
||||||
crc ^= 0xffffffff;
|
|
||||||
#endif
|
|
||||||
--
|
|
||||||
2.19.1
|
|
||||||
|
|
||||||
|
|
||||||
From c066ac92982a2ffe5b1e9bd36000058927437bd5 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Ondrej Dubaj <odubaj@redhat.com>
|
|
||||||
Date: Thu, 5 Sep 2019 09:36:47 +0200
|
|
||||||
Subject: [PATCH 2/2] Add CRC32 tests (crc32_test)
|
|
||||||
|
|
||||||
This commit includes a CRC32 test (crc32_test). These tests are important
|
|
||||||
since some architectures may want to include CPU dependent optimizations for
|
|
||||||
CRC32 algorithm like using vector instructions and we may want to
|
|
||||||
validate those.
|
|
||||||
---
|
|
||||||
Makefile.in | 35 +++++---
|
|
||||||
test/crc32_test.c | 205 ++++++++++++++++++++++++++++++++++++++++++++++
|
|
||||||
2 files changed, 230 insertions(+), 10 deletions(-)
|
|
||||||
create mode 100644 test/crc32_test.c
|
|
||||||
|
|
||||||
diff --git a/Makefile.in b/Makefile.in
|
|
||||||
index 40b5cfb..6070dcc 100644
|
|
||||||
--- a/Makefile.in
|
|
||||||
+++ b/Makefile.in
|
|
||||||
@@ -75,11 +75,11 @@ PIC_OBJS = $(PIC_OBJC) $(PIC_OBJA)
|
|
||||||
|
|
||||||
all: static shared
|
|
||||||
|
|
||||||
-static: example$(EXE) minigzip$(EXE)
|
|
||||||
+static: crc32_test$(EXE) example$(EXE) minigzip$(EXE)
|
|
||||||
|
|
||||||
-shared: examplesh$(EXE) minigzipsh$(EXE)
|
|
||||||
+shared: crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE)
|
|
||||||
|
|
||||||
-all64: example64$(EXE) minigzip64$(EXE)
|
|
||||||
+all64: crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE)
|
|
||||||
|
|
||||||
check: test
|
|
||||||
|
|
||||||
@@ -87,7 +87,7 @@ test: all teststatic testshared
|
|
||||||
|
|
||||||
teststatic: static
|
|
||||||
@TMPST=tmpst_$$; \
|
|
||||||
- if echo hello world | ./minigzip | ./minigzip -d && ./example $$TMPST ; then \
|
|
||||||
+ if echo hello world | ./minigzip | ./minigzip -d && ./example $$TMPST && ./crc32_test; then \
|
|
||||||
echo ' *** zlib test OK ***'; \
|
|
||||||
else \
|
|
||||||
echo ' *** zlib test FAILED ***'; false; \
|
|
||||||
@@ -100,7 +100,7 @@ testshared: shared
|
|
||||||
DYLD_LIBRARY_PATH=`pwd`:$(DYLD_LIBRARY_PATH) ; export DYLD_LIBRARY_PATH; \
|
|
||||||
SHLIB_PATH=`pwd`:$(SHLIB_PATH) ; export SHLIB_PATH; \
|
|
||||||
TMPSH=tmpsh_$$; \
|
|
||||||
- if echo hello world | ./minigzipsh | ./minigzipsh -d && ./examplesh $$TMPSH; then \
|
|
||||||
+ if echo hello world | ./minigzipsh | ./minigzipsh -d && ./examplesh $$TMPSH && ./crc32_testsh; then \
|
|
||||||
echo ' *** zlib shared test OK ***'; \
|
|
||||||
else \
|
|
||||||
echo ' *** zlib shared test FAILED ***'; false; \
|
|
||||||
@@ -109,7 +109,7 @@ testshared: shared
|
|
||||||
|
|
||||||
test64: all64
|
|
||||||
@TMP64=tmp64_$$; \
|
|
||||||
- if echo hello world | ./minigzip64 | ./minigzip64 -d && ./example64 $$TMP64; then \
|
|
||||||
+ if echo hello world | ./minigzip64 | ./minigzip64 -d && ./example64 $$TMP64 && ./crc32_test64; then \
|
|
||||||
echo ' *** zlib 64-bit test OK ***'; \
|
|
||||||
else \
|
|
||||||
echo ' *** zlib 64-bit test FAILED ***'; false; \
|
|
||||||
@@ -157,6 +157,12 @@ example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h
|
|
||||||
minigzip.o: $(SRCDIR)test/minigzip.c $(SRCDIR)zlib.h zconf.h
|
|
||||||
$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/minigzip.c
|
|
||||||
|
|
||||||
+crc32_test.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h
|
|
||||||
+ $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/crc32_test.c
|
|
||||||
+
|
|
||||||
+crc32_test64.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h
|
|
||||||
+ $(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/crc32_test.c
|
|
||||||
+
|
|
||||||
example64.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h
|
|
||||||
$(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/example.c
|
|
||||||
|
|
||||||
@@ -307,12 +313,21 @@ example$(EXE): example.o $(STATICLIB)
|
|
||||||
minigzip$(EXE): minigzip.o $(STATICLIB)
|
|
||||||
$(CC) $(CFLAGS) -o $@ minigzip.o $(TEST_LDFLAGS)
|
|
||||||
|
|
||||||
+crc32_test$(EXE): crc32_test.o $(STATICLIB)
|
|
||||||
+ $(CC) $(CFLAGS) -o $@ crc32_test.o $(TEST_LDFLAGS)
|
|
||||||
+
|
|
||||||
+crc32_testsh$(EXE): crc32_test.o $(SHAREDLIBV)
|
|
||||||
+ $(CC) $(CFLAGS) -o $@ crc32_test.o -L. $(SHAREDLIBV)
|
|
||||||
+
|
|
||||||
examplesh$(EXE): example.o $(SHAREDLIBV)
|
|
||||||
$(CC) $(CFLAGS) -o $@ example.o -L. $(SHAREDLIBV)
|
|
||||||
|
|
||||||
minigzipsh$(EXE): minigzip.o $(SHAREDLIBV)
|
|
||||||
$(CC) $(CFLAGS) -o $@ minigzip.o -L. $(SHAREDLIBV)
|
|
||||||
|
|
||||||
+crc32_test64$(EXE): crc32_test64.o $(STATICLIB)
|
|
||||||
+ $(CC) $(CFLAGS) -o $@ crc32_test64.o $(TEST_LDFLAGS)
|
|
||||||
+
|
|
||||||
example64$(EXE): example64.o $(STATICLIB)
|
|
||||||
$(CC) $(CFLAGS) -o $@ example64.o $(TEST_LDFLAGS)
|
|
||||||
|
|
||||||
@@ -382,8 +397,8 @@ zconf: $(SRCDIR)zconf.h.in
|
|
||||||
mostlyclean: clean
|
|
||||||
clean:
|
|
||||||
rm -f *.o *.lo *~ \
|
|
||||||
- example$(EXE) minigzip$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
|
|
||||||
- example64$(EXE) minigzip64$(EXE) \
|
|
||||||
+ crc32_test$(EXE) example$(EXE) minigzip$(EXE) crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
|
|
||||||
+ crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE) \
|
|
||||||
infcover \
|
|
||||||
libz.* foo.gz so_locations \
|
|
||||||
_match.s maketree contrib/infback9/*.o
|
|
||||||
@@ -407,7 +422,7 @@ tags:
|
|
||||||
|
|
||||||
adler32.o zutil.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
|
|
||||||
gzclose.o gzlib.o gzread.o gzwrite.o: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h
|
|
||||||
-compress.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h
|
|
||||||
+compress.o crc32_test.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h
|
|
||||||
crc32.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h
|
|
||||||
deflate.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
|
|
||||||
infback.o inflate.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h
|
|
||||||
@@ -417,7 +432,7 @@ trees.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)tr
|
|
||||||
|
|
||||||
adler32.lo zutil.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
|
|
||||||
gzclose.lo gzlib.lo gzread.lo gzwrite.lo: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h
|
|
||||||
-compress.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h
|
|
||||||
+compress.lo crc32_test.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h
|
|
||||||
crc32.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h
|
|
||||||
deflate.lo: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
|
|
||||||
infback.lo inflate.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h
|
|
||||||
diff --git a/test/crc32_test.c b/test/crc32_test.c
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..5d73128
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/test/crc32_test.c
|
|
||||||
@@ -0,0 +1,205 @@
|
|
||||||
+/* crc32_test.c -- unit test for crc32 in the zlib compression library
|
|
||||||
+ * Copyright (C) 1995-2006, 2010, 2011, 2016, 2019 Rogerio Alves
|
|
||||||
+ * For conditions of distribution and use, see copyright notice in zlib.h
|
|
||||||
+ */
|
|
||||||
+
|
|
||||||
+#include "zlib.h"
|
|
||||||
+#include <stdio.h>
|
|
||||||
+
|
|
||||||
+#ifdef STDC
|
|
||||||
+# include <string.h>
|
|
||||||
+# include <stdlib.h>
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+void test_crc32 OF((uLong crc, Byte* buf, z_size_t len, uLong chk, int line));
|
|
||||||
+int main OF((void));
|
|
||||||
+
|
|
||||||
+typedef struct {
|
|
||||||
+ int line;
|
|
||||||
+ uLong crc;
|
|
||||||
+ Byte* buf;
|
|
||||||
+ int len;
|
|
||||||
+ uLong expect;
|
|
||||||
+} crc32_test;
|
|
||||||
+
|
|
||||||
+void test_crc32(crc, buf, len, chk, line)
|
|
||||||
+ uLong crc;
|
|
||||||
+ Byte *buf;
|
|
||||||
+ z_size_t len;
|
|
||||||
+ uLong chk;
|
|
||||||
+ int line;
|
|
||||||
+{
|
|
||||||
+ uLong res = crc32(crc, buf, len);
|
|
||||||
+ if (res != chk) {
|
|
||||||
+ fprintf(stderr, "FAIL [%d]: crc32 returned 0x%08X expected 0x%08X\n",
|
|
||||||
+ line, (unsigned int)res, (unsigned int)chk);
|
|
||||||
+ exit(1);
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static const crc32_test tests[] = {
|
|
||||||
+ {__LINE__, 0x0, 0x0, 0, 0x0},
|
|
||||||
+ {__LINE__, 0xffffffff, 0x0, 0, 0x0},
|
|
||||||
+ {__LINE__, 0x0, 0x0, 255, 0x0}, /* BZ 174799. */
|
|
||||||
+ {__LINE__, 0x0, 0x0, 256, 0x0},
|
|
||||||
+ {__LINE__, 0x0, 0x0, 257, 0x0},
|
|
||||||
+ {__LINE__, 0x0, 0x0, 32767, 0x0},
|
|
||||||
+ {__LINE__, 0x0, 0x0, 32768, 0x0},
|
|
||||||
+ {__LINE__, 0x0, 0x0, 32769, 0x0},
|
|
||||||
+ {__LINE__, 0x0, "", 0, 0x0},
|
|
||||||
+ {__LINE__, 0xffffffff, "", 0, 0xffffffff},
|
|
||||||
+ {__LINE__, 0x0, "abacus", 6, 0xc3d7115b},
|
|
||||||
+ {__LINE__, 0x0, "backlog", 7, 0x269205},
|
|
||||||
+ {__LINE__, 0x0, "campfire", 8, 0x22a515f8},
|
|
||||||
+ {__LINE__, 0x0, "delta", 5, 0x9643fed9},
|
|
||||||
+ {__LINE__, 0x0, "executable", 10, 0xd68eda01},
|
|
||||||
+ {__LINE__, 0x0, "file", 4, 0x8c9f3610},
|
|
||||||
+ {__LINE__, 0x0, "greatest", 8, 0xc1abd6cd},
|
|
||||||
+ {__LINE__, 0x0, "hello", 5, 0x3610a686},
|
|
||||||
+ {__LINE__, 0x0, "inverter", 8, 0xc9e962c9},
|
|
||||||
+ {__LINE__, 0x0, "jigsaw", 6, 0xce4e3f69},
|
|
||||||
+ {__LINE__, 0x0, "karate", 6, 0x890be0e2},
|
|
||||||
+ {__LINE__, 0x0, "landscape", 9, 0xc4e0330b},
|
|
||||||
+ {__LINE__, 0x0, "machine", 7, 0x1505df84},
|
|
||||||
+ {__LINE__, 0x0, "nanometer", 9, 0xd4e19f39},
|
|
||||||
+ {__LINE__, 0x0, "oblivion", 8, 0xdae9de77},
|
|
||||||
+ {__LINE__, 0x0, "panama", 6, 0x66b8979c},
|
|
||||||
+ {__LINE__, 0x0, "quest", 5, 0x4317f817},
|
|
||||||
+ {__LINE__, 0x0, "resource", 8, 0xbc91f416},
|
|
||||||
+ {__LINE__, 0x0, "secret", 6, 0x5ca2e8e5},
|
|
||||||
+ {__LINE__, 0x0, "test", 4, 0xd87f7e0c},
|
|
||||||
+ {__LINE__, 0x0, "ultimate", 8, 0x3fc79b0b},
|
|
||||||
+ {__LINE__, 0x0, "vector", 6, 0x1b6e485b},
|
|
||||||
+ {__LINE__, 0x0, "walrus", 6, 0xbe769b97},
|
|
||||||
+ {__LINE__, 0x0, "xeno", 4, 0xe7a06444},
|
|
||||||
+ {__LINE__, 0x0, "yelling", 7, 0xfe3944e5},
|
|
||||||
+ {__LINE__, 0x0, "zlib", 4, 0x73887d3a},
|
|
||||||
+ {__LINE__, 0x0, "4BJD7PocN1VqX0jXVpWB", 20, 0xd487a5a1},
|
|
||||||
+ {__LINE__, 0x0, "F1rPWI7XvDs6nAIRx41l", 20, 0x61a0132e},
|
|
||||||
+ {__LINE__, 0x0, "ldhKlsVkPFOveXgkGtC2", 20, 0xdf02f76},
|
|
||||||
+ {__LINE__, 0x0, "5KKnGOOrs8BvJ35iKTOS", 20, 0x579b2b0a},
|
|
||||||
+ {__LINE__, 0x0, "0l1tw7GOcem06Ddu7yn4", 20, 0xf7d16e2d},
|
|
||||||
+ {__LINE__, 0x0, "MCr47CjPIn9R1IvE1Tm5", 20, 0x731788f5},
|
|
||||||
+ {__LINE__, 0x0, "UcixbzPKTIv0SvILHVdO", 20, 0x7112bb11},
|
|
||||||
+ {__LINE__, 0x0, "dGnAyAhRQDsWw0ESou24", 20, 0xf32a0dac},
|
|
||||||
+ {__LINE__, 0x0, "di0nvmY9UYMYDh0r45XT", 20, 0x625437bb},
|
|
||||||
+ {__LINE__, 0x0, "2XKDwHfAhFsV0RhbqtvH", 20, 0x896930f9},
|
|
||||||
+ {__LINE__, 0x0, "ZhrANFIiIvRnqClIVyeD", 20, 0x8579a37},
|
|
||||||
+ {__LINE__, 0x0, "v7Q9ehzioTOVeDIZioT1", 20, 0x632aa8e0},
|
|
||||||
+ {__LINE__, 0x0, "Yod5hEeKcYqyhfXbhxj2", 20, 0xc829af29},
|
|
||||||
+ {__LINE__, 0x0, "GehSWY2ay4uUKhehXYb0", 20, 0x1b08b7e8},
|
|
||||||
+ {__LINE__, 0x0, "kwytJmq6UqpflV8Y8GoE", 20, 0x4e33b192},
|
|
||||||
+ {__LINE__, 0x0, "70684206568419061514", 20, 0x59a179f0},
|
|
||||||
+ {__LINE__, 0x0, "42015093765128581010", 20, 0xcd1013d7},
|
|
||||||
+ {__LINE__, 0x0, "88214814356148806939", 20, 0xab927546},
|
|
||||||
+ {__LINE__, 0x0, "43472694284527343838", 20, 0x11f3b20c},
|
|
||||||
+ {__LINE__, 0x0, "49769333513942933689", 20, 0xd562d4ca},
|
|
||||||
+ {__LINE__, 0x0, "54979784887993251199", 20, 0x233395f7},
|
|
||||||
+ {__LINE__, 0x0, "58360544869206793220", 20, 0x2d167fd5},
|
|
||||||
+ {__LINE__, 0x0, "27347953487840714234", 20, 0x8b5108ba},
|
|
||||||
+ {__LINE__, 0x0, "07650690295365319082", 20, 0xc46b3cd8},
|
|
||||||
+ {__LINE__, 0x0, "42655507906821911703", 20, 0xc10b2662},
|
|
||||||
+ {__LINE__, 0x0, "29977409200786225655", 20, 0xc9a0f9d2},
|
|
||||||
+ {__LINE__, 0x0, "85181542907229116674", 20, 0x9341357b},
|
|
||||||
+ {__LINE__, 0x0, "87963594337989416799", 20, 0xf0424937},
|
|
||||||
+ {__LINE__, 0x0, "21395988329504168551", 20, 0xd7c4c31f},
|
|
||||||
+ {__LINE__, 0x0, "51991013580943379423", 20, 0xf11edcc4},
|
|
||||||
+ {__LINE__, 0x0, "*]+@!);({_$;}[_},?{?;(_?,=-][@", 30, 0x40795df4},
|
|
||||||
+ {__LINE__, 0x0, "_@:_).&(#.[:[{[:)$++-($_;@[)}+", 30, 0xdd61a631},
|
|
||||||
+ {__LINE__, 0x0, "&[!,[$_==}+.]@!;*(+},[;:)$;)-@", 30, 0xca907a99},
|
|
||||||
+ {__LINE__, 0x0, "]{.[.+?+[[=;[?}_#&;[=)__$$:+=_", 30, 0xf652deac},
|
|
||||||
+ {__LINE__, 0x0, "-%.)=/[@].:.(:,()$;=%@-$?]{%+%", 30, 0xaf39a5a9},
|
|
||||||
+ {__LINE__, 0x0, "+]#$(@&.=:,*];/.!]%/{:){:@(;)$", 30, 0x6bebb4cf},
|
|
||||||
+ {__LINE__, 0x0, ")-._.:?[&:.=+}(*$/=!.${;(=$@!}", 30, 0x76430bac},
|
|
||||||
+ {__LINE__, 0x0, ":(_*&%/[[}+,?#$&*+#[([*-/#;%(]", 30, 0x6c80c388},
|
|
||||||
+ {__LINE__, 0x0, "{[#-;:$/{)(+[}#]/{&!%(@)%:@-$:", 30, 0xd54d977d},
|
|
||||||
+ {__LINE__, 0x0, "_{$*,}(&,@.)):=!/%(&(,,-?$}}}!", 30, 0xe3966ad5},
|
|
||||||
+ {__LINE__, 0x0, "e$98KNzqaV)Y:2X?]77].{gKRD4G5{mHZk,Z)SpU%L3FSgv!Wb8MLAFdi{+fp)c,@8m6v)yXg@]HBDFk?.4&}g5_udE*JHCiH=aL", 100, 0xe7c71db9},
|
|
||||||
+ {__LINE__, 0x0, "r*Fd}ef+5RJQ;+W=4jTR9)R*p!B;]Ed7tkrLi;88U7g@3v!5pk2X6D)vt,.@N8c]@yyEcKi[vwUu@.Ppm@C6%Mv*3Nw}Y,58_aH)", 100, 0xeaa52777},
|
|
||||||
+ {__LINE__, 0x0, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 100, 0xcd472048},
|
|
||||||
+ {__LINE__, 0x7a30360d, "abacus", 6, 0xf8655a84},
|
|
||||||
+ {__LINE__, 0x6fd767ee, "backlog", 7, 0x1ed834b1},
|
|
||||||
+ {__LINE__, 0xefeb7589, "campfire", 8, 0x686cfca},
|
|
||||||
+ {__LINE__, 0x61cf7e6b, "delta", 5, 0x1554e4b1},
|
|
||||||
+ {__LINE__, 0xdc712e2, "executable", 10, 0x761b4254},
|
|
||||||
+ {__LINE__, 0xad23c7fd, "file", 4, 0x7abdd09b},
|
|
||||||
+ {__LINE__, 0x85cb2317, "greatest", 8, 0x4ba91c6b},
|
|
||||||
+ {__LINE__, 0x9eed31b0, "inverter", 8, 0xd5e78ba5},
|
|
||||||
+ {__LINE__, 0xb94f34ca, "jigsaw", 6, 0x23649109},
|
|
||||||
+ {__LINE__, 0xab058a2, "karate", 6, 0xc5591f41},
|
|
||||||
+ {__LINE__, 0x5bff2b7a, "landscape", 9, 0xf10eb644},
|
|
||||||
+ {__LINE__, 0x605c9a5f, "machine", 7, 0xbaa0a636},
|
|
||||||
+ {__LINE__, 0x51bdeea5, "nanometer", 9, 0x6af89afb},
|
|
||||||
+ {__LINE__, 0x85c21c79, "oblivion", 8, 0xecae222b},
|
|
||||||
+ {__LINE__, 0x97216f56, "panama", 6, 0x47dffac4},
|
|
||||||
+ {__LINE__, 0x18444af2, "quest", 5, 0x70c2fe36},
|
|
||||||
+ {__LINE__, 0xbe6ce359, "resource", 8, 0x1471d925},
|
|
||||||
+ {__LINE__, 0x843071f1, "secret", 6, 0x50c9a0db},
|
|
||||||
+ {__LINE__, 0xf2480c60, "ultimate", 8, 0xf973daf8},
|
|
||||||
+ {__LINE__, 0x2d2feb3d, "vector", 6, 0x344ac03d},
|
|
||||||
+ {__LINE__, 0x7490310a, "walrus", 6, 0x6d1408ef},
|
|
||||||
+ {__LINE__, 0x97d247d4, "xeno", 4, 0xe62670b5},
|
|
||||||
+ {__LINE__, 0x93cf7599, "yelling", 7, 0x1b36da38},
|
|
||||||
+ {__LINE__, 0x73c84278, "zlib", 4, 0x6432d127},
|
|
||||||
+ {__LINE__, 0x228a87d1, "4BJD7PocN1VqX0jXVpWB", 20, 0x997107d0},
|
|
||||||
+ {__LINE__, 0xa7a048d0, "F1rPWI7XvDs6nAIRx41l", 20, 0xdc567274},
|
|
||||||
+ {__LINE__, 0x1f0ded40, "ldhKlsVkPFOveXgkGtC2", 20, 0xdcc63870},
|
|
||||||
+ {__LINE__, 0xa804a62f, "5KKnGOOrs8BvJ35iKTOS", 20, 0x6926cffd},
|
|
||||||
+ {__LINE__, 0x508fae6a, "0l1tw7GOcem06Ddu7yn4", 20, 0xb52b38bc},
|
|
||||||
+ {__LINE__, 0xe5adaf4f, "MCr47CjPIn9R1IvE1Tm5", 20, 0xf83b8178},
|
|
||||||
+ {__LINE__, 0x67136a40, "UcixbzPKTIv0SvILHVdO", 20, 0xc5213070},
|
|
||||||
+ {__LINE__, 0xb00c4a10, "dGnAyAhRQDsWw0ESou24", 20, 0xbc7648b0},
|
|
||||||
+ {__LINE__, 0x2e0c84b5, "di0nvmY9UYMYDh0r45XT", 20, 0xd8123a72},
|
|
||||||
+ {__LINE__, 0x81238d44, "2XKDwHfAhFsV0RhbqtvH", 20, 0xd5ac5620},
|
|
||||||
+ {__LINE__, 0xf853aa92, "ZhrANFIiIvRnqClIVyeD", 20, 0xceae099d},
|
|
||||||
+ {__LINE__, 0x5a692325, "v7Q9ehzioTOVeDIZioT1", 20, 0xb07d2b24},
|
|
||||||
+ {__LINE__, 0x3275b9f, "Yod5hEeKcYqyhfXbhxj2", 20, 0x24ce91df},
|
|
||||||
+ {__LINE__, 0x38371feb, "GehSWY2ay4uUKhehXYb0", 20, 0x707b3b30},
|
|
||||||
+ {__LINE__, 0xafc8bf62, "kwytJmq6UqpflV8Y8GoE", 20, 0x16abc6a9},
|
|
||||||
+ {__LINE__, 0x9b07db73, "70684206568419061514", 20, 0xae1fb7b7},
|
|
||||||
+ {__LINE__, 0xe75b214, "42015093765128581010", 20, 0xd4eecd2d},
|
|
||||||
+ {__LINE__, 0x72d0fe6f, "88214814356148806939", 20, 0x4660ec7},
|
|
||||||
+ {__LINE__, 0xf857a4b1, "43472694284527343838", 20, 0xfd8afdf7},
|
|
||||||
+ {__LINE__, 0x54b8e14, "49769333513942933689", 20, 0xc6d1b5f2},
|
|
||||||
+ {__LINE__, 0xd6aa5616, "54979784887993251199", 20, 0x32476461},
|
|
||||||
+ {__LINE__, 0x11e63098, "58360544869206793220", 20, 0xd917cf1a},
|
|
||||||
+ {__LINE__, 0xbe92385, "27347953487840714234", 20, 0x4ad14a12},
|
|
||||||
+ {__LINE__, 0x49511de0, "07650690295365319082", 20, 0xe37b5c6c},
|
|
||||||
+ {__LINE__, 0x3db13bc1, "42655507906821911703", 20, 0x7cc497f1},
|
|
||||||
+ {__LINE__, 0xbb899bea, "29977409200786225655", 20, 0x99781bb2},
|
|
||||||
+ {__LINE__, 0xf6cd9436, "85181542907229116674", 20, 0x132256a1},
|
|
||||||
+ {__LINE__, 0x9109e6c3, "87963594337989416799", 20, 0xbfdb2c83},
|
|
||||||
+ {__LINE__, 0x75770fc, "21395988329504168551", 20, 0x8d9d1e81},
|
|
||||||
+ {__LINE__, 0x69b1d19b, "51991013580943379423", 20, 0x7b6d4404},
|
|
||||||
+ {__LINE__, 0xc6132975, "*]+@!);({_$;}[_},?{?;(_?,=-][@", 30, 0x8619f010},
|
|
||||||
+ {__LINE__, 0xd58cb00c, "_@:_).&(#.[:[{[:)$++-($_;@[)}+", 30, 0x15746ac3},
|
|
||||||
+ {__LINE__, 0xb63b8caa, "&[!,[$_==}+.]@!;*(+},[;:)$;)-@", 30, 0xaccf812f},
|
|
||||||
+ {__LINE__, 0x8a45a2b8, "]{.[.+?+[[=;[?}_#&;[=)__$$:+=_", 30, 0x78af45de},
|
|
||||||
+ {__LINE__, 0xcbe95b78, "-%.)=/[@].:.(:,()$;=%@-$?]{%+%", 30, 0x25b06b59},
|
|
||||||
+ {__LINE__, 0x4ef8a54b, "+]#$(@&.=:,*];/.!]%/{:){:@(;)$", 30, 0x4ba0d08f},
|
|
||||||
+ {__LINE__, 0x76ad267a, ")-._.:?[&:.=+}(*$/=!.${;(=$@!}", 30, 0xe26b6aac},
|
|
||||||
+ {__LINE__, 0x569e613c, ":(_*&%/[[}+,?#$&*+#[([*-/#;%(]", 30, 0x7e2b0a66},
|
|
||||||
+ {__LINE__, 0x36aa61da, "{[#-;:$/{)(+[}#]/{&!%(@)%:@-$:", 30, 0xb3430dc7},
|
|
||||||
+ {__LINE__, 0xf67222df, "_{$*,}(&,@.)):=!/%(&(,,-?$}}}!", 30, 0x626c17a},
|
|
||||||
+ {__LINE__, 0x74b34fd3, "e$98KNzqaV)Y:2X?]77].{gKRD4G5{mHZk,Z)SpU%L3FSgv!Wb8MLAFdi{+fp)c,@8m6v)yXg@]HBDFk?.4&}g5_udE*JHCiH=aL", 100, 0xccf98060},
|
|
||||||
+ {__LINE__, 0x351fd770, "r*Fd}ef+5RJQ;+W=4jTR9)R*p!B;]Ed7tkrLi;88U7g@3v!5pk2X6D)vt,.@N8c]@yyEcKi[vwUu@.Ppm@C6%Mv*3Nw}Y,58_aH)", 100, 0xd8b95312},
|
|
||||||
+ {__LINE__, 0xc45aef77, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 100, 0xbb1c9912},
|
|
||||||
+ {__LINE__, 0xc45aef77, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
|
|
||||||
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
|
|
||||||
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
|
|
||||||
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
|
|
||||||
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&"
|
|
||||||
+ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 600, 0x888AFA5B}
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+static const int test_size = sizeof(tests) / sizeof(tests[0]);
|
|
||||||
+
|
|
||||||
+int main(void)
|
|
||||||
+{
|
|
||||||
+ int i;
|
|
||||||
+ for (i = 0; i < test_size; i++) {
|
|
||||||
+ test_crc32(tests[i].crc, tests[i].buf, tests[i].len,
|
|
||||||
+ tests[i].expect, tests[i].line);
|
|
||||||
+ }
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
\ No newline at end of file
|
|
||||||
--
|
|
||||||
2.19.1
|
|
||||||
|
|
@ -0,0 +1,428 @@
|
|||||||
|
From cfbf97cb54a6d06a80e86c85869331e4e2871129 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ilya Leoshkevich <iii@linux.ibm.com>
|
||||||
|
Date: Thu, 19 Mar 2020 11:52:03 +0100
|
||||||
|
Subject: [PATCH] s390x: vectorize crc32
|
||||||
|
|
||||||
|
Use vector extensions when compiling for s390x and binutils knows
|
||||||
|
about them. At runtime, check whether kernel supports vector
|
||||||
|
extensions (it has to be not just the CPU, but also the kernel) and
|
||||||
|
choose between the regular and the vectorized implementations.
|
||||||
|
---
|
||||||
|
Makefile.in | 9 ++
|
||||||
|
configure | 28 +++++
|
||||||
|
contrib/gcc/zifunc.h | 21 +++-
|
||||||
|
contrib/s390/crc32-vx.c | 195 ++++++++++++++++++++++++++++++++
|
||||||
|
contrib/s390/crc32_z_resolver.c | 41 +++++++
|
||||||
|
crc32.c | 11 +-
|
||||||
|
6 files changed, 301 insertions(+), 4 deletions(-)
|
||||||
|
create mode 100644 contrib/s390/crc32-vx.c
|
||||||
|
create mode 100644 contrib/s390/crc32_z_resolver.c
|
||||||
|
|
||||||
|
diff --git a/Makefile.in b/Makefile.in
|
||||||
|
index d392616..63f76da 100644
|
||||||
|
--- a/Makefile.in
|
||||||
|
+++ b/Makefile.in
|
||||||
|
@@ -29,6 +29,7 @@ LDFLAGS=
|
||||||
|
TEST_LDFLAGS=-L. libz.a
|
||||||
|
LDSHARED=$(CC)
|
||||||
|
CPP=$(CC) -E
|
||||||
|
+VGFMAFLAG=
|
||||||
|
|
||||||
|
STATICLIB=libz.a
|
||||||
|
SHAREDLIB=libz.so
|
||||||
|
@@ -179,6 +180,9 @@ crc32.o: $(SRCDIR)crc32.c
|
||||||
|
crc32_z_power8.o: $(SRCDIR)contrib/power/crc32_z_power8.c
|
||||||
|
$(CC) $(CFLAGS) -mcpu=power8 $(ZINC) -c -o $@ $(SRCDIR)contrib/power/crc32_z_power8.c
|
||||||
|
|
||||||
|
+crc32-vx.o: $(SRCDIR)contrib/s390/crc32-vx.c
|
||||||
|
+ $(CC) $(CFLAGS) $(VGFMAFLAG) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390/crc32-vx.c
|
||||||
|
+
|
||||||
|
deflate.o: $(SRCDIR)deflate.c
|
||||||
|
$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
|
||||||
|
|
||||||
|
@@ -229,6 +233,11 @@ crc32.lo: $(SRCDIR)crc32.c
|
||||||
|
$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
|
||||||
|
-@mv objs/crc32.o $@
|
||||||
|
|
||||||
|
+crc32-vx.lo: $(SRCDIR)contrib/s390/crc32-vx.c
|
||||||
|
+ -@mkdir objs 2>/dev/null || test -d objs
|
||||||
|
+ $(CC) $(SFLAGS) $(VGFMAFLAG) $(ZINC) -DPIC -c -o objs/crc32-vx.o $(SRCDIR)contrib/s390/crc32-vx.c
|
||||||
|
+ -@mv objs/crc32-vx.o $@
|
||||||
|
+
|
||||||
|
crc32_z_power8.lo: $(SRCDIR)contrib/power/crc32_z_power8.c
|
||||||
|
-@mkdir objs 2>/dev/null || test -d objs
|
||||||
|
$(CC) $(SFLAGS) -mcpu=power8 $(ZINC) -DPIC -c -o objs/crc32_z_power8.o $(SRCDIR)contrib/power/crc32_z_power8.c
|
||||||
|
diff --git a/configure b/configure
|
||||||
|
index e37dac8..a4606b8 100755
|
||||||
|
--- a/configure
|
||||||
|
+++ b/configure
|
||||||
|
@@ -915,6 +915,32 @@ else
|
||||||
|
echo "Checking for Power optimizations support... No." | tee -a configure.log
|
||||||
|
fi
|
||||||
|
|
||||||
|
+# check if we are compiling for s390 and binutils support vector extensions
|
||||||
|
+VGFMAFLAG=-march=z13
|
||||||
|
+cat > $test.c <<EOF
|
||||||
|
+#ifndef __s390__
|
||||||
|
+#error
|
||||||
|
+#endif
|
||||||
|
+EOF
|
||||||
|
+if try $CC -c $CFLAGS $VGFMAFLAG $test.c; then
|
||||||
|
+ CFLAGS="$CFLAGS -DHAVE_S390X_VX"
|
||||||
|
+ SFLAGS="$SFLAGS -DHAVE_S390X_VX"
|
||||||
|
+ OBJC="$OBJC crc32-vx.o"
|
||||||
|
+ PIC_OBJC="$PIC_OBJC crc32-vx.lo"
|
||||||
|
+ echo "Checking for s390 vector extensions... Yes." | tee -a configure.log
|
||||||
|
+
|
||||||
|
+ for flag in -mzarch -fzvector; do
|
||||||
|
+ if try $CC -c $CFLAGS $VGFMAFLAG $flag $test.c; then
|
||||||
|
+ VGFMAFLAG="$VGFMAFLAG $flag"
|
||||||
|
+ echo "Checking for $flag... Yes." | tee -a configure.log
|
||||||
|
+ else
|
||||||
|
+ echo "Checking for $flag... No." | tee -a configure.log
|
||||||
|
+ fi
|
||||||
|
+ done
|
||||||
|
+else
|
||||||
|
+ echo "Checking for s390 vector extensions... No." | tee -a configure.log
|
||||||
|
+fi
|
||||||
|
+
|
||||||
|
# show the results in the log
|
||||||
|
echo >> configure.log
|
||||||
|
echo ALL = $ALL >> configure.log
|
||||||
|
@@ -947,6 +973,7 @@ echo mandir = $mandir >> configure.log
|
||||||
|
echo prefix = $prefix >> configure.log
|
||||||
|
echo sharedlibdir = $sharedlibdir >> configure.log
|
||||||
|
echo uname = $uname >> configure.log
|
||||||
|
+echo VGFMAFLAG = $VGFMAFLAG >> configure.log
|
||||||
|
|
||||||
|
# udpate Makefile with the configure results
|
||||||
|
sed < ${SRCDIR}Makefile.in "
|
||||||
|
@@ -956,6 +983,7 @@ sed < ${SRCDIR}Makefile.in "
|
||||||
|
/^LDFLAGS *=/s#=.*#=$LDFLAGS#
|
||||||
|
/^LDSHARED *=/s#=.*#=$LDSHARED#
|
||||||
|
/^CPP *=/s#=.*#=$CPP#
|
||||||
|
+/^VGFMAFLAG *=/s#=.*#=$VGFMAFLAG#
|
||||||
|
/^STATICLIB *=/s#=.*#=$STATICLIB#
|
||||||
|
/^SHAREDLIB *=/s#=.*#=$SHAREDLIB#
|
||||||
|
/^SHAREDLIBV *=/s#=.*#=$SHAREDLIBV#
|
||||||
|
diff --git a/contrib/gcc/zifunc.h b/contrib/gcc/zifunc.h
|
||||||
|
index daf4fe4..b62379e 100644
|
||||||
|
--- a/contrib/gcc/zifunc.h
|
||||||
|
+++ b/contrib/gcc/zifunc.h
|
||||||
|
@@ -8,9 +8,28 @@
|
||||||
|
|
||||||
|
/* Helpers for arch optimizations */
|
||||||
|
|
||||||
|
+#if defined(__clang__)
|
||||||
|
+#if __has_feature(coverage_sanitizer)
|
||||||
|
+#define Z_IFUNC_NO_SANCOV __attribute__((no_sanitize("coverage")))
|
||||||
|
+#else /* __has_feature(coverage_sanitizer) */
|
||||||
|
+#define Z_IFUNC_NO_SANCOV
|
||||||
|
+#endif /* __has_feature(coverage_sanitizer) */
|
||||||
|
+#else /* __clang__ */
|
||||||
|
+#define Z_IFUNC_NO_SANCOV
|
||||||
|
+#endif /* __clang__ */
|
||||||
|
+
|
||||||
|
+#ifdef __s390__
|
||||||
|
+#define Z_IFUNC_PARAMS unsigned long hwcap
|
||||||
|
+#define Z_IFUNC_ATTRS Z_IFUNC_NO_SANCOV
|
||||||
|
+#else /* __s390__ */
|
||||||
|
+#define Z_IFUNC_PARAMS void
|
||||||
|
+#define Z_IFUNC_ATTRS
|
||||||
|
+#endif /* __s390__ */
|
||||||
|
+
|
||||||
|
#define Z_IFUNC(fname) \
|
||||||
|
typeof(fname) fname __attribute__ ((ifunc (#fname "_resolver"))); \
|
||||||
|
- local typeof(fname) *fname##_resolver(void)
|
||||||
|
+ Z_IFUNC_ATTRS \
|
||||||
|
+ local typeof(fname) *fname##_resolver(Z_IFUNC_PARAMS)
|
||||||
|
/* This is a helper macro to declare a resolver for an indirect function
|
||||||
|
* (ifunc). Let's say you have function
|
||||||
|
*
|
||||||
|
diff --git a/contrib/s390/crc32-vx.c b/contrib/s390/crc32-vx.c
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..fa5387c
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/contrib/s390/crc32-vx.c
|
||||||
|
@@ -0,0 +1,195 @@
|
||||||
|
+/*
|
||||||
|
+ * Hardware-accelerated CRC-32 variants for Linux on z Systems
|
||||||
|
+ *
|
||||||
|
+ * Use the z/Architecture Vector Extension Facility to accelerate the
|
||||||
|
+ * computation of bitreflected CRC-32 checksums.
|
||||||
|
+ *
|
||||||
|
+ * This CRC-32 implementation algorithm is bitreflected and processes
|
||||||
|
+ * the least-significant bit first (Little-Endian).
|
||||||
|
+ *
|
||||||
|
+ * This code was originally written by Hendrik Brueckner
|
||||||
|
+ * <brueckner@linux.vnet.ibm.com> for use in the Linux kernel and has been
|
||||||
|
+ * relicensed under the zlib license.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#include "../../zutil.h"
|
||||||
|
+
|
||||||
|
+#include <stdint.h>
|
||||||
|
+#include <vecintrin.h>
|
||||||
|
+
|
||||||
|
+typedef unsigned char uv16qi __attribute__((vector_size(16)));
|
||||||
|
+typedef unsigned int uv4si __attribute__((vector_size(16)));
|
||||||
|
+typedef unsigned long long uv2di __attribute__((vector_size(16)));
|
||||||
|
+
|
||||||
|
+uint32_t crc32_le_vgfm_16(uint32_t crc, const unsigned char *buf, size_t len) {
|
||||||
|
+ /*
|
||||||
|
+ * The CRC-32 constant block contains reduction constants to fold and
|
||||||
|
+ * process particular chunks of the input data stream in parallel.
|
||||||
|
+ *
|
||||||
|
+ * For the CRC-32 variants, the constants are precomputed according to
|
||||||
|
+ * these definitions:
|
||||||
|
+ *
|
||||||
|
+ * R1 = [(x4*128+32 mod P'(x) << 32)]' << 1
|
||||||
|
+ * R2 = [(x4*128-32 mod P'(x) << 32)]' << 1
|
||||||
|
+ * R3 = [(x128+32 mod P'(x) << 32)]' << 1
|
||||||
|
+ * R4 = [(x128-32 mod P'(x) << 32)]' << 1
|
||||||
|
+ * R5 = [(x64 mod P'(x) << 32)]' << 1
|
||||||
|
+ * R6 = [(x32 mod P'(x) << 32)]' << 1
|
||||||
|
+ *
|
||||||
|
+ * The bitreflected Barrett reduction constant, u', is defined as
|
||||||
|
+ * the bit reversal of floor(x**64 / P(x)).
|
||||||
|
+ *
|
||||||
|
+ * where P(x) is the polynomial in the normal domain and the P'(x) is the
|
||||||
|
+ * polynomial in the reversed (bitreflected) domain.
|
||||||
|
+ *
|
||||||
|
+ * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
|
||||||
|
+ *
|
||||||
|
+ * P(x) = 0x04C11DB7
|
||||||
|
+ * P'(x) = 0xEDB88320
|
||||||
|
+ */
|
||||||
|
+ const uv16qi perm_le2be = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; /* BE->LE mask */
|
||||||
|
+ const uv2di r2r1 = {0x1C6E41596, 0x154442BD4}; /* R2, R1 */
|
||||||
|
+ const uv2di r4r3 = {0x0CCAA009E, 0x1751997D0}; /* R4, R3 */
|
||||||
|
+ const uv2di r5 = {0, 0x163CD6124}; /* R5 */
|
||||||
|
+ const uv2di ru_poly = {0, 0x1F7011641}; /* u' */
|
||||||
|
+ const uv2di crc_poly = {0, 0x1DB710641}; /* P'(x) << 1 */
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Load the initial CRC value.
|
||||||
|
+ *
|
||||||
|
+ * The CRC value is loaded into the rightmost word of the
|
||||||
|
+ * vector register and is later XORed with the LSB portion
|
||||||
|
+ * of the loaded input data.
|
||||||
|
+ */
|
||||||
|
+ uv2di v0 = {0, 0};
|
||||||
|
+ v0 = (uv2di)vec_insert(crc, (uv4si)v0, 3);
|
||||||
|
+
|
||||||
|
+ /* Load a 64-byte data chunk and XOR with CRC */
|
||||||
|
+ uv2di v1 = vec_perm(((uv2di *)buf)[0], ((uv2di *)buf)[0], perm_le2be);
|
||||||
|
+ uv2di v2 = vec_perm(((uv2di *)buf)[1], ((uv2di *)buf)[1], perm_le2be);
|
||||||
|
+ uv2di v3 = vec_perm(((uv2di *)buf)[2], ((uv2di *)buf)[2], perm_le2be);
|
||||||
|
+ uv2di v4 = vec_perm(((uv2di *)buf)[3], ((uv2di *)buf)[3], perm_le2be);
|
||||||
|
+
|
||||||
|
+ v1 ^= v0;
|
||||||
|
+ buf += 64;
|
||||||
|
+ len -= 64;
|
||||||
|
+
|
||||||
|
+ while (len >= 64) {
|
||||||
|
+ /* Load the next 64-byte data chunk */
|
||||||
|
+ uv16qi part1 = vec_perm(((uv16qi *)buf)[0], ((uv16qi *)buf)[0], perm_le2be);
|
||||||
|
+ uv16qi part2 = vec_perm(((uv16qi *)buf)[1], ((uv16qi *)buf)[1], perm_le2be);
|
||||||
|
+ uv16qi part3 = vec_perm(((uv16qi *)buf)[2], ((uv16qi *)buf)[2], perm_le2be);
|
||||||
|
+ uv16qi part4 = vec_perm(((uv16qi *)buf)[3], ((uv16qi *)buf)[3], perm_le2be);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Perform a GF(2) multiplication of the doublewords in V1 with
|
||||||
|
+ * the R1 and R2 reduction constants in V0. The intermediate result
|
||||||
|
+ * is then folded (accumulated) with the next data chunk in PART1 and
|
||||||
|
+ * stored in V1. Repeat this step for the register contents
|
||||||
|
+ * in V2, V3, and V4 respectively.
|
||||||
|
+ */
|
||||||
|
+ v1 = (uv2di)vec_gfmsum_accum_128(r2r1, v1, part1);
|
||||||
|
+ v2 = (uv2di)vec_gfmsum_accum_128(r2r1, v2, part2);
|
||||||
|
+ v3 = (uv2di)vec_gfmsum_accum_128(r2r1, v3, part3);
|
||||||
|
+ v4 = (uv2di)vec_gfmsum_accum_128(r2r1, v4, part4);
|
||||||
|
+
|
||||||
|
+ buf += 64;
|
||||||
|
+ len -= 64;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3
|
||||||
|
+ * and R4 and accumulate the next 128-bit chunk until a single 128-bit
|
||||||
|
+ * value remains.
|
||||||
|
+ */
|
||||||
|
+ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2);
|
||||||
|
+ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v3);
|
||||||
|
+ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v4);
|
||||||
|
+
|
||||||
|
+ while (len >= 16) {
|
||||||
|
+ /* Load next data chunk */
|
||||||
|
+ v2 = vec_perm(*(uv2di *)buf, *(uv2di *)buf, perm_le2be);
|
||||||
|
+
|
||||||
|
+ /* Fold next data chunk */
|
||||||
|
+ v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2);
|
||||||
|
+
|
||||||
|
+ buf += 16;
|
||||||
|
+ len -= 16;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Set up a vector register for byte shifts. The shift value must
|
||||||
|
+ * be loaded in bits 1-4 in byte element 7 of a vector register.
|
||||||
|
+ * Shift by 8 bytes: 0x40
|
||||||
|
+ * Shift by 4 bytes: 0x20
|
||||||
|
+ */
|
||||||
|
+ uv16qi v9 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||||
|
+ v9 = vec_insert((unsigned char)0x40, v9, 7);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
|
||||||
|
+ * to move R4 into the rightmost doubleword and set the leftmost
|
||||||
|
+ * doubleword to 0x1.
|
||||||
|
+ */
|
||||||
|
+ v0 = vec_srb(r4r3, (uv2di)v9);
|
||||||
|
+ v0[0] = 1;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Compute GF(2) product of V1 and V0. The rightmost doubleword
|
||||||
|
+ * of V1 is multiplied with R4. The leftmost doubleword of V1 is
|
||||||
|
+ * multiplied by 0x1 and is then XORed with rightmost product.
|
||||||
|
+ * Implicitly, the intermediate leftmost product becomes padded
|
||||||
|
+ */
|
||||||
|
+ v1 = (uv2di)vec_gfmsum_128(v0, v1);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Now do the final 32-bit fold by multiplying the rightmost word
|
||||||
|
+ * in V1 with R5 and XOR the result with the remaining bits in V1.
|
||||||
|
+ *
|
||||||
|
+ * To achieve this by a single VGFMAG, right shift V1 by a word
|
||||||
|
+ * and store the result in V2 which is then accumulated. Use the
|
||||||
|
+ * vector unpack instruction to load the rightmost half of the
|
||||||
|
+ * doubleword into the rightmost doubleword element of V1; the other
|
||||||
|
+ * half is loaded in the leftmost doubleword.
|
||||||
|
+ * The vector register with CONST_R5 contains the R5 constant in the
|
||||||
|
+ * rightmost doubleword and the leftmost doubleword is zero to ignore
|
||||||
|
+ * the leftmost product of V1.
|
||||||
|
+ */
|
||||||
|
+ v9 = vec_insert((unsigned char)0x20, v9, 7);
|
||||||
|
+ v2 = vec_srb(v1, (uv2di)v9);
|
||||||
|
+ v1 = vec_unpackl((uv4si)v1); /* Split rightmost doubleword */
|
||||||
|
+ v1 = (uv2di)vec_gfmsum_accum_128(r5, v1, (uv16qi)v2);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Apply a Barrett reduction to compute the final 32-bit CRC value.
|
||||||
|
+ *
|
||||||
|
+ * The input values to the Barrett reduction are the degree-63 polynomial
|
||||||
|
+ * in V1 (R(x)), degree-32 generator polynomial, and the reduction
|
||||||
|
+ * constant u. The Barrett reduction result is the CRC value of R(x) mod
|
||||||
|
+ * P(x).
|
||||||
|
+ *
|
||||||
|
+ * The Barrett reduction algorithm is defined as:
|
||||||
|
+ *
|
||||||
|
+ * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
|
||||||
|
+ * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
|
||||||
|
+ * 3. C(x) = R(x) XOR T2(x) mod x^32
|
||||||
|
+ *
|
||||||
|
+ * Note: The leftmost doubleword of vector register containing
|
||||||
|
+ * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product
|
||||||
|
+ * is zero and does not contribute to the final result.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+ /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
|
||||||
|
+ v2 = vec_unpackl((uv4si)v1);
|
||||||
|
+ v2 = (uv2di)vec_gfmsum_128(ru_poly, v2);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Compute the GF(2) product of the CRC polynomial with T1(x) in
|
||||||
|
+ * V2 and XOR the intermediate result, T2(x), with the value in V1.
|
||||||
|
+ * The final result is stored in word element 2 of V2.
|
||||||
|
+ */
|
||||||
|
+ v2 = vec_unpackl((uv4si)v2);
|
||||||
|
+ v2 = (uv2di)vec_gfmsum_accum_128(crc_poly, v2, (uv16qi)v1);
|
||||||
|
+
|
||||||
|
+ return ((uv4si)v2)[2];
|
||||||
|
+}
|
||||||
|
diff --git a/contrib/s390/crc32_z_resolver.c b/contrib/s390/crc32_z_resolver.c
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..9749cab
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/contrib/s390/crc32_z_resolver.c
|
||||||
|
@@ -0,0 +1,41 @@
|
||||||
|
+#include <sys/auxv.h>
|
||||||
|
+#include "../gcc/zifunc.h"
|
||||||
|
+
|
||||||
|
+#define VX_MIN_LEN 64
|
||||||
|
+#define VX_ALIGNMENT 16L
|
||||||
|
+#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
|
||||||
|
+
|
||||||
|
+unsigned int crc32_le_vgfm_16(unsigned int crc, const unsigned char FAR *buf, z_size_t len);
|
||||||
|
+
|
||||||
|
+local unsigned long s390_crc32_vx(unsigned long crc, const unsigned char FAR *buf, z_size_t len)
|
||||||
|
+{
|
||||||
|
+ uintptr_t prealign, aligned, remaining;
|
||||||
|
+
|
||||||
|
+ if (buf == Z_NULL) return 0UL;
|
||||||
|
+
|
||||||
|
+ if (len < VX_MIN_LEN + VX_ALIGN_MASK)
|
||||||
|
+ return crc32_z_default(crc, buf, len);
|
||||||
|
+
|
||||||
|
+ if ((uintptr_t)buf & VX_ALIGN_MASK) {
|
||||||
|
+ prealign = VX_ALIGNMENT - ((uintptr_t)buf & VX_ALIGN_MASK);
|
||||||
|
+ len -= prealign;
|
||||||
|
+ crc = crc32_z_default(crc, buf, prealign);
|
||||||
|
+ buf += prealign;
|
||||||
|
+ }
|
||||||
|
+ aligned = len & ~VX_ALIGN_MASK;
|
||||||
|
+ remaining = len & VX_ALIGN_MASK;
|
||||||
|
+
|
||||||
|
+ crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, (size_t)aligned) ^ 0xffffffff;
|
||||||
|
+
|
||||||
|
+ if (remaining)
|
||||||
|
+ crc = crc32_z_default(crc, buf + aligned, remaining);
|
||||||
|
+
|
||||||
|
+ return crc;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+Z_IFUNC(crc32_z)
|
||||||
|
+{
|
||||||
|
+ if (hwcap & HWCAP_S390_VX)
|
||||||
|
+ return s390_crc32_vx;
|
||||||
|
+ return crc32_z_default;
|
||||||
|
+}
|
||||||
|
diff --git a/crc32.c b/crc32.c
|
||||||
|
index b0cda20..379fac3 100644
|
||||||
|
--- a/crc32.c
|
||||||
|
+++ b/crc32.c
|
||||||
|
@@ -199,12 +199,12 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
|
||||||
|
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
|
||||||
|
|
||||||
|
/* ========================================================================= */
|
||||||
|
-#ifdef Z_POWER_OPT
|
||||||
|
+#if defined(Z_POWER_OPT) || defined(HAVE_S390X_VX)
|
||||||
|
/* Rename function so resolver can use its symbol. The default version will be
|
||||||
|
* returned by the resolver if the host has no support for an optimized version.
|
||||||
|
*/
|
||||||
|
#define crc32_z crc32_z_default
|
||||||
|
-#endif /* Z_POWER_OPT */
|
||||||
|
+#endif /* defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) */
|
||||||
|
|
||||||
|
unsigned long ZEXPORT crc32_z(crc, buf, len)
|
||||||
|
unsigned long crc;
|
||||||
|
@@ -240,10 +240,15 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
|
||||||
|
return crc ^ 0xffffffffUL;
|
||||||
|
}
|
||||||
|
|
||||||
|
-#ifdef Z_POWER_OPT
|
||||||
|
+#if defined(Z_POWER_OPT) || defined(HAVE_S390X_VX)
|
||||||
|
#undef crc32_z
|
||||||
|
+#ifdef Z_POWER_OPT
|
||||||
|
#include "contrib/power/crc32_z_resolver.c"
|
||||||
|
#endif /* Z_POWER_OPT */
|
||||||
|
+#ifdef HAVE_S390X_VX
|
||||||
|
+#include "contrib/s390/crc32_z_resolver.c"
|
||||||
|
+#endif /* HAVE_S390X_VX */
|
||||||
|
+#endif /* defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) */
|
||||||
|
|
||||||
|
/* ========================================================================= */
|
||||||
|
unsigned long ZEXPORT crc32(crc, buf, len)
|
||||||
|
--
|
||||||
|
2.39.1
|
||||||
|
|
Loading…
Reference in new issue