You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
401 lines
13 KiB
401 lines
13 KiB
1 month ago
|
From 170b3e6e07613f83afa320bec3e0e0e6da53f583 Mon Sep 17 00:00:00 2001
|
||
|
From: Sameera Deshpande <sameera.deshpande@linaro.org>
|
||
|
Date: Fri, 15 Feb 2019 07:46:16 +0530
|
||
|
Subject: [PATCH 1/8] Add support for FNMADD and FNMSUB.
|
||
|
|
||
|
---
|
||
|
src/lj_asm_arm64.h | 32 +++++++++++++++++++++++++++++++-
|
||
|
1 file changed, 31 insertions(+), 1 deletion(-)
|
||
|
|
||
|
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
|
||
|
index 5b40f4cc..cc842b53 100644
|
||
|
--- a/src/lj_asm_arm64.h
|
||
|
+++ b/src/lj_asm_arm64.h
|
||
|
@@ -361,6 +361,35 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
+/* Fuse FP neg-multiply-add/sub. */
|
||
|
+static int asm_fusenmadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
|
||
|
+{
|
||
|
+ IRRef ref = ir->op1;
|
||
|
+ IRIns *irn = IR(ref);
|
||
|
+ if (irn->o != IR_ADD && irn->o != IR_SUB)
|
||
|
+ return 0;
|
||
|
+
|
||
|
+ if (!mayfuse(as, ref))
|
||
|
+ return 0;
|
||
|
+
|
||
|
+ IRRef lref = irn->op1, rref = irn->op2;
|
||
|
+ IRIns *irm;
|
||
|
+ if (lref != rref &&
|
||
|
+ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
|
||
|
+ ra_noreg(irm->r)) ||
|
||
|
+ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
|
||
|
+ (rref = lref, ra_noreg(irm->r))))) {
|
||
|
+ Reg dest = ra_dest(as, ir, RSET_FPR);
|
||
|
+ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
|
||
|
+ Reg left = ra_alloc2(as, irm,
|
||
|
+ rset_exclude(rset_exclude(RSET_FPR, dest), add));
|
||
|
+ Reg right = (left >> 8); left &= 255;
|
||
|
+ emit_dnma(as, (irn->o == IR_ADD ? ai : air), (dest & 31), (left & 31), (right & 31), (add & 31));
|
||
|
+ return 1;
|
||
|
+ }
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
/* Fuse BAND + BSHL/BSHR into UBFM. */
|
||
|
static int asm_fuseandshift(ASMState *as, IRIns *ir)
|
||
|
{
|
||
|
@@ -1461,7 +1490,8 @@ static void asm_mul(ASMState *as, IRIns *ir)
|
||
|
static void asm_neg(ASMState *as, IRIns *ir)
|
||
|
{
|
||
|
if (irt_isnum(ir->t)) {
|
||
|
- asm_fpunary(as, ir, A64I_FNEGd);
|
||
|
+ if (!asm_fusenmadd(as, ir, A64I_FNMADDd))
|
||
|
+ asm_fpunary(as, ir, A64I_FNEGd);
|
||
|
return;
|
||
|
}
|
||
|
asm_intneg(as, ir);
|
||
|
--
|
||
|
2.43.2
|
||
|
|
||
|
|
||
|
From 09b3c908b4e6397655e0c476ca7d3528d2b4773d Mon Sep 17 00:00:00 2001
|
||
|
From: Vivien HENRIET <bubuabu@bubuabu.org>
|
||
|
Date: Wed, 30 Jan 2019 23:44:51 +0100
|
||
|
Subject: [PATCH 2/8] Fix os.date() for timezone change awareness
|
||
|
|
||
|
On POSIX targets, system timezone changes are not taken into account.
|
||
|
To reproduce,
|
||
|
1. call os.date()
|
||
|
2. change your timezone
|
||
|
3. call os.date() within the same luajit instance
|
||
|
|
||
|
On POSIX targets, os.date uses localtime_r to retrieve the time.
|
||
|
On other targets, the function localtime is used. But there is a behaviour
|
||
|
difference between these two functions. localtime acts as if it called tzset
|
||
|
which localtime_r doesn't.
|
||
|
|
||
|
To fix the issue tzset is called before localtime_r.
|
||
|
---
|
||
|
src/lib_os.c | 1 +
|
||
|
1 file changed, 1 insertion(+)
|
||
|
|
||
|
diff --git a/src/lib_os.c b/src/lib_os.c
|
||
|
index cf0df281..d9dda853 100644
|
||
|
--- a/src/lib_os.c
|
||
|
+++ b/src/lib_os.c
|
||
|
@@ -185,6 +185,7 @@ LJLIB_CF(os_date)
|
||
|
#endif
|
||
|
} else {
|
||
|
#if LJ_TARGET_POSIX
|
||
|
+ tzset();
|
||
|
stm = localtime_r(&t, &rtm);
|
||
|
#else
|
||
|
stm = localtime(&t);
|
||
|
--
|
||
|
2.43.2
|
||
|
|
||
|
|
||
|
From 78ec3dbc5a3f6c0198e3dbe7901d80f7feb77344 Mon Sep 17 00:00:00 2001
|
||
|
From: Siddhesh Poyarekar <siddhesh@sourceware.org>
|
||
|
Date: Thu, 14 Mar 2019 23:08:24 +0530
|
||
|
Subject: [PATCH 3/8] Revert "FFI: Make FP to U64 conversions match JIT backend
|
||
|
behavior."
|
||
|
|
||
|
This reverts commit f5d424afe8b9395f0df05aba905e0e1f6a2262b8.
|
||
|
|
||
|
The patch breaks test 279, i.e.
|
||
|
|
||
|
assert(tostring(bit.band(1ll, 1, 1ull, -1)) == "1ULL")
|
||
|
|
||
|
The patch was put in to make the JIT and interpreter behaviour
|
||
|
consistent[1] for float to unsigned int conversions but it ended up
|
||
|
making things worse. There needs to be a better fix for this.
|
||
|
|
||
|
[1] https://github.com/LuaJIT/LuaJIT/pull/415
|
||
|
---
|
||
|
src/lj_obj.h | 18 +++++-------------
|
||
|
1 file changed, 5 insertions(+), 13 deletions(-)
|
||
|
|
||
|
diff --git a/src/lj_obj.h b/src/lj_obj.h
|
||
|
index 2d4386e1..d40f7264 100644
|
||
|
--- a/src/lj_obj.h
|
||
|
+++ b/src/lj_obj.h
|
||
|
@@ -996,22 +996,14 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
|
||
|
|
||
|
#define lj_num2int(n) ((int32_t)(n))
|
||
|
|
||
|
-/*
|
||
|
-** This must match the JIT backend behavior. In particular for archs
|
||
|
-** that don't have a common hardware instruction for this conversion.
|
||
|
-** Note that signed FP to unsigned int conversions have an undefined
|
||
|
-** result and should never be relied upon in portable FFI code.
|
||
|
-** See also: C99 or C11 standard, 6.3.1.4, footnote of (1).
|
||
|
-*/
|
||
|
static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
|
||
|
{
|
||
|
-#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS
|
||
|
- int64_t i = (int64_t)n;
|
||
|
- if (i < 0) i = (int64_t)(n - 18446744073709551616.0);
|
||
|
- return (uint64_t)i;
|
||
|
-#else
|
||
|
- return (uint64_t)n;
|
||
|
+#ifdef _MSC_VER
|
||
|
+ if (n >= 9223372036854775808.0) /* They think it's a feature. */
|
||
|
+ return (uint64_t)(int64_t)(n - 18446744073709551616.0);
|
||
|
+ else
|
||
|
#endif
|
||
|
+ return (uint64_t)n;
|
||
|
}
|
||
|
|
||
|
static LJ_AINLINE int32_t numberVint(cTValue *o)
|
||
|
--
|
||
|
2.43.2
|
||
|
|
||
|
|
||
|
From b71187a97405c03c64caa9a295e94c7708ffab35 Mon Sep 17 00:00:00 2001
|
||
|
From: Siddhesh Poyarekar <siddhesh@sourceware.org>
|
||
|
Date: Sun, 17 Mar 2019 11:34:04 +0530
|
||
|
Subject: [PATCH 4/8] Guard against undefined behaviour when casting from float
|
||
|
to unsigned
|
||
|
|
||
|
Only range (-1.0, UINT64_MAX) can be safely converted to unsigned
|
||
|
directly, and (-INT64_MAX, INT64_MAX) through a cast to int64_t first.
|
||
|
The remaining range is undefined.
|
||
|
|
||
|
TODO: Do the same for JIT as well as for float to other ranges.
|
||
|
---
|
||
|
src/lj_obj.h | 8 +++++++-
|
||
|
1 file changed, 7 insertions(+), 1 deletion(-)
|
||
|
|
||
|
diff --git a/src/lj_obj.h b/src/lj_obj.h
|
||
|
index d40f7264..f79cd02c 100644
|
||
|
--- a/src/lj_obj.h
|
||
|
+++ b/src/lj_obj.h
|
||
|
@@ -998,12 +998,18 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
|
||
|
|
||
|
static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
|
||
|
{
|
||
|
+ /* Undefined behaviour. This is deliberately not a full check because we
|
||
|
+ don't want to slow down compliant code. */
|
||
|
+ lj_assertX(n >= -9223372036854775809.0, "Overflow");
|
||
|
#ifdef _MSC_VER
|
||
|
if (n >= 9223372036854775808.0) /* They think it's a feature. */
|
||
|
return (uint64_t)(int64_t)(n - 18446744073709551616.0);
|
||
|
else
|
||
|
#endif
|
||
|
- return (uint64_t)n;
|
||
|
+ if (n > -1.0)
|
||
|
+ return (uint64_t)n;
|
||
|
+ else
|
||
|
+ return (uint64_t)(int64_t)n;
|
||
|
}
|
||
|
|
||
|
static LJ_AINLINE int32_t numberVint(cTValue *o)
|
||
|
--
|
||
|
2.43.2
|
||
|
|
||
|
|
||
|
From 19908a818ccd8d5888bab6f4a4d518701570eaf8 Mon Sep 17 00:00:00 2001
|
||
|
From: Siddhesh Poyarekar <siddhesh@sourceware.org>
|
||
|
Date: Mon, 25 Mar 2019 17:56:53 +0530
|
||
|
Subject: [PATCH 5/8] Fix build error with fnmsub fusing
|
||
|
|
||
|
---
|
||
|
src/lj_asm_arm64.h | 2 +-
|
||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||
|
|
||
|
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
|
||
|
index cc842b53..d56a376a 100644
|
||
|
--- a/src/lj_asm_arm64.h
|
||
|
+++ b/src/lj_asm_arm64.h
|
||
|
@@ -1490,7 +1490,7 @@ static void asm_mul(ASMState *as, IRIns *ir)
|
||
|
static void asm_neg(ASMState *as, IRIns *ir)
|
||
|
{
|
||
|
if (irt_isnum(ir->t)) {
|
||
|
- if (!asm_fusenmadd(as, ir, A64I_FNMADDd))
|
||
|
+ if (!asm_fusenmadd(as, ir, A64I_FNMADDd, A64I_FNMSUBd))
|
||
|
asm_fpunary(as, ir, A64I_FNEGd);
|
||
|
return;
|
||
|
}
|
||
|
--
|
||
|
2.43.2
|
||
|
|
||
|
|
||
|
From e8279fb7d556553adb6f645f481ba399f144c80b Mon Sep 17 00:00:00 2001
|
||
|
From: Siddhesh Poyarekar <siddhesh@sourceware.org>
|
||
|
Date: Thu, 28 Mar 2019 09:19:34 +0530
|
||
|
Subject: [PATCH 6/8] aarch64: better float to unsigned int conversion
|
||
|
|
||
|
A straight float to unsigned conversion has a limited range of (-1.0,
|
||
|
UTYPE_MAX) which should be fine in general but for the sake of
|
||
|
consistency across the interpreter and the JIT compiler, it is
|
||
|
necessary to work a wee bit harder to expand this range to (TYPE_MIN,
|
||
|
UTYPE_MAX), which can be done with a simple range check. This adds a
|
||
|
couple of branches but only one of the branches should have a
|
||
|
noticeable performance impact on most processors with branch
|
||
|
predictors, and that too only if the input number varies wildly in
|
||
|
range.
|
||
|
|
||
|
This currently works only for 64-bit conversions, 32-bit is still WIP.
|
||
|
---
|
||
|
src/lj_asm_arm64.h | 30 ++++++++++++++++++++++--------
|
||
|
src/lj_target_arm64.h | 1 +
|
||
|
2 files changed, 23 insertions(+), 8 deletions(-)
|
||
|
|
||
|
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
|
||
|
index d56a376a..a1b44ec2 100644
|
||
|
--- a/src/lj_asm_arm64.h
|
||
|
+++ b/src/lj_asm_arm64.h
|
||
|
@@ -665,14 +665,28 @@ static void asm_conv(ASMState *as, IRIns *ir)
|
||
|
} else {
|
||
|
Reg left = ra_alloc1(as, lref, RSET_FPR);
|
||
|
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||
|
- A64Ins ai = irt_is64(ir->t) ?
|
||
|
- (st == IRT_NUM ?
|
||
|
- (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
|
||
|
- (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
|
||
|
- (st == IRT_NUM ?
|
||
|
- (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
|
||
|
- (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
|
||
|
- emit_dn(as, ai, dest, (left & 31));
|
||
|
+
|
||
|
+ A64Ins ai_signed = st == IRT_NUM ?
|
||
|
+ (irt_is64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_S32_F64) :
|
||
|
+ (irt_is64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_S32_F32);
|
||
|
+
|
||
|
+ if (irt_isi64(ir->t) || irt_isint(ir->t))
|
||
|
+ emit_dn(as, ai_signed, dest, (left & 31));
|
||
|
+ else {
|
||
|
+ A64Ins ai_unsigned = st == IRT_NUM ?
|
||
|
+ (irt_is64(ir->t) ? A64I_FCVT_U64_F64 : A64I_FCVT_U32_F64) :
|
||
|
+ (irt_is64(ir->t) ? A64I_FCVT_U64_F32 : A64I_FCVT_U32_F32);
|
||
|
+
|
||
|
+ MCLabel l_done = emit_label(as);
|
||
|
+ emit_dn(as, ai_unsigned, dest, (left & 31));
|
||
|
+ MCLabel l_signed = emit_label(as);
|
||
|
+ emit_jmp(as, l_done);
|
||
|
+ emit_dn(as, ai_signed, dest, (left & 31));
|
||
|
+ /* The valid range for float to unsigned int conversion is (-1.0,
|
||
|
+ UINT{,64}_MAX-1), but we just compare with 0 to save a load. */
|
||
|
+ emit_cond_branch(as, CC_PL, l_signed);
|
||
|
+ emit_nm(as, st == IRT_NUM ? A64I_FCMPZd : A64I_FCMPZs, left & 31, 0);
|
||
|
+ }
|
||
|
}
|
||
|
} else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
|
||
|
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||
|
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
|
||
|
index c34f1e59..1e2f19ea 100644
|
||
|
--- a/src/lj_target_arm64.h
|
||
|
+++ b/src/lj_target_arm64.h
|
||
|
@@ -288,6 +288,7 @@ typedef enum A64Ins {
|
||
|
A64I_STPs = 0x2d000000,
|
||
|
A64I_STPd = 0x6d000000,
|
||
|
A64I_FCMPd = 0x1e602000,
|
||
|
+ A64I_FCMPZs = 0x1e202008,
|
||
|
A64I_FCMPZd = 0x1e602008,
|
||
|
A64I_FCSELd = 0x1e600c00,
|
||
|
A64I_FRINTMd = 0x1e654000,
|
||
|
--
|
||
|
2.43.2
|
||
|
|
||
|
|
||
|
From f37d7e3dac8648771171038c13408811ab3e2694 Mon Sep 17 00:00:00 2001
|
||
|
From: Siddhesh Poyarekar <siddhesh@sourceware.org>
|
||
|
Date: Thu, 28 Mar 2019 10:50:23 +0530
|
||
|
Subject: [PATCH 7/8] Better behaviour for float to uint32_t conversions
|
||
|
|
||
|
This is the uint32_t part of the float to unsigned int conversions for
|
||
|
the interpreter. The cast ends up working correctly for x86 but not
|
||
|
for aarch64 since fcvtzu sets the result to zero on negative inputs.
|
||
|
Work slightly harder to make sure that negative number inputs behave
|
||
|
like x86.
|
||
|
|
||
|
This fixes the interpreter but not the JIT compiler, which errors out
|
||
|
during the narrowing pass.
|
||
|
---
|
||
|
src/lj_cconv.c | 8 +++++++-
|
||
|
1 file changed, 7 insertions(+), 1 deletion(-)
|
||
|
|
||
|
diff --git a/src/lj_cconv.c b/src/lj_cconv.c
|
||
|
index 419a8f45..ebb98521 100644
|
||
|
--- a/src/lj_cconv.c
|
||
|
+++ b/src/lj_cconv.c
|
||
|
@@ -203,7 +203,13 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
|
||
|
else if (dsize == 2) *(int16_t *)dp = (int16_t)i;
|
||
|
else *(int8_t *)dp = (int8_t)i;
|
||
|
} else if (dsize == 4) {
|
||
|
- *(uint32_t *)dp = (uint32_t)n;
|
||
|
+ /* Undefined behaviour. This is deliberately not a full check because we
|
||
|
+ * don't want to slow down compliant code. */
|
||
|
+ lj_assertX(n >= -2147483649.0, "Overflow");
|
||
|
+ if (n > -1.0)
|
||
|
+ *(uint32_t *)dp = (uint32_t)n;
|
||
|
+ else
|
||
|
+ *(uint32_t *)dp = (uint32_t)(int32_t)n;
|
||
|
} else if (dsize == 8) {
|
||
|
if (!(dinfo & CTF_UNSIGNED))
|
||
|
*(int64_t *)dp = (int64_t)n;
|
||
|
--
|
||
|
2.43.2
|
||
|
|
||
|
|
||
|
From 9d79974a1e059f34ad9d2ad38419be34acd6c343 Mon Sep 17 00:00:00 2001
|
||
|
From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= <ondrej@sury.org>
|
||
|
Date: Thu, 19 Nov 2015 16:29:02 +0200
|
||
|
Subject: [PATCH 8/8] Get rid of LUAJIT_VERSION_SYM that changes ABI on every
|
||
|
patch release
|
||
|
|
||
|
---
|
||
|
src/lj_dispatch.c | 5 -----
|
||
|
src/luajit.c | 1 -
|
||
|
src/luajit_rolling.h | 3 ---
|
||
|
3 files changed, 9 deletions(-)
|
||
|
|
||
|
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
|
||
|
index b9748bba..d09238f8 100644
|
||
|
--- a/src/lj_dispatch.c
|
||
|
+++ b/src/lj_dispatch.c
|
||
|
@@ -318,11 +318,6 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
|
||
|
return 1; /* OK. */
|
||
|
}
|
||
|
|
||
|
-/* Enforce (dynamic) linker error for version mismatches. See luajit.c. */
|
||
|
-LUA_API void LUAJIT_VERSION_SYM(void)
|
||
|
-{
|
||
|
-}
|
||
|
-
|
||
|
/* -- Hooks --------------------------------------------------------------- */
|
||
|
|
||
|
/* This function can be called asynchronously (e.g. during a signal). */
|
||
|
diff --git a/src/luajit.c b/src/luajit.c
|
||
|
index 73e29d44..31fdba18 100644
|
||
|
--- a/src/luajit.c
|
||
|
+++ b/src/luajit.c
|
||
|
@@ -515,7 +515,6 @@ static int pmain(lua_State *L)
|
||
|
int argn;
|
||
|
int flags = 0;
|
||
|
globalL = L;
|
||
|
- LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */
|
||
|
|
||
|
argn = collectargs(argv, &flags);
|
||
|
if (argn < 0) { /* Invalid args? */
|
||
|
diff --git a/src/luajit_rolling.h b/src/luajit_rolling.h
|
||
|
index 2d04402c..5ab4167d 100644
|
||
|
--- a/src/luajit_rolling.h
|
||
|
+++ b/src/luajit_rolling.h
|
||
|
@@ -73,8 +73,5 @@ LUA_API void luaJIT_profile_stop(lua_State *L);
|
||
|
LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
|
||
|
int depth, size_t *len);
|
||
|
|
||
|
-/* Enforce (dynamic) linker error for version mismatches. Call from main. */
|
||
|
-LUA_API void LUAJIT_VERSION_SYM(void);
|
||
|
-
|
||
|
#error "DO NOT USE luajit_rolling.h -- only include build-generated luajit.h"
|
||
|
#endif
|
||
|
--
|
||
|
2.43.2
|
||
|
|