201 lines
4.8 KiB
Diff
201 lines
4.8 KiB
Diff
|
From f3ed8bfe8a82af1870ddc8696ed4cc1d5aa6b441 Mon Sep 17 00:00:00 2001
|
||
|
From: Rich Felker <dalias@aerifal.cx>
|
||
|
Date: Mon, 5 Aug 2019 18:41:47 -0400
|
||
|
Subject: [PATCH] fix x87 stack imbalance in corner cases of i386 math asm
|
||
|
|
||
|
commit 31c5fb80b9eae86f801be4f46025bc6532a554c5 introduced underflow
|
||
|
code paths for the i386 math asm, along with checks on the fpu status
|
||
|
word to skip the underflow-generation instructions if the underflow
|
||
|
flag was already raised. unfortunately, at least one such path, in
|
||
|
log1p, returned with 2 items on the x87 stack rather than just 1 item
|
||
|
for the return value. this is a violation of the ABI's calling
|
||
|
convention, and could cause subsequent floating point code to produce
|
||
|
NaNs due to x87 stack overflow. if floating point results are used in
|
||
|
flow control, this can lead to runaway wrong code execution.
|
||
|
|
||
|
rather than reviewing each "underflow already raised" code path for
|
||
|
correctness, remove them all. they're likely slower than just
|
||
|
performing the underflow code unconditionally, and significantly more
|
||
|
complex.
|
||
|
|
||
|
all of this code should be ripped out and replaced by C source files
|
||
|
with inline asm. doing so would preclude this kind of error by having
|
||
|
the compiler perform all x87 stack register allocation and stack
|
||
|
manipulation, and would produce comparable or better code. however
|
||
|
such a change is a much larger project.
|
||
|
|
||
|
Signed-off-by: Peter Korsgaard <peter@korsgaard.com>
|
||
|
---
|
||
|
src/math/i386/asin.s | 10 ++--------
|
||
|
src/math/i386/atan.s | 7 ++-----
|
||
|
src/math/i386/atan2.s | 5 +----
|
||
|
src/math/i386/atan2f.s | 5 +----
|
||
|
src/math/i386/atanf.s | 7 ++-----
|
||
|
src/math/i386/exp.s | 10 ++--------
|
||
|
src/math/i386/log1p.s | 7 ++-----
|
||
|
src/math/i386/log1pf.s | 7 ++-----
|
||
|
8 files changed, 14 insertions(+), 44 deletions(-)
|
||
|
|
||
|
diff --git a/src/math/i386/asin.s b/src/math/i386/asin.s
|
||
|
index a9f691bf..920d967a 100644
|
||
|
--- a/src/math/i386/asin.s
|
||
|
+++ b/src/math/i386/asin.s
|
||
|
@@ -7,13 +7,10 @@ asinf:
|
||
|
cmp $0x01000000,%eax
|
||
|
jae 1f
|
||
|
# subnormal x, return x with underflow
|
||
|
- fnstsw %ax
|
||
|
- and $16,%ax
|
||
|
- jnz 2f
|
||
|
fld %st(0)
|
||
|
fmul %st(1)
|
||
|
fstps 4(%esp)
|
||
|
-2: ret
|
||
|
+ ret
|
||
|
|
||
|
.global asinl
|
||
|
.type asinl,@function
|
||
|
@@ -30,11 +27,8 @@ asin:
|
||
|
cmp $0x00200000,%eax
|
||
|
jae 1f
|
||
|
# subnormal x, return x with underflow
|
||
|
- fnstsw %ax
|
||
|
- and $16,%ax
|
||
|
- jnz 2f
|
||
|
fsts 4(%esp)
|
||
|
-2: ret
|
||
|
+ ret
|
||
|
1: fld %st(0)
|
||
|
fld1
|
||
|
fsub %st(0),%st(1)
|
||
|
diff --git a/src/math/i386/atan.s b/src/math/i386/atan.s
|
||
|
index d73137b2..a26feae1 100644
|
||
|
--- a/src/math/i386/atan.s
|
||
|
+++ b/src/math/i386/atan.s
|
||
|
@@ -10,8 +10,5 @@ atan:
|
||
|
fpatan
|
||
|
ret
|
||
|
# subnormal x, return x with underflow
|
||
|
-1: fnstsw %ax
|
||
|
- and $16,%ax
|
||
|
- jnz 2f
|
||
|
- fsts 4(%esp)
|
||
|
-2: ret
|
||
|
+1: fsts 4(%esp)
|
||
|
+ ret
|
||
|
diff --git a/src/math/i386/atan2.s b/src/math/i386/atan2.s
|
||
|
index a7d2979b..1fa0524d 100644
|
||
|
--- a/src/math/i386/atan2.s
|
||
|
+++ b/src/math/i386/atan2.s
|
||
|
@@ -10,8 +10,5 @@ atan2:
|
||
|
cmp $0x00200000,%eax
|
||
|
jae 1f
|
||
|
# subnormal x, return x with underflow
|
||
|
- fnstsw %ax
|
||
|
- and $16,%ax
|
||
|
- jnz 1f
|
||
|
fsts 4(%esp)
|
||
|
-1: ret
|
||
|
+ ret
|
||
|
diff --git a/src/math/i386/atan2f.s b/src/math/i386/atan2f.s
|
||
|
index 14b88ce5..0b264726 100644
|
||
|
--- a/src/math/i386/atan2f.s
|
||
|
+++ b/src/math/i386/atan2f.s
|
||
|
@@ -10,10 +10,7 @@ atan2f:
|
||
|
cmp $0x01000000,%eax
|
||
|
jae 1f
|
||
|
# subnormal x, return x with underflow
|
||
|
- fnstsw %ax
|
||
|
- and $16,%ax
|
||
|
- jnz 1f
|
||
|
fld %st(0)
|
||
|
fmul %st(1)
|
||
|
fstps 4(%esp)
|
||
|
-1: ret
|
||
|
+ ret
|
||
|
diff --git a/src/math/i386/atanf.s b/src/math/i386/atanf.s
|
||
|
index 8caddefa..893beac5 100644
|
||
|
--- a/src/math/i386/atanf.s
|
||
|
+++ b/src/math/i386/atanf.s
|
||
|
@@ -10,10 +10,7 @@ atanf:
|
||
|
fpatan
|
||
|
ret
|
||
|
# subnormal x, return x with underflow
|
||
|
-1: fnstsw %ax
|
||
|
- and $16,%ax
|
||
|
- jnz 2f
|
||
|
- fld %st(0)
|
||
|
+1: fld %st(0)
|
||
|
fmul %st(1)
|
||
|
fstps 4(%esp)
|
||
|
-2: ret
|
||
|
+ ret
|
||
|
diff --git a/src/math/i386/exp.s b/src/math/i386/exp.s
|
||
|
index c7aa5b6e..df87c497 100644
|
||
|
--- a/src/math/i386/exp.s
|
||
|
+++ b/src/math/i386/exp.s
|
||
|
@@ -7,13 +7,10 @@ expm1f:
|
||
|
cmp $0x01000000,%eax
|
||
|
jae 1f
|
||
|
# subnormal x, return x with underflow
|
||
|
- fnstsw %ax
|
||
|
- and $16,%ax
|
||
|
- jnz 2f
|
||
|
fld %st(0)
|
||
|
fmul %st(1)
|
||
|
fstps 4(%esp)
|
||
|
-2: ret
|
||
|
+ ret
|
||
|
|
||
|
.global expm1l
|
||
|
.type expm1l,@function
|
||
|
@@ -30,11 +27,8 @@ expm1:
|
||
|
cmp $0x00200000,%eax
|
||
|
jae 1f
|
||
|
# subnormal x, return x with underflow
|
||
|
- fnstsw %ax
|
||
|
- and $16,%ax
|
||
|
- jnz 2f
|
||
|
fsts 4(%esp)
|
||
|
-2: ret
|
||
|
+ ret
|
||
|
1: fldl2e
|
||
|
fmulp
|
||
|
mov $0xc2820000,%eax
|
||
|
diff --git a/src/math/i386/log1p.s b/src/math/i386/log1p.s
|
||
|
index 6b6929c7..354f391a 100644
|
||
|
--- a/src/math/i386/log1p.s
|
||
|
+++ b/src/math/i386/log1p.s
|
||
|
@@ -16,9 +16,6 @@ log1p:
|
||
|
fyl2x
|
||
|
ret
|
||
|
# subnormal x, return x with underflow
|
||
|
-2: fnstsw %ax
|
||
|
- and $16,%ax
|
||
|
- jnz 1f
|
||
|
- fsts 4(%esp)
|
||
|
+2: fsts 4(%esp)
|
||
|
fstp %st(1)
|
||
|
-1: ret
|
||
|
+ ret
|
||
|
diff --git a/src/math/i386/log1pf.s b/src/math/i386/log1pf.s
|
||
|
index c0bcd30f..4d3484cd 100644
|
||
|
--- a/src/math/i386/log1pf.s
|
||
|
+++ b/src/math/i386/log1pf.s
|
||
|
@@ -16,10 +16,7 @@ log1pf:
|
||
|
fyl2x
|
||
|
ret
|
||
|
# subnormal x, return x with underflow
|
||
|
-2: fnstsw %ax
|
||
|
- and $16,%ax
|
||
|
- jnz 1f
|
||
|
- fxch
|
||
|
+2: fxch
|
||
|
fmul %st(1)
|
||
|
fstps 4(%esp)
|
||
|
-1: ret
|
||
|
+ ret
|
||
|
--
|
||
|
2.11.0
|
||
|
|