From 316ef0d22b6f41722cc71316a6b59e60e7c9576c Mon Sep 17 00:00:00 2001
From: Julian Seward <jseward.tech@gmail.com>
Date: Mon, 11 Mar 2024 17:53:14 +0100
Subject: [PATCH] Handle gcc __builtin_strcmp using 128/256-bit vectors with
sse4.1, avx/avx2
* amd64 front end: redo the translation into IR for PTEST, so as to
use only IROps which we know Memcheck can do exact instrumentation
for. Handling for both the 128- and 256-bit cases has been
changed.
* ir_opt.c: add some constant folding rules to support the above. In
particular, for the case `ptest %reg, %reg` (the same reg twice), we
want rflags.C to be set to a defined-1 even if %reg is completely
undefined. Doing that requires folding `x and not(x)` to zero when
x has type V128 or V256. (A plain-C sketch of the relevant PTEST flag
semantics follows after the bug link below.)
* memcheck/tests/amd64/rh2257546_{128,256}.c: new test cases
https://bugzilla.redhat.com/show_bug.cgi?id=2257546
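
A plain-C sketch (illustrative names only, not the VEX IR and not code from
this patch) of the PTEST flag semantics the above relies on:

   #include <stdint.h>

   typedef struct { uint64_t lo, hi; } V128;   /* stand-in for an xmm value */

   /* Z is set iff (e & g) is all zeroes, per the comment in finish_xTESTy. */
   static int ptest_Z ( V128 e, V128 g )
   {
      return ((e.lo & g.lo) | (e.hi & g.hi)) == 0;
   }

   /* C is set iff (e & not(g)) is all zeroes. */
   static int ptest_C ( V128 e, V128 g )
   {
      return ((e.lo & ~g.lo) | (e.hi & ~g.hi)) == 0;
   }

For `ptest %reg, %reg` both operands are the same value, so `e & ~e` is zero
for every possible bit pattern, and C is 1 no matter how undefined the
register's contents are. The `x and not(x)` folding rule above is what lets
Memcheck's instrumentation reach that conclusion.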
---
VEX/priv/guest_amd64_toIR.c | 64 +++++++++----------
VEX/priv/ir_opt.c | 34 ++++++++++
memcheck/tests/amd64/Makefile.am | 7 +-
memcheck/tests/amd64/rh2257546_128.c | 32 ++++++++++
memcheck/tests/amd64/rh2257546_128.stderr.exp | 0
memcheck/tests/amd64/rh2257546_128.stdout.exp | 1 +
memcheck/tests/amd64/rh2257546_128.vgtest | 2 +
memcheck/tests/amd64/rh2257546_256.c | 32 ++++++++++
memcheck/tests/amd64/rh2257546_256.stderr.exp | 0
memcheck/tests/amd64/rh2257546_256.stdout.exp | 1 +
memcheck/tests/amd64/rh2257546_256.vgtest | 3 +
11 files changed, 141 insertions(+), 35 deletions(-)
create mode 100644 memcheck/tests/amd64/rh2257546_128.c
create mode 100644 memcheck/tests/amd64/rh2257546_128.stderr.exp
create mode 100644 memcheck/tests/amd64/rh2257546_128.stdout.exp
create mode 100644 memcheck/tests/amd64/rh2257546_128.vgtest
create mode 100644 memcheck/tests/amd64/rh2257546_256.c
create mode 100644 memcheck/tests/amd64/rh2257546_256.stderr.exp
create mode 100644 memcheck/tests/amd64/rh2257546_256.stdout.exp
create mode 100644 memcheck/tests/amd64/rh2257546_256.vgtest
diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
index 0414aa5c5..d7c25042d 100644
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -16826,13 +16826,18 @@ static Long dis_VBLENDV_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
{
- /* Set Z=1 iff (vecE & vecG) == 0
- Set C=1 iff (vecE & not vecG) == 0
+ /* Set Z=1 iff (vecE & vecG) == 0--(128)--0
+ Set C=1 iff (vecE & not vecG) == 0--(128)--0
+
+ For the case `sign == 0`, be careful to use only IROps that can be
+ instrumented exactly by memcheck. This is because PTEST is used for
+ __builtin_strcmp in gcc 14. See
+ https://bugzilla.redhat.com/show_bug.cgi?id=2257546
*/
/* andV, andnV: vecE & vecG, vecE and not(vecG) */
- /* andV resp. andnV, reduced to 64-bit values, by or-ing the top
+ /* andV resp. andnV are reduced to 64-bit values by or-ing the top
and bottom 64-bits together. It relies on this trick:
InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
@@ -16862,11 +16867,13 @@ static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
binop(Iop_InterleaveHI64x2,
mkexpr(andnV), mkexpr(andnV)))));
+ // Make z64 and c64 be either all-0s or all-1s
IRTemp z64 = newTemp(Ity_I64);
IRTemp c64 = newTemp(Ity_I64);
+
if (sign == 64) {
- /* When only interested in the most significant bit, just shift
- arithmetically right and negate. */
+ /* When only interested in the most significant bit, just copy bit 63
+ into all bit positions, then invert. */
assign(z64,
unop(Iop_Not64,
binop(Iop_Sar64, mkexpr(and64), mkU8(63))));
@@ -16874,37 +16881,28 @@ static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
assign(c64,
unop(Iop_Not64,
binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
- } else {
- if (sign == 32) {
- /* When interested in bit 31 and bit 63, mask those bits and
- fallthrough into the PTEST handling. */
- IRTemp t0 = newTemp(Ity_I64);
- IRTemp t1 = newTemp(Ity_I64);
- IRTemp t2 = newTemp(Ity_I64);
- assign(t0, mkU64(0x8000000080000000ULL));
- assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0)));
- assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0)));
- and64 = t1;
- andn64 = t2;
- }
- /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
- slice out the Z and C bits conveniently. We use the standard
- trick all-zeroes -> all-zeroes, anything-else -> all-ones
- done by "(x | -x) >>s (word-size - 1)".
- */
+ } else if (sign == 32) {
+ /* If we're interested in bits 63 and 31, OR bit 31 into bit 63, copy
+ bit 63 into all bit positions, then invert. */
+ IRTemp and3264 = newTemp(Ity_I64);
+ assign(and3264, binop(Iop_Or64, mkexpr(and64),
+ binop(Iop_Shl64, mkexpr(and64), mkU8(32))));
assign(z64,
unop(Iop_Not64,
- binop(Iop_Sar64,
- binop(Iop_Or64,
- binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
- mkexpr(and64)), mkU8(63))));
+ binop(Iop_Sar64, mkexpr(and3264), mkU8(63))));
+ IRTemp andn3264 = newTemp(Ity_I64);
+ assign(andn3264, binop(Iop_Or64, mkexpr(andn64),
+ binop(Iop_Shl64, mkexpr(andn64), mkU8(32))));
assign(c64,
unop(Iop_Not64,
- binop(Iop_Sar64,
- binop(Iop_Or64,
- binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
- mkexpr(andn64)), mkU8(63))));
+ binop(Iop_Sar64, mkexpr(andn3264), mkU8(63))));
+ } else {
+ vassert(sign == 0);
+ assign(z64, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(and64), mkU64(0)),
+ mkU64(~0ULL), mkU64(0ULL)));
+ assign(c64, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(andn64), mkU64(0)),
+ mkU64(~0ULL), mkU64(0ULL)));
}
/* And finally, slice out the Z and C flags and set the flags
@@ -16966,9 +16964,7 @@ static Long dis_xTESTy_128 ( const VexAbiInfo* vbi, Prefix pfx,
IRTemp andnV = newTemp(Ity_V128);
assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
assign(andnV, binop(Iop_AndV128,
- mkexpr(vecE),
- binop(Iop_XorV128, mkexpr(vecG),
- mkV128(0xFFFF))));
+ mkexpr(vecE), unop(Iop_NotV128, mkexpr(vecG))));
finish_xTESTy ( andV, andnV, sign );
return delta;
diff --git a/VEX/priv/ir_opt.c b/VEX/priv/ir_opt.c
index f918e9f85..6453f4fdf 100644
--- a/VEX/priv/ir_opt.c
+++ b/VEX/priv/ir_opt.c
@@ -1693,6 +1693,17 @@ static IRExpr* fold_Expr_WRK ( IRExpr** env, IRExpr* e )
break;
}
+ /* Similarly .. */
+ case Iop_V256toV128_0: case Iop_V256toV128_1: {
+ UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
+ if (v256 == 0x00000000) {
+ e2 = IRExpr_Const(IRConst_V128(0x0000));
+ } else {
+ goto unhandled;
+ }
+ break;
+ }
+
case Iop_ZeroHI64ofV128: {
/* Could do better here -- only need to look at the bottom 64 bits
of the argument, really. */
@@ -2129,6 +2140,8 @@ static IRExpr* fold_Expr_WRK ( IRExpr** env, IRExpr* e )
}
/* -- V128 stuff -- */
+ case Iop_InterleaveLO64x2: // This, and the HI64, are created
+ case Iop_InterleaveHI64x2: // by the amd64 PTEST translation
case Iop_InterleaveLO8x16: {
/* This turns up a lot in Memcheck instrumentation of
Icc generated code. I don't know why. */
@@ -2321,6 +2334,27 @@ static IRExpr* fold_Expr_WRK ( IRExpr** env, IRExpr* e )
break;
}
}
+ /* AndV128( x, NotV128( x ) ) ==> 0...0 and
+ AndV256( x, NotV256( x ) ) ==> 0...0
+ This is generated by amd64 `ptest %xmmReg, %xmmReg`
+ (the same reg both times)
+ See https://bugzilla.redhat.com/show_bug.cgi?id=2257546 */
+ if (e->Iex.Binop.op == Iop_AndV128
+ || e->Iex.Binop.op == Iop_AndV256) {
+ Bool isV256 = e->Iex.Binop.op == Iop_AndV256;
+ IRExpr* x1 = chase(env, e->Iex.Binop.arg1);
+ IRExpr* rhs = chase(env, e->Iex.Binop.arg2);
+ if (x1 && rhs
+ && rhs->tag == Iex_Unop
+ && rhs->Iex.Unop.op == (isV256 ? Iop_NotV256
+ : Iop_NotV128)) {
+ IRExpr* x2 = chase(env, rhs->Iex.Unop.arg);
+ if (x2 && sameIRExprs(env, x1, x2)) {
+ e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
+ break;
+ }
+ }
+ }
break;
case Iop_OrV128:
diff --git a/memcheck/tests/amd64/Makefile.am b/memcheck/tests/amd64/Makefile.am
index 0d2812dd8..906b8f393 100644
--- a/memcheck/tests/amd64/Makefile.am
+++ b/memcheck/tests/amd64/Makefile.am
@@ -20,6 +20,10 @@ EXTRA_DIST = \
insn-pmovmskb.stderr.exp-clang \
more_x87_fp.stderr.exp more_x87_fp.stdout.exp more_x87_fp.vgtest \
pcmpgt.stderr.exp pcmpgt.vgtest \
+ rh2257546_128.vgtest \
+ rh2257546_128.stderr.exp rh2257546_128.stdout.exp \
+ rh2257546_256.vgtest \
+ rh2257546_256.stderr.exp rh2257546_256.stdout.exp \
sh-mem-vec128-plo-no.vgtest \
sh-mem-vec128-plo-no.stderr.exp \
sh-mem-vec128-plo-no.stdout.exp \
@@ -46,12 +50,13 @@ check_PROGRAMS = \
insn-bsfl \
insn-pmovmskb \
pcmpgt \
+ rh2257546_128 \
sh-mem-vec128 \
sse_memory \
xor-undef-amd64
if BUILD_AVX_TESTS
- check_PROGRAMS += sh-mem-vec256 xsave-avx
+ check_PROGRAMS += rh2257546_256 sh-mem-vec256 xsave-avx
endif
if HAVE_ASM_CONSTRAINT_P
check_PROGRAMS += insn-pcmpistri
diff --git a/memcheck/tests/amd64/rh2257546_128.c b/memcheck/tests/amd64/rh2257546_128.c
new file mode 100644
index 000000000..a405aa775
--- /dev/null
+++ b/memcheck/tests/amd64/rh2257546_128.c
@@ -0,0 +1,32 @@
+
+// This should run on memcheck without reporting an undef-value error.
+// See https://bugzilla.redhat.com/show_bug.cgi?id=2257546
+
+#include <stdio.h>
+#include <malloc.h>
+
+int main ( void )
+{
+ char* c1 = malloc(16);
+ c1[0] = 'x'; c1[1] = 'y'; c1[2] = 'x'; c1[3] = 0;
+
+ char* c2 = "foobarxyzzyfoobarzyzzy"; // strlen > 16
+
+ long long int res;
+ __asm__ __volatile__(
+ "movdqu (%1), %%xmm4" "\n\t"
+ "movdqu (%2), %%xmm5" "\n\t"
+ "pxor %%xmm4, %%xmm5" "\n\t"
+ "ptest %%xmm5, %%xmm5" "\n\t"
+ "je zzz1f" "\n\t"
+ "mov $99, %0" "\n\t"
+ "jmp zzzafter" "\n"
+ "zzz1f:" "\n\t"
+ "mov $88, %0" "\n"
+ "zzzafter:" "\n\t"
+ : /*OUT*/"=r"(res) : /*IN*/"r"(c1),"r"(c2) : /*TRASH*/"xmm4","xmm5","cc"
+ );
+ printf("res = %lld\n", res);
+ free(c1);
+ return 0;
+}
diff --git a/memcheck/tests/amd64/rh2257546_128.stderr.exp b/memcheck/tests/amd64/rh2257546_128.stderr.exp
new file mode 100644
index 000000000..e69de29bb
diff --git a/memcheck/tests/amd64/rh2257546_128.stdout.exp b/memcheck/tests/amd64/rh2257546_128.stdout.exp
new file mode 100644
index 000000000..454131089
--- /dev/null
+++ b/memcheck/tests/amd64/rh2257546_128.stdout.exp
@@ -0,0 +1 @@
+res = 99
diff --git a/memcheck/tests/amd64/rh2257546_128.vgtest b/memcheck/tests/amd64/rh2257546_128.vgtest
new file mode 100644
index 000000000..94414cd6f
--- /dev/null
+++ b/memcheck/tests/amd64/rh2257546_128.vgtest
@@ -0,0 +1,2 @@
+prog: rh2257546_128
+vgopts: -q
diff --git a/memcheck/tests/amd64/rh2257546_256.c b/memcheck/tests/amd64/rh2257546_256.c
new file mode 100644
index 000000000..235005ca6
--- /dev/null
+++ b/memcheck/tests/amd64/rh2257546_256.c
@@ -0,0 +1,32 @@
+
+// This should run on memcheck without reporting an undef-value error.
+// See https://bugzilla.redhat.com/show_bug.cgi?id=2257546
+
+#include <stdio.h>
+#include <malloc.h>
+
+int main ( void )
+{
+ char* c1 = malloc(32);
+ c1[0] = 'x'; c1[1] = 'y'; c1[2] = 'x'; c1[3] = 0;
+
+ char* c2 = "foobarxyzzyfoobarzyzzyandawholelotmoretoo"; // strlen > 32
+
+ long long int res;
+ __asm__ __volatile__(
+ "vmovdqu (%1), %%ymm4" "\n\t"
+ "vmovdqu (%2), %%ymm5" "\n\t"
+ "vpxor %%ymm4, %%ymm5, %%ymm5" "\n\t"
+ "vptest %%ymm5, %%ymm5" "\n\t"
+ "je zzz1f" "\n\t"
+ "mov $99, %0" "\n\t"
+ "jmp zzzafter" "\n"
+ "zzz1f:" "\n\t"
+ "mov $88, %0" "\n"
+ "zzzafter:" "\n\t"
+ : /*OUT*/"=r"(res) : /*IN*/"r"(c1),"r"(c2) : /*TRASH*/"ymm4","ymm5","cc"
+ );
+ printf("res = %lld\n", res);
+ free(c1);
+ return 0;
+}
diff --git a/memcheck/tests/amd64/rh2257546_256.stderr.exp b/memcheck/tests/amd64/rh2257546_256.stderr.exp
new file mode 100644
index 000000000..e69de29bb
diff --git a/memcheck/tests/amd64/rh2257546_256.stdout.exp b/memcheck/tests/amd64/rh2257546_256.stdout.exp
new file mode 100644
index 000000000..454131089
--- /dev/null
+++ b/memcheck/tests/amd64/rh2257546_256.stdout.exp
@@ -0,0 +1 @@
+res = 99
diff --git a/memcheck/tests/amd64/rh2257546_256.vgtest b/memcheck/tests/amd64/rh2257546_256.vgtest
new file mode 100644
index 000000000..86eef8fe2
--- /dev/null
+++ b/memcheck/tests/amd64/rh2257546_256.vgtest
@@ -0,0 +1,3 @@
+prereq: test -e rh2257546_256
+prog: rh2257546_256
+vgopts: -q
--
2.43.0