Peter Bergner - Re: [PATCH] Fix PR middle-end/28690, indexed load/store performance + reload bug (original) (raw)
This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
- From: Peter Bergner
- To: gcc-patches at gcc dot gnu dot org
- Cc: Richard Henderson , Jan Hubicka , Paolo Bonzini , David Edelsohn
- Date: Fri, 23 Feb 2007 13:44:15 -0600
- Subject: Re: [PATCH] Fix PR middle-end/28690, indexed load/store performance + reload bug
- References: 20061205050808.GA13002@vervain.rchland.ibm.com <20070212180752.GA11079@vnet.ibm.com> <20070212201113.GB1924@redhat.com> <1172005423.4861.13.camel@otta>
On Tue, Feb 20, 2007 at 03:03:43PM -0600, Peter Bergner wrote:
Jan, can you shed any light on this? Do we still need the gcc_assert in emit_cmp_and_jump_insns? If we do, would falling back to the change Richard made here:
Ok, I talked with Jan on IRC and he explained some of the problem to me. He suggested the following change which I have bootstrapped and regression tested with a slightly older version of mainline with no problems. I'm rerunning the bootstrap and regression tests on current mainline now.
/* Swap operands and condition to ensure canonical RTL. */ if (swap_commutative_operands_p (x, y)) { /* If we're not emitting a branch, callers are required to pass operands in an order conforming to canonical RTL. We relax this for commutative comparisons so callers using EQ don't need to do swapping by hand. */ gcc_assert (label || (comparison == swap_condition (comparison)));
op0 = y, op1 = x;
comparison = swap_condition (comparison);
}
Paolo, would you mind rerunning SPEC with this slightly modified patch on x86 to make sure we haven't regressed performance from the last patch you tested? If things look good, I'll submit the patch again for formal review.
Peter
Index: gcc/optabs.c
--- gcc/optabs.c (revision 122263) +++ gcc/optabs.c (working copy) @@ -1194,29 +1194,6 @@ expand_simple_binop (enum machine_mode m return expand_binop (mode, binop, op0, op1, target, unsignedp, methods); } -/* Return whether OP0 and OP1 should be swapped when expanding a commutative - binop. Order them according to commutative_operand_precedence and, if - possible, try to put TARGET or a pseudo first. */ -static bool -swap_commutative_operands_with_target (rtx target, rtx op0, rtx op1) -{ - int op0_prec = commutative_operand_precedence (op0); - int op1_prec = commutative_operand_precedence (op1);
- if (op0_prec < op1_prec)
- return true;
- if (op0_prec > op1_prec)
- return false;
- /* With equal precedence, both orders are ok, but it is better if the
first operand is TARGET, or if both TARGET and OP0 are pseudos. */
- if (target == 0 || REG_P (target))
- return (REG_P (op1) && !REG_P (op0)) || target == op1;
- else
- return rtx_equal_p (op1, target);
-}
/* Generate code to perform an operation specified by BINOPTAB on operands OP0 and OP1, with result having machine-mode MODE. @@ -1293,7 +1270,7 @@ expand_binop (enum machine_mode mode, op { commutative_op = 1; - if (swap_commutative_operands_with_target (target, op0, op1)) + if (swap_commutative_operands_p (op0, op1)) { temp = op1; op1 = op0; @@ -3988,9 +3965,11 @@ emit_cmp_and_jump_insns (rtx x, rtx y, e /* Swap operands and condition to ensure canonical RTL. */ if (swap_commutative_operands_p (x, y)) { - /* If we're not emitting a branch, this means some caller - is out of sync. */ - gcc_assert (label); + /* If we're not emitting a branch, callers are required to pass + operands in an order conforming to canonical RTL. We relax this + for commutative comparisons so callers using EQ don't need to do + swapping by hand. */ + gcc_assert (label || (comparison == swap_condition (comparison))); op0 = y, op1 = x; comparison = swap_condition (comparison); Index: gcc/rtlanal.c
--- gcc/rtlanal.c (revision 122263)
+++ gcc/rtlanal.c (working copy)
@@ -2801,9 +2801,9 @@ commutative_operand_precedence (rtx op)
/* Constants always come the second operand. Prefer "nice" constants. */
if (code == CONST_INT)
- return -7;
+ return -10;
if (code == CONST_DOUBLE)
- return -6;
+ return -9;
op = avoid_constant_pool_reference (op);
code = GET_CODE (op);
@@ -2811,26 +2811,29 @@ commutative_operand_precedence (rtx op)
{
case RTX_CONST_OBJ:
if (code == CONST_INT)
- return -5;
+ return -8;
if (code == CONST_DOUBLE)
- return -4;
- return -3;
+ return -7;
+ return -6;
case RTX_EXTRA:
/* SUBREGs of objects should come second. */
if (code == SUBREG && OBJECT_P (SUBREG_REG (op)))
- return -2;
+ return -5;
if (!CONSTANT_P (op))
return 0;
else
/* As for RTX_CONST_OBJ. */
- return -3;
+ return -6;
case RTX_OBJ:
/* Complex expressions should be the first, so decrease priority
of objects. */
- return -1;
+ if (REG_P (op))
+ return (REG_POINTER (op)) ? -1 : -3;
+ else
+ return (MEM_P (op) && MEM_POINTER (op)) ? -2 : -4;
case RTX_COMM_ARITH:
/* Prefer operands that are themselves commutative to be first.
@@ -2857,11 +2860,19 @@ commutative_operand_precedence (rtx op)
/* Return 1 iff it is necessary to swap operands of commutative operation
in order to canonicalize expression. */
-int
+bool
swap_commutative_operands_p (rtx x, rtx y)
{
- return (commutative_operand_precedence (x)
- < commutative_operand_precedence (y));
+ int result = (commutative_operand_precedence (x)
+ - commutative_operand_precedence (y));
+ if (result)
+ return result < 0;
+
+ /* Group together equal REGs to do more simplification. */
+ if (REG_P (x) && REG_P (y))
+ return REGNO (x) > REGNO (y);
+
+ return false;
}
/* Return 1 if X is an autoincrement side effect and the register is
Index: gcc/tree-ssa-address.c
--- gcc/tree-ssa-address.c (revision 122263) +++ gcc/tree-ssa-address.c (working copy) @@ -125,7 +125,7 @@ gen_addr_rtx (rtx symbol, rtx base, rtx if (base) { if (*addr) - *addr = gen_rtx_PLUS (Pmode, *addr, base); + *addr = simplify_gen_binary (PLUS, Pmode, base, *addr); else *addr = base; } Index: gcc/rtl.h
--- gcc/rtl.h (revision 122263) +++ gcc/rtl.h (working copy) @@ -1678,7 +1678,7 @@ extern int reg_referenced_p (rtx, rtx); extern int reg_used_between_p (rtx, rtx, rtx); extern int reg_set_between_p (rtx, rtx, rtx); extern int commutative_operand_precedence (rtx); -extern int swap_commutative_operands_p (rtx, rtx); +extern bool swap_commutative_operands_p (rtx, rtx); extern int modified_between_p (rtx, rtx, rtx); extern int no_labels_between_p (rtx, rtx); extern int modified_in_p (rtx, rtx);
- References:
- Re: [PATCH] Fix PR middle-end/28690, indexed load/store performance + reload bug
* From: Peter Bergner - Re: [PATCH] Fix PR middle-end/28690, indexed load/store performance + reload bug
* From: Richard Henderson - Re: [PATCH] Fix PR middle-end/28690, indexed load/store performance + reload bug
* From: Peter Bergner
- Re: [PATCH] Fix PR middle-end/28690, indexed load/store performance + reload bug
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |