@@ -22025,6 +22025,15 @@ vec_fp_conversion_cost (const struct processor_costs *cost, int size) |
|
|
|
22025 |
return cost->vcvtps2pd512; |
22025 |
return cost->vcvtps2pd512; |
22026 |
} |
22026 |
} |
22027 |
22027 |
|
|
22028 |
/* Return true of X is UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP. */ |
|
|
22029 |
|
|
|
22030 |
static bool |
|
|
22031 |
unspec_pcmp_p (rtx x) |
|
|
22032 |
{ |
|
|
22033 |
return GET_CODE (x) == UNSPEC |
|
|
22034 |
&& (XINT (x, 1) == UNSPEC_PCMP | |
XINT (x, 1) == UNSPEC_UNSIGNED_PCMP); |
|
22035 |
} |
|
|
22036 |
|
|
|
22028 |
/* Compute a (partial) cost for rtx X. Return true if the complete |
22037 |
/* Compute a (partial) cost for rtx X. Return true if the complete |
22029 |
cost has been computed, and false if subexpressions should be |
22038 |
cost has been computed, and false if subexpressions should be |
22030 |
scanned. In either case, *TOTAL contains the cost result. */ |
22039 |
scanned. In either case, *TOTAL contains the cost result. */ |
@@ -22807,14 +22816,77 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, |
|
|
|
22807 |
22816 |
|
|
22808 |
case VEC_MERGE: |
22817 |
case VEC_MERGE: |
22809 |
mask = XEXP (x, 2); |
22818 |
mask = XEXP (x, 2); |
22819 |
/* Scalar versions of SSE instructions may be represented as: |
|
|
22820 |
|
|
|
22821 |
(vec_merge (vec_duplicate (operation ....)) |
|
|
22822 |
(register or memory) |
|
|
22823 |
(const_int 1)) |
|
|
22824 |
|
|
|
22825 |
In this case vec_merge and vec_duplicate are free. |
|
|
22826 |
Just recurse into operation and second operand. */ |
|
|
22827 |
if (mask == const1_rtx |
|
|
22828 |
&& GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE) |
|
|
22829 |
{ |
|
|
22830 |
*total = rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
|
|
22831 |
outer_code, opno, speed) |
|
|
22832 |
+ rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); |
|
|
22833 |
return true; |
|
|
22834 |
} |
|
|
22810 |
/* This is masked instruction, assume the same cost, |
22835 |
/* This is masked instruction, assume the same cost, |
22811 |
as nonmasked variant. */ |
22836 |
as nonmasked variant. */ |
22812 |
if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) |
22837 |
else if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) |
22813 |
*total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed); |
22838 |
{ |
22839 |
*total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) |
|
|
22840 |
+ rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); |
|
|
22841 |
return true; |
|
|
22842 |
} |
|
|
22843 |
/* Combination of the two above: |
|
|
22844 |
|
|
|
22845 |
(vec_merge (vec_merge (vec_duplicate (operation ...)) |
|
|
22846 |
(register or memory) |
|
|
22847 |
(reg:QI mask)) |
|
|
22848 |
(register or memory) |
|
|
22849 |
(const_int 1)) |
|
|
22850 |
|
|
|
22851 |
i.e. avx512fp16_vcvtss2sh_mask. */ |
|
|
22852 |
else if (TARGET_AVX512F |
|
|
22853 |
&& mask == const1_rtx |
|
|
22854 |
&& GET_CODE (XEXP (x, 0)) == VEC_MERGE |
|
|
22855 |
&& GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE |
|
|
22856 |
&& register_operand (XEXP (XEXP (x, 0), 2), |
|
|
22857 |
GET_MODE (XEXP (XEXP (x, 0), 2)))) |
|
|
22858 |
{ |
|
|
22859 |
*total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), |
|
|
22860 |
mode, outer_code, opno, speed) |
|
|
22861 |
+ rtx_cost (XEXP (XEXP (x, 0), 1), |
|
|
22862 |
mode, outer_code, opno, speed) |
|
|
22863 |
+ rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed); |
|
|
22864 |
return true; |
|
|
22865 |
} |
|
|
22866 |
/* vcmp. */ |
|
|
22867 |
else if (unspec_pcmp_p (mask) |
|
|
22868 |
| |
(GET_CODE (mask) == NOT |
|
22869 |
&& unspec_pcmp_p (XEXP (mask, 0)))) |
|
|
22870 |
{ |
|
|
22871 |
rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask; |
|
|
22872 |
rtx unsop0 = XVECEXP (uns, 0, 0); |
|
|
22873 |
/* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0) |
|
|
22874 |
cost the same as register. |
|
|
22875 |
This is used by avx_cmp3_ltint_not. */ |
|
|
22876 |
if (GET_CODE (unsop0) == SUBREG) |
|
|
22877 |
unsop0 = XEXP (unsop0, 0); |
|
|
22878 |
if (GET_CODE (unsop0) == NOT) |
|
|
22879 |
unsop0 = XEXP (unsop0, 0); |
|
|
22880 |
*total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) |
|
|
22881 |
+ rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed) |
|
|
22882 |
+ rtx_cost (unsop0, mode, UNSPEC, opno, speed) |
|
|
22883 |
+ rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed) |
|
|
22884 |
+ cost->sse_op; |
|
|
22885 |
return true; |
|
|
22886 |
} |
|
|
22814 |
else |
22887 |
else |
22815 |
/* ??? We should still recruse when computing cost. */ |
|
|
22816 |
*total = cost->sse_op; |
22888 |
*total = cost->sse_op; |
22817 |
return true; |
22889 |
return false; |
22818 |
22890 |
|
|
22819 |
case MEM: |
22891 |
case MEM: |
22820 |
/* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast. |
22892 |
/* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast. |
@@ -22831,7 +22903,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, |
|
|
|
22831 |
} |
22903 |
} |
22832 |
22904 |
|
|
22833 |
/* An insn that accesses memory is slightly more expensive |
22905 |
/* An insn that accesses memory is slightly more expensive |
22834 |
than one that does not. */ |
22906 |
than one that does not. */ |
22835 |
if (speed) |
22907 |
if (speed) |
22836 |
{ |
22908 |
{ |
22837 |
*total += 1; |
22909 |
*total += 1; |