gcc - GNU Compiler Collection (original) (raw)

author Jan Hubicka hubicka@ucw.cz 2025-05-04 10:52:35 +0200
committer Jan Hubicka hubicka@ucw.cz 2025-05-04 10:53:23 +0200
commit 064cac730f88dc71c6da578f9ae5b8e092ab6cd4 (patch)
tree 6b78502bba1db757618182c109ee1dcdbabbe25f
parent RISC-V: Remove unnecessary frm restore volatile define_insn (diff)

Improve maybe_hot handling in inliner heuristics (HEAD, trunk, master)

Inliner currently applies different heuristics to hot and cold calls (the second are inlined only if the code size will shrink). It may happen that the call itself is hot, but the significant time is spent in callee and inlining makes it faster. For this reason we want to check if the anticipated speedup is considered hot, which is done by this patch (that is similar to my earlier ipa-cp change). In general I think this is less important compared to the ipa-cp change, since large benefit from inlining happens only when something useful is propagated into the callee and should be handled earlier by ipa-cp. However the patch improves SPEC2k17 imagick runtime by about 9% as discussed in PR 119900, though it is mostly a problem of a bad train data set which does not train well parts of the program that are hot for the ref data set. As discussed in the PR log, the particular call that needs to be inlined has a count that falls very slightly below the cutoff, and scaling it up by expected savings enables inlining.

gcc/ChangeLog:

	PR target/119900
	* cgraph.cc (cgraph_edge::maybe_hot_p): Add a variant accepting
	a sreal scale; use reliability of profile.
	* cgraph.h (cgraph_edge::maybe_hot_p): Declare a variant accepting
	a sreal scale.
	* ipa-inline.cc (callee_speedup): New function.
	(want_inline_small_function_p): Add early return and avoid
	duplicated lookup of summaries; use scaled maybe_hot predicate.

-rw-r--r-- gcc/cgraph.cc 42
-rw-r--r-- gcc/cgraph.h 9
-rw-r--r-- gcc/ipa-inline.cc 41

3 files changed, 73 insertions, 19 deletions

diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
index 6ae6a97f6f56..1a2ec38374ab 100644
--- a/gcc/cgraph.cc
+++ b/gcc/cgraph.cc
@@ -2984,13 +2984,22 @@ cgraph_edge::cannot_lead_to_return_p (void)
2984 return callee->cannot_return_p (); 2984 return callee->cannot_return_p ();
2985 } 2985 }
2986 2986
2987 /* Return true if the edge may be considered hot. */ 2987 /* Return true if the edge after scaling its profile by SCALE
2988 may be considered hot. */
2988 2989
2989 bool 2990 bool
2990 cgraph_edge::maybe_hot_p (void) 2991 cgraph_edge::maybe_hot_p (sreal scale)
2991 { 2992 {
2992 if (!maybe_hot_count_p (NULL, count.ipa ())) 2993 /* Never consider calls in functions optimized for size hot. */
2994 if (opt_for_fn (caller->decl, optimize_size))
2993 return false; 2995 return false;
2996
2997 /* If reliable IPA count is available, just use it. */
2998 profile_count c = count.ipa ();
2999 if (c.reliable_p ())
3000 return maybe_hot_count_p (NULL, c * scale);
3001
3002 /* See if we can determine hotness using caller frequency. */
2994 if (caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED 3003 if (caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
2995 | (callee 3004
2996 && callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)) 3005 && callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
@@ -2999,25 +3008,42 @@ cgraph_edge::maybe_hot_p (void)
2999 && (callee 3008 && (callee
3000 && callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE)) 3009 && callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
3001 return false; 3010 return false;
3002 if (opt_for_fn (caller->decl, optimize_size)) 3011 /* ??? This may make sense for hot functions determined by
3003 return false; 3012 user attribute, but if function is hot by profile, it may
3013 contain non-hot calls. In most practical cases this case
3014 is handled by the reliable ipa count above, but e.g. after
3015 inlining function with no profile to function with profile
3016 we get here. */
3004 if (caller->frequency == NODE_FREQUENCY_HOT) 3017 if (caller->frequency == NODE_FREQUENCY_HOT)
3005 return true; 3018 return true;
3019
3020 /* Use IPA count and if it is not available apply local heuristics. */
3021 if (c.initialized_p ())
3022 return maybe_hot_count_p (NULL, c * scale);
3006 if (!count.initialized_p ()) 3023 if (!count.initialized_p ())
3007 return true; 3024 return true;
3008 cgraph_node *where = caller->inlined_to ? caller->inlined_to : caller; 3025 cgraph_node *where = caller->inlined_to ? caller->inlined_to : caller;
3009 if (!where->count.initialized_p ()) 3026 if (!where->count.initialized_p ())
3010 return false; 3027 return true;
3028 c = count * scale;
3011 if (caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE) 3029 if (caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE)
3012 { 3030 {
3013 if (count * 2 < where->count * 3) 3031 if (c * 2 < where->count * 3)
3014 return false; 3032 return false;
3015 } 3033 }
3016 else if (count * param_hot_bb_frequency_fraction < where->count) 3034 else if (c * param_hot_bb_frequency_fraction < where->count)
3017 return false; 3035 return false;
3018 return true; 3036 return true;
3019 } 3037 }
3020 3038
3039 /* Return true if the edge may be considered hot. */
3040
3041 bool
3042 cgraph_edge::maybe_hot_p ()
3043 {
3044 return maybe_hot_p (1);
3045 }
3046
3021 /* Worker for cgraph_can_remove_if_no_direct_calls_p. */ 3047 /* Worker for cgraph_can_remove_if_no_direct_calls_p. */
3022 3048
3023 static bool 3049 static bool
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index abde770ba2b3..f7b67ed0a6c5 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1872,8 +1872,13 @@ public:
1872 /* Return true when the edge represents a direct recursion. */ 1872 /* Return true when the edge represents a direct recursion. */
1873 bool recursive_p (void); 1873 bool recursive_p (void);
1874 1874
1875 /* Return true if the edge may be considered hot. */ 1875 /* Return true if the edge may be considered hot after scaling its count. */
1876 bool maybe_hot_p (void); 1876 bool maybe_hot_p ();
1877
1878 /* Return true if the edge may be considered hot after scaling its count
1879 (i.e. assume that optimization would reduce runtime for callee,
1880 possibly significantly). */
1881 bool maybe_hot_p (sreal scale);
1877 1882
1878 /* Get unique identifier of the edge. */ 1883 /* Get unique identifier of the edge. */
1879 inline int get_uid () 1884 inline int get_uid ()
diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc
index 7c2feeeffbb6..38fdbfde1b3b 100644
--- a/gcc/ipa-inline.cc
+++ b/gcc/ipa-inline.cc
@@ -931,6 +931,18 @@ inlining_speedup (struct cgraph_edge *edge,
931 return speedup; 931 return speedup;
932 } 932 }
933 933
934 /* Return expected speedup of the callee function alone
935 (i.e. no estimate of call overhead and also no scaling
936 by call frequency). */
937
938 static sreal
939 callee_speedup (struct cgraph_edge *e)
940 {
941 sreal unspec_time;
942 sreal spec_time = estimate_edge_time (e, &unspec_time);
943 return unspec_time - spec_time;
944 }
945
934 /* Return true if the speedup for inlining E is bigger than 946 /* Return true if the speedup for inlining E is bigger than
935 param_inline_min_speedup. */ 947 param_inline_min_speedup. */
936 948
@@ -968,28 +980,39 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report)
968 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR) 980 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
969 want_inline = false; 981 want_inline = false;
970 else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl)) 982 else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
971 ; 983 return true;
972 else if (!DECL_DECLARED_INLINE_P (callee->decl) 984 else if (!DECL_DECLARED_INLINE_P (callee->decl)
973 && !opt_for_fn (e->caller->decl, flag_inline_small_functions)) 985 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
974 { 986 {
975 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE; 987 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
976 want_inline = false; 988 want_inline = false;
977 } 989 }
990
991 /* Early return before lookup of summaries. */
992 if (!want_inline)
993 {
994 if (report)
995 report_inline_failed_reason (e);
996 return false;
997 }
998
999 ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1000 ipa_call_summary *call_info = ipa_call_summaries->get (e);
1001
978 /* Do fast and conservative check if the function can be good 1002 /* Do fast and conservative check if the function can be good
979 inline candidate. */ 1003 inline candidate. */
980 else if ((!DECL_DECLARED_INLINE_P (callee->decl) 1004 if ((!DECL_DECLARED_INLINE_P (callee->decl)
981 && (!e->count.ipa ().initialized_p () | !e->maybe_hot_p ())) 1005
982 && ipa_fn_summaries->get (callee)->min_size 1006 |
983 - ipa_call_summaries->get (e)->call_stmt_size 1007 && callee_info->min_size - call_info->call_stmt_size
984 > inline_insns_auto (e->caller, true, true)) 1008 > inline_insns_auto (e->caller, true, true))
985 { 1009 {
986 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; 1010 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
987 want_inline = false; 1011 want_inline = false;
988 } 1012 }
989 else if ((DECL_DECLARED_INLINE_P (callee->decl) 1013 else if ((DECL_DECLARED_INLINE_P (callee->decl)
990 | e->count.ipa ().nonzero_p ()) 1014
991 && ipa_fn_summaries->get (callee)->min_size 1015 && callee_info->min_size - call_info->call_stmt_size
992 - ipa_call_summaries->get (e)->call_stmt_size
993 > inline_insns_single (e->caller, true, true)) 1016 > inline_insns_single (e->caller, true, true))
994 { 1017 {
995 e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl) 1018 e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
@@ -1060,7 +1083,7 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report)
1060 } 1083 }
1061 } 1084 }
1062 /* If call is cold, do not inline when function body would grow. */ 1085 /* If call is cold, do not inline when function body would grow. */
1063 else if (!e->maybe_hot_p () 1086 else if (!e->maybe_hot_p (callee_speedup (e))
1064 && (growth >= inline_insns_single (e->caller, false, false) 1087 && (growth >= inline_insns_single (e->caller, false, false)
1065 | growth_positive_p (callee, e, growth))) 1088
1066 { 1089 {