This patch makes AFDO's VPT (value profile transformation) happen during early
inlining.  This should make the einline pass inside the afdo pass unnecessary,
but some inlining still happens there - I will need to debug why that happens
and will try to drop afdo's inliner incrementally.

get_inline_stack_in_node can now be used to produce an inline stack out of
callgraph nodes which are marked as inline clones, so we no longer need to
iterate the tree-inline and IPA decision phases like the old code did.  I also
added some debug facilities - dumping of decisions and inline stacks - so one
can match them with the data in the gcov profile.

The former VPT pass identified all cases where an indirect call was inlined in
the train run and the inlined callee collected some samples.  In that case it
forced the inlining without doing any checks, such as whether inlining is
possible at all.  The new code simply introduces speculative edges into the
callgraph and lets afdo inlining decide.

The old code also marked statements that were introduced during promotion to
prevent double speculation, i.e. turning

  if (ptr == foo)
    ... inlined foo ...
  else
    ptr ();

into

  if (ptr == foo)
    ... inlined foo ...
  else if (ptr == foo)
    foo ();  /* for IPA inlining */
  else
    ptr ();

Since inlining now happens much earlier, tracking those statements would be
quite hard.  Instead I simply remove the targets from the profile data, which
should have the same effect.

I also noticed that nothing was setting max_count, so all non-zero profile
counts were considered hot; I fixed that too.

Training with the ref run I now get (columns are copies, run time and rate;
base first, then peak):

500.perlbench_r       1        160        9.93 *       1        162        9.84 *
502.gcc_r            NR                                NR
505.mcf_r             1        186        8.68 *       1        194        8.34 *
520.omnetpp_r         1        183        7.15 *       1        208        6.32 *
523.xalancbmk_r      NR                                NR
525.x264_r            1       85.2        20.5 *       1       85.8        20.4 *
531.deepsjeng_r       1        165        6.93 *       1        176        6.51 *
541.leela_r           1        268        6.18 *       1        282        5.87 *
548.exchange2_r       1       86.3        30.4 *       1       88.9        29.5 *
557.xz_r              1        224        4.81 *       1        224        4.82 *
Est. SPECrate2017_int_base             9.72
Est. SPECrate2017_int_peak                                        9.33

503.bwaves_r         NR                                NR
507.cactuBSSN_r       1        107        11.9 *       1        105        12.0 *
508.namd_r            1        108        8.79 *       1        116        8.18 *
510.parest_r          1        143        18.3 *       1        156        16.8 *
511.povray_r          1        188        12.4 *       1        163        14.4 *
519.lbm_r             1       72.0        14.6 *       1       75.0        14.1 *
521.wrf_r             1        106        21.1 *       1        106        21.1 *
526.blender_r         1        147        10.3 *       1        147        10.4 *
527.cam4_r            1        110        15.9 *       1        118        14.8 *
538.imagick_r         1        104        23.8 *       1        105        23.7 *
544.nab_r             1        146        11.6 *       1        143        11.8 *
549.fotonik3d_r       1        134        29.0 *       1        169        23.1 *
554.roms_r            1       86.6        18.4 *       1       89.3        17.8 *
Est. SPECrate2017_fp_base              15.4
Est. SPECrate2017_fp_peak                                         14.9

Base is without profile feedback and peak is AFDO.

gcc/ChangeLog:

	* auto-profile.cc (dump_inline_stack): New function.
	(get_inline_stack_in_node): New function.
	(get_relative_location_for_stmt): Add FN parameter.
	(has_indirect_call): Remove.
	(function_instance::find_icall_target_map): Add FN parameter.
	(function_instance::remove_icall_target): New function.
	(function_instance::read_function_instance): Set sum_max.
	(autofdo_source_profile::get_count_info): Add NODE parameter.
	(autofdo_source_profile::update_inlined_ind_target): Add NODE
	parameter.
	(autofdo_source_profile::remove_icall_target): New function.
	(afdo_indirect_call): Add INDIRECT_EDGE parameter; dump reason
	for failure; do not check for recursion; do not inline call.
	(afdo_vpt): Add INDIRECT_EDGE parameter.
	(afdo_set_bb_count): Do not take PROMOTED set.
	(afdo_vpt_for_early_inline): Remove.
	(afdo_annotate_cfg): Do not take PROMOTED set.
	(auto_profile): Do not call afdo_vpt_for_early_inline.
	(afdo_callsite_hot_enough_for_early_inline): Dump count.
	(remove_afdo_speculative_target): New function.
	* auto-profile.h (afdo_vpt_for_early_inline): Declare.
	(remove_afdo_speculative_target): Declare.
	* ipa-inline.cc (inline_functions_by_afdo): Do VPT.
	(early_inliner): Redirect edges if inlining happened.
	* tree-inline.cc (expand_call_inline): Add sanity check.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-prof/afdo-vpt-earlyinline.c: Update template.
	* gcc.dg/tree-prof/indir-call-prof-2.c: Update template.
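To illustrate the transformation described above, here is a minimal
hand-written C sketch (hypothetical names, not taken from the testsuite) of
what speculative promotion of a hot indirect call conceptually produces; the
actual pass works on GIMPLE and callgraph edges rather than on source:

  /* Hot indirect call as written by the user.  AFDO training data shows
     square as the dominant target of FN at this callsite.  */
  int
  square (int x)
  {
    return x * x;
  }

  int
  apply (int (*fn) (int), int x)
  {
    return fn (x);
  }

  /* Shape after speculative promotion: a guarded direct call that the
     early inliner can inline, with the original indirect call kept as
     fallback.  */
  int
  apply_promoted (int (*fn) (int), int x)
  {
    if (fn == square)
      return square (x);	/* speculative direct call, now inlinable */
    else
      return fn (x);		/* fallback indirect call */
  }

  int
  main (void)
  {
    /* Both variants compute the same value.  */
    return apply (square, 3) - apply_promoted (square, 3);
  }
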
@@ -243,9 +243,13 @@ public:
is found. */
is found. */
bool get_count_info (location_t loc, count_info *info) const;
bool get_count_info (location_t loc, count_info *info) const;
/* Read the inlined indirect call target profile for STMT and store it in
/* Read the inlined indirect call target profile for STMT in FN and store it
MAP, return the total count for all inlined indirect calls. */
in MAP, return the total count for all inlined indirect calls. */
gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
gcov_type find_icall_target_map (tree fn, gcall *stmt,
icall_target_map *map) const;
/* Remove inlined indirect call target profile for STMT in FN. */
void remove_icall_target (tree fn, gcall *stmt);
/* Mark LOC as annotated. */
/* Mark LOC as annotated. */
void mark_annotated (location_t loc);
void mark_annotated (location_t loc);
@@ -302,20 +306,26 @@ public:
function_instance *get_function_instance_by_decl (tree decl) const;
function_instance *get_function_instance_by_decl (tree decl) const;
/* Find count_info for a given gimple STMT. If found, store the count_info
/* Find count_info for a given gimple STMT. If found, store the count_info
in INFO and return true; otherwise return false. */
in INFO and return true; otherwise return false.
bool get_count_info (gimple *stmt, count_info *info) const;
NODE can be used to specify particular inline clone. */
bool get_count_info (gimple *stmt, count_info *info,
cgraph_node *node = NULL) const;
/* Find count_info for a given gimple location GIMPLE_LOC. If found,
/* Find count_info for a given gimple location GIMPLE_LOC. If found,
store the count_info in INFO and return true; otherwise return false. */
store the count_info in INFO and return true; otherwise return false.
bool get_count_info (location_t gimple_loc, count_info *info) const;
NODE can be used to specify particular inline clone. */
bool get_count_info (location_t gimple_loc, count_info *info,
cgraph_node *node = NULL) const;
/* Find total count of the callee of EDGE. */
/* Find total count of the callee of EDGE. */
gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
/* Update value profile INFO for STMT from the inlined indirect callsite.
/* Update value profile INFO for STMT within NODE from the inlined indirect
Return true if INFO is updated. */
callsite. Return true if INFO is updated. */
bool update_inlined_ind_target (gcall *stmt, count_info *info);
bool update_inlined_ind_target (gcall *stmt, count_info *info,
cgraph_node *node);
void remove_icall_target (cgraph_edge *e);
private:
private:
/* Map from function_instance name index (in string_table) to
/* Map from function_instance name index (in string_table) to
function_instance. */
function_instance. */
@@ -383,6 +393,24 @@ get_function_decl_from_block (tree block)
return BLOCK_ABSTRACT_ORIGIN (block);
return BLOCK_ABSTRACT_ORIGIN (block);
}
}
/* Dump STACK to F. */
static void
dump_inline_stack (FILE *f, inline_stack *stack)
{
bool first = true;
for (decl_lineno &p : *stack)
{
fprintf (f, "%s%s:%i.%i",
first ? "" : "; ",
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (p.first)),
p.second >> 16,
p.second & 65535);
first = false;
}
fprintf (f, "\n");
}
/* Store inline stack for STMT in STACK. */
/* Store inline stack for STMT in STACK. */
static void
static void
@@ -412,12 +440,35 @@ get_inline_stack (location_t locus, inline_stack *stack,
stack->safe_push (std::make_pair (fn, get_combined_location (locus, fn)));
stack->safe_push (std::make_pair (fn, get_combined_location (locus, fn)));
}
}
/* Same as get_inline_stack for a given node which may be
an inline clone. If NODE is NULL, assume current_function_decl. */
static void
get_inline_stack_in_node (location_t locus, inline_stack *stack,
cgraph_node *node)
{
if (!node)
return get_inline_stack (locus, stack);
do
{
get_inline_stack (locus, stack, node->decl);
/* If caller is inlined, continue building stack. */
if (!node->inlined_to)
node = NULL;
else
{
locus = gimple_location (node->callers->call_stmt);
node = node->callers->caller;
}
}
while (node);
}
/* Return STMT's combined location, which is a 32bit integer in which
/* Return STMT's combined location, which is a 32bit integer in which
higher 16 bits stores the line offset of LOC to the start lineno
higher 16 bits stores the line offset of LOC to the start lineno
of DECL, The lower 16 bits stores the discriminator. */
of DECL, The lower 16 bits stores the discriminator. */
static unsigned
static unsigned
get_relative_location_for_stmt (gimple *stmt)
get_relative_location_for_stmt (tree fn, gimple *stmt)
{
{
location_t locus = gimple_location (stmt);
location_t locus = gimple_location (stmt);
if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
@@ -428,25 +479,7 @@ get_relative_location_for_stmt (gimple *stmt)
if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
return get_combined_location (locus,
return get_combined_location (locus,
get_function_decl_from_block (block));
get_function_decl_from_block (block));
return get_combined_location (locus, current_function_decl);
return get_combined_location (locus, fn);
}
/* Return true if BB contains indirect call. */
static bool
has_indirect_call (basic_block bb)
{
gimple_stmt_iterator gsi;
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple *stmt = gsi_stmt (gsi);
if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
&& (gimple_call_fn (stmt) == NULL
|| TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
return true;
}
return false;
}
}
/* Member functions for string_table. */
/* Member functions for string_table. */
@@ -584,8 +617,7 @@ void function_instance::merge (function_instance *other)
pos_counts[iter->first].count += iter->second.count;
pos_counts[iter->first].count += iter->second.count;
}
}
/* Store the profile info for LOC in INFO. Return TRUE if profile info
/* Return profile info for LOC in INFO. */
is found. */
bool
bool
function_instance::get_count_info (location_t loc, count_info *info) const
function_instance::get_count_info (location_t loc, count_info *info) const
@@ -601,11 +633,11 @@ function_instance::get_count_info (location_t loc, count_info *info) const
MAP, return the total count for all inlined indirect calls. */
MAP, return the total count for all inlined indirect calls. */
gcov_type
gcov_type
function_instance::find_icall_target_map (gcall *stmt,
function_instance::find_icall_target_map (tree fn, gcall *stmt,
icall_target_map *map) const
icall_target_map *map) const
{
{
gcov_type ret = 0;
gcov_type ret = 0;
unsigned stmt_offset = get_relative_location_for_stmt (stmt);
unsigned stmt_offset = get_relative_location_for_stmt (fn, stmt);
for (callsite_map::const_iterator iter = callsites.begin ();
for (callsite_map::const_iterator iter = callsites.begin ();
iter != callsites.end (); ++iter)
iter != callsites.end (); ++iter)
@@ -624,6 +656,28 @@ function_instance::find_icall_target_map (gcall *stmt,
return ret;
return ret;
}
}
/* Remove the inlined indirect call target profile for STMT. */
void
function_instance::remove_icall_target (tree fn, gcall *stmt)
{
unsigned stmt_offset = get_relative_location_for_stmt (fn, stmt);
int n = 0;
for (callsite_map::const_iterator iter = callsites.begin ();
iter != callsites.end ();)
if (iter->first.first != stmt_offset)
++iter;
else
{
iter = callsites.erase (iter);
n++;
}
/* TODO: If we add support for multiple targets, we may want to
remove only those we successfully inlined. */
gcc_assert (n);
}
/* Read the profile and create a function_instance with head count as
/* Read the profile and create a function_instance with head count as
HEAD_COUNT. Recursively read callsites to create nested function_instances
HEAD_COUNT. Recursively read callsites to create nested function_instances
too. STACK is used to track the recursive creation process. */
too. STACK is used to track the recursive creation process. */
@@ -671,6 +725,9 @@ function_instance::read_function_instance (function_instance_stack *stack,
unsigned num_targets = gcov_read_unsigned ();
unsigned num_targets = gcov_read_unsigned ();
gcov_type count = gcov_read_counter ();
gcov_type count = gcov_read_counter ();
s->pos_counts[offset].count = count;
s->pos_counts[offset].count = count;
afdo_profile_info->sum_max = std::max (afdo_profile_info->sum_max,
count);
for (unsigned j = 0; j < stack->length (); j++)
for (unsigned j = 0; j < stack->length (); j++)
(*stack)[j]->total_count_ += count;
(*stack)[j]->total_count_ += count;
for (unsigned j = 0; j < num_targets; j++)
for (unsigned j = 0; j < num_targets; j++)
@@ -718,20 +775,22 @@ autofdo_source_profile::get_function_instance_by_decl (tree decl) const
in INFO and return true; otherwise return false. */
in INFO and return true; otherwise return false. */
bool
bool
autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
autofdo_source_profile::get_count_info (gimple *stmt, count_info *info,
cgraph_node *node) const
{
{
return get_count_info (gimple_location (stmt), info);
return get_count_info (gimple_location (stmt), info, node);
}
}
bool
bool
autofdo_source_profile::get_count_info (location_t gimple_loc,
autofdo_source_profile::get_count_info (location_t gimple_loc,
count_info *info) const
count_info *info,
cgraph_node *node) const
{
{
if (LOCATION_LOCUS (gimple_loc) == cfun->function_end_locus)
if (LOCATION_LOCUS (gimple_loc) == cfun->function_end_locus)
return false;
return false;
inline_stack stack;
inline_stack stack;
get_inline_stack (gimple_loc, &stack);
get_inline_stack_in_node (gimple_loc, &stack, node);
if (stack.length () == 0)
if (stack.length () == 0)
return false;
return false;
function_instance *s = get_function_instance_by_inline_stack (stack);
function_instance *s = get_function_instance_by_inline_stack (stack);
@@ -745,7 +804,8 @@ autofdo_source_profile::get_count_info (location_t gimple_loc,
bool
bool
autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
count_info *info)
count_info *info,
cgraph_node *node)
{
{
if (dump_file)
if (dump_file)
{
{
@@ -756,12 +816,12 @@ autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
{
{
if (dump_file)
if (dump_file)
fprintf (dump_file, " good locus\n");
fprintf (dump_file, " bad locus (funciton end)\n");
return false;
return false;
}
}
count_info old_info;
count_info old_info;
get_count_info (stmt, &old_info);
get_count_info (stmt, &old_info, node);
gcov_type total = 0;
gcov_type total = 0;
for (icall_target_map::const_iterator iter = old_info.targets.begin ();
for (icall_target_map::const_iterator iter = old_info.targets.begin ();
iter != old_info.targets.end (); ++iter)
iter != old_info.targets.end (); ++iter)
@@ -784,7 +844,7 @@ autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
}
}
inline_stack stack;
inline_stack stack;
get_inline_stack (gimple_location (stmt), &stack);
get_inline_stack_in_node (gimple_location (stmt), &stack, node);
if (stack.length () == 0)
if (stack.length () == 0)
{
{
if (dump_file)
if (dump_file)
@@ -795,24 +855,46 @@ autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
if (s == NULL)
if (s == NULL)
{
{
if (dump_file)
if (dump_file)
fprintf (dump_file, " function not found in inline stack\n");
{
fprintf (dump_file, " function not found in inline stack:");
dump_inline_stack (dump_file, &stack);
}
return false;
return false;
}
}
icall_target_map map;
icall_target_map map;
if (s->find_icall_target_map (stmt, &map) == 0)
if (s->find_icall_target_map (node ? node->decl
: current_function_decl,
stmt, &map) == 0)
{
{
if (dump_file)
if (dump_file)
fprintf (dump_file, " no target map\n");
{
fprintf (dump_file, " no target map for stack: ");
dump_inline_stack (dump_file, &stack);
}
return false;
return false;
}
}
for (icall_target_map::const_iterator iter = map.begin ();
for (icall_target_map::const_iterator iter = map.begin ();
iter != map.end (); ++iter)
iter != map.end (); ++iter)
info->targets[iter->first] = iter->second;
info->targets[iter->first] = iter->second;
if (dump_file)
if (dump_file)
fprintf (dump_file, " looks good\n");
{
fprintf (dump_file, " looks good; stack:");
dump_inline_stack (dump_file, &stack);
}
return true;
return true;
}
}
void
autofdo_source_profile::remove_icall_target (cgraph_edge *e)
{
autofdo::inline_stack stack;
autofdo::get_inline_stack_in_node (gimple_location (e->call_stmt),
&stack, e->caller);
autofdo::function_instance *s
= get_function_instance_by_inline_stack (stack);
s->remove_icall_target (e->caller->decl, e->call_stmt);
}
/* Find total count of the callee of EDGE. */
/* Find total count of the callee of EDGE. */
gcov_type
gcov_type
@@ -822,18 +904,8 @@ autofdo_source_profile::get_callsite_total_count (
inline_stack stack;
inline_stack stack;
stack.safe_push (std::make_pair (edge->callee->decl, 0));
stack.safe_push (std::make_pair (edge->callee->decl, 0));
cgraph_edge *e = edge;
get_inline_stack_in_node (gimple_location (edge->call_stmt), &stack,
do
edge->caller);
{
get_inline_stack (gimple_location (e->call_stmt), &stack,
e->caller->decl);
/* If caller is inlined, continue building stack. */
if (!e->caller->inlined_to)
e = NULL;
else
e = e->caller->callers;
}
while (e);
function_instance *s = get_function_instance_by_inline_stack (stack);
function_instance *s = get_function_instance_by_inline_stack (stack);
if (s == NULL
if (s == NULL
@@ -1000,19 +1072,35 @@ read_profile (void)
decide if it needs to promote and inline. */
decide if it needs to promote and inline. */
static bool
static bool
afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
afdo_indirect_call (gcall *stmt, const icall_target_map &map,
bool transform)
bool transform, cgraph_edge *indirect_edge)
{
{
gimple *gs = gsi_stmt (*gsi);
tree callee;
tree callee;
if (map.size () == 0)
if (map.size () == 0)
return false;
{
gcall *stmt = dyn_cast <gcall *> (gs);
if (dump_file)
if (!stmt
fprintf (dump_file, "No targets found\n");
|| gimple_call_internal_p (stmt)
return false;
|| gimple_call_fndecl (stmt) != NULL_TREE)
}
return false;
if (!stmt)
{
if (dump_file)
fprintf (dump_file, "No call statement\n");
return false;
}
if (gimple_call_internal_p (stmt))
{
if (dump_file)
fprintf (dump_file, "Internal call\n");
return false;
}
if (gimple_call_fndecl (stmt) != NULL_TREE)
{
if (dump_file)
fprintf (dump_file, "Call is already direct\n");
return false;
}
gcov_type total = 0;
gcov_type total = 0;
icall_target_map::const_iterator max_iter = map.end ();
icall_target_map::const_iterator max_iter = map.end ();
@@ -1026,38 +1114,47 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
}
}
struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
get_identifier (afdo_string_table->get_name (max_iter->first)));
get_identifier (afdo_string_table->get_name (max_iter->first)));
if (direct_call == NULL || !direct_call->profile_id)
if (direct_call == NULL)
return false;
{
if (dump_file)
fprintf (dump_file, "Failed to find cgraph node for %s\n",
afdo_string_table->get_name (max_iter->first));
return false;
}
callee = gimple_call_fn (stmt);
callee = gimple_call_fn (stmt);
histogram_value hist = gimple_alloc_histogram_value (
cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
hist->n_counters = 4;
hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
gimple_add_histogram_value (cfun, stmt, hist);
/* Total counter */
hist->hvalue.counters[0] = total;
/* Number of value/counter pairs */
hist->hvalue.counters[1] = 1;
/* Value */
hist->hvalue.counters[2] = direct_call->profile_id;
/* Counter */
hist->hvalue.counters[3] = max_iter->second;
if (!transform)
if (!transform)
return false;
{
if (!direct_call->profile_id)
cgraph_node* current_function_node = cgraph_node::get (current_function_decl);
{
if (dump_file)
/* If the direct call is a recursive call, don't promote it since
fprintf (dump_file, "No profile id\n");
we are not set up to inline recursive calls at this stage. */
return false;
if (direct_call == current_function_node)
}
return false;
histogram_value hist = gimple_alloc_histogram_value (
cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
struct cgraph_edge *indirect_edge
hist->n_counters = 4;
= current_function_node->get_edge (stmt);
hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
gimple_add_histogram_value (cfun, stmt, hist);
/* Total counter */
hist->hvalue.counters[0] = total;
/* Number of value/counter pairs */
hist->hvalue.counters[1] = 1;
/* Value */
hist->hvalue.counters[2] = direct_call->profile_id;
/* Counter */
hist->hvalue.counters[3] = max_iter->second;
if (!direct_call->profile_id)
{
if (dump_file)
fprintf (dump_file, "Histogram attached\n");
return false;
}
return false;
}
if (dump_file)
if (dump_file)
{
{
@@ -1067,16 +1164,10 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
print_generic_expr (dump_file, direct_call->decl, TDF_SLIM);
print_generic_expr (dump_file, direct_call->decl, TDF_SLIM);
}
}
if (direct_call == NULL)
if (!direct_call->definition)
{
if (dump_file)
fprintf (dump_file, " not transforming\n");
return false;
}
if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
{
{
if (dump_file)
if (dump_file)
fprintf (dump_file, " no declaration\n");
fprintf (dump_file, " no definition available\n");
return false;
return false;
}
}
@@ -1087,13 +1178,9 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
fprintf (dump_file, "\n");
fprintf (dump_file, "\n");
}
}
struct cgraph_edge *new_edge
indirect_edge->make_speculative
= indirect_edge->make_speculative
(direct_call,
(direct_call,
gimple_bb (stmt)->count.apply_scale (99, 100));
gimple_bb (stmt)->count.apply_scale (99, 100));
cgraph_edge::redirect_call_stmt_to_callee (new_edge);
gimple_remove_histogram_value (cfun, stmt, hist);
inline_call (new_edge, true, NULL, NULL, false);
return true;
return true;
}
}
@@ -1101,10 +1188,10 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
histograms and adds them to list VALUES. */
histograms and adds them to list VALUES. */
static bool
static bool
afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
afdo_vpt (gcall *gs, const icall_target_map &map,
bool transform)
bool transform, cgraph_edge *indirect_edge)
{
{
return afdo_indirect_call (gsi, map, transform);
return afdo_indirect_call (gs, map, transform, indirect_edge);
}
}
typedef std::set<basic_block> bb_set;
typedef std::set<basic_block> bb_set;
@@ -1150,7 +1237,7 @@ update_count_by_afdo_count (profile_count *count, gcov_type c)
Return TRUE if BB is annotated. */
Return TRUE if BB is annotated. */
static bool
static bool
afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
afdo_set_bb_count (basic_block bb)
{
{
gimple_stmt_iterator gsi;
gimple_stmt_iterator gsi;
gcov_type max_count = 0;
gcov_type max_count = 0;
@@ -1163,22 +1250,26 @@ afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
count_info info;
count_info info;
gimple *stmt = gsi_stmt (gsi);
gimple *stmt = gsi_stmt (gsi);
if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
continue;
continue;
if (afdo_source_profile->get_count_info (stmt, &info))
if (afdo_source_profile->get_count_info (stmt, &info))
{
{
if (info.count > max_count)
if (info.count > max_count)
max_count = info.count;
max_count = info.count;
if (dump_file && info.count)
if (dump_file && info.count)
{
{
fprintf (dump_file, " count %" PRIu64 " in stmt: ",
fprintf (dump_file, " count %" PRIu64 " in stmt: ",
(int64_t)info.count);
(int64_t)info.count);
print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
}
}
has_annotated = true;
has_annotated = true;
if (info.targets.size () > 0
gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
&& promoted.find (stmt) == promoted.end ())
/* TODO: if inlined early and indirect call was not optimized out,
afdo_vpt (&gsi, info.targets, false);
we will end up speculating again. Early inliner should remove
}
all targets for edges it speculated into safely. */
if (call
&& info.targets.size () > 0)
afdo_vpt (call, info.targets, false, NULL);
}
}
}
if (!has_annotated)
if (!has_annotated)
@@ -1899,76 +1990,10 @@ afdo_calculate_branch_prob (bb_set *annotated_bb)
}
}
}
}
/* Perform value profile transformation using AutoFDO profile. Add the
/* Annotate auto profile to the control flow graph. */
promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
indirect call promoted. */
static bool
afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
{
basic_block bb;
if (afdo_source_profile->get_function_instance_by_decl (
current_function_decl) == NULL)
return false;
compute_fn_summary (cgraph_node::get (current_function_decl), true);
bool has_vpt = false;
FOR_EACH_BB_FN (bb, cfun)
{
if (!has_indirect_call (bb))
continue;
gimple_stmt_iterator gsi;
gcov_type bb_count = 0;
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
count_info info;
gimple *stmt = gsi_stmt (gsi);
if (afdo_source_profile->get_count_info (stmt, &info))
bb_count = MAX (bb_count, info.count);
}
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
/* IC_promotion and early_inline_2 is done in multiple iterations.
No need to promoted the stmt if its in promoted_stmts (means
it is already been promoted in the previous iterations). */
if ((!stmt) || gimple_call_fn (stmt) == NULL
|| TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
|| promoted_stmts->find (stmt) != promoted_stmts->end ())
continue;
count_info info;
afdo_source_profile->get_count_info (stmt, &info);
info.count = bb_count;
if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
{
/* Promote the indirect call and update the promoted_stmts. */
promoted_stmts->insert (stmt);
if (afdo_vpt (&gsi, info.targets, true))
has_vpt = true;
}
}
}
if (has_vpt)
{
unsigned todo = optimize_inline_calls (current_function_decl);
if (todo & TODO_update_ssa_any)
update_ssa (TODO_update_ssa);
return true;
}
return false;
}
/* Annotate auto profile to the control flow graph. Do not annotate value
profile for stmts in PROMOTED_STMTS. */
static void
static void
afdo_annotate_cfg (const stmt_set &promoted_stmts)
afdo_annotate_cfg (void)
{
{
basic_block bb;
basic_block bb;
bb_set annotated_bb;
bb_set annotated_bb;
@@ -1997,7 +2022,7 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts)
bool profile_found = head_count > 0;
bool profile_found = head_count > 0;
FOR_EACH_BB_FN (bb, cfun)
FOR_EACH_BB_FN (bb, cfun)
{
{
if (afdo_set_bb_count (bb, promoted_stmts))
if (afdo_set_bb_count (bb))
{
{
if (bb->count.quality () == AFDO)
if (bb->count.quality () == AFDO)
{
{
@@ -2125,45 +2150,8 @@ auto_profile (void)
push_cfun (DECL_STRUCT_FUNCTION (node->decl));
push_cfun (DECL_STRUCT_FUNCTION (node->decl));
/* First do indirect call promotion and early inline to make the
unsigned int todo = early_inline ();
IR match the profiled binary before actual annotation.
autofdo::afdo_annotate_cfg ();
This is needed because an indirect call might have been promoted
and inlined in the profiled binary. If we do not promote and
inline these indirect calls before annotation, the profile for
these promoted functions will be lost.
e.g. foo() --indirect_call--> bar()
In profiled binary, the callsite is promoted and inlined, making
the profile look like:
foo: {
loc_foo_1: count_1
bar@loc_foo_2: {
loc_bar_1: count_2
loc_bar_2: count_3
}
}
Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
If we perform annotation on it, the profile inside bar@loc_foo2
will be wasted.
To avoid this, we promote loc_foo_2 and inline the promoted bar
function before annotation, so the profile inside bar@loc_foo2
will be useful. */
autofdo::stmt_set promoted_stmts;
unsigned int todo = 0;
for (int i = 0; i < 10; i++)
{
if (!flag_value_profile_transformations
|| !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
break;
todo |= early_inline ();
}
todo |= early_inline ();
autofdo::afdo_annotate_cfg (promoted_stmts);
compute_function_frequency ();
compute_function_frequency ();
/* Local pure-const may imply need to fixup the cfg. */
/* Local pure-const may imply need to fixup the cfg. */
@@ -2225,6 +2213,14 @@ afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
temporarily set it to afdo_profile_info to calculate hotness. */
temporarily set it to afdo_profile_info to calculate hotness. */
profile_info = autofdo::afdo_profile_info;
profile_info = autofdo::afdo_profile_info;
is_hot = maybe_hot_count_p (NULL, pcount);
is_hot = maybe_hot_count_p (NULL, pcount);
if (dump_file)
{
fprintf (dump_file, "Call %s -> %s has %s afdo profile count ",
edge->caller->dump_name (), edge->callee->dump_name (),
is_hot ? "hot" : "cold");
pcount.dump (dump_file);
fprintf (dump_file, "\n");
}
profile_info = saved_profile_info;
profile_info = saved_profile_info;
return is_hot;
return is_hot;
}
}
@@ -2232,6 +2228,78 @@ afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
return false;
return false;
}
}
/* Do indirect call promotion during early inlining to make the
IR match the profiled binary before actual annotation.
This is needed because an indirect call might have been promoted
and inlined in the profiled binary. If we do not promote and
inline these indirect calls before annotation, the profile for
these promoted functions will be lost.
e.g. foo() --indirect_call--> bar()
In profiled binary, the callsite is promoted and inlined, making
the profile look like:
foo: {
loc_foo_1: count_1
bar@loc_foo_2: {
loc_bar_1: count_2
loc_bar_2: count_3
}
}
Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
If we perform annotation on it, the profile inside bar@loc_foo2
will be wasted.
To avoid this, we promote loc_foo_2 and inline the promoted bar
function before annotation, so the profile inside bar@loc_foo2
will be useful. */
bool
afdo_vpt_for_early_inline (cgraph_node *node)
{
if (!node->indirect_calls)
return false;
bool changed = false;
cgraph_node *outer = node->inlined_to ? node->inlined_to : node;
if (autofdo::afdo_source_profile->get_function_instance_by_decl
(outer->decl) == NULL)
return false;
for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
{
gcov_type bb_count = 0;
autofdo::count_info info;
basic_block bb = gimple_bb (e->call_stmt);
/* TODO: This is quadratic; cache the value. */
for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
!gsi_end_p (gsi); gsi_next (&gsi))
{
autofdo::count_info info;
gimple *stmt = gsi_stmt (gsi);
if (autofdo::afdo_source_profile->get_count_info (stmt, &info, node))
bb_count = MAX (bb_count, info.count);
}
autofdo::afdo_source_profile->get_count_info (e->call_stmt, &info, node);
info.count = bb_count;
if (!autofdo::afdo_source_profile->update_inlined_ind_target
(e->call_stmt, &info, node))
continue;
changed |= autofdo::afdo_vpt (e->call_stmt, info.targets, true, e);
}
return changed;
}
/* If speculation was used during early inlining, remove the target
so we do not speculate the indirect edge again during the afdo pass. */
void
remove_afdo_speculative_target (cgraph_edge *e)
{
autofdo::afdo_source_profile->remove_icall_target (e);
}
namespace
namespace
{
{
@@ -28,4 +28,11 @@ extern void end_auto_profile (void);
/* Returns TRUE if EDGE is hot enough to be inlined early. */
/* Returns TRUE if EDGE is hot enough to be inlined early. */
extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *);
extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *);
/* Try to turn indirect calls into speculative calls for early inlining. */
extern bool afdo_vpt_for_early_inline (cgraph_node *node);
/* If speculation was early inlined, remove it from profile data so we
do not repeat it later. */
extern void remove_afdo_speculative_target (cgraph_edge *);
#endif /* AUTO_PROFILE_H */
#endif /* AUTO_PROFILE_H */
@@ -3113,29 +3113,39 @@ early_inline_small_functions (struct cgraph_node *node)
and inlining seems useful. That is there are enough samples in the callee
and inlining seems useful. That is there are enough samples in the callee
function.
function.
Unlike early inlining, we inline recursively.
Unlike early inlining, we inline recursively. Profile data is also used
TODO: We should also integrate VPT. */
to produce speculative calls which we then inline. In the case some
speculation was introduced, set SPECULATIVE_CALLS. */
static bool
static bool
inline_functions_by_afdo (struct cgraph_node *node)
inline_functions_by_afdo (struct cgraph_node *node, bool *speculative_calls)
{
{
if (!flag_auto_profile)
if (!flag_auto_profile)
return false;
return false;
struct cgraph_edge *e;
struct cgraph_edge *e;
bool inlined = false;
bool inlined = false;
for (e = node->callees; e; e = e->next_callee)
*speculative_calls |= afdo_vpt_for_early_inline (node);
cgraph_edge *next;
for (e = node->callees; e; e = next)
{
{
struct cgraph_node *callee = e->callee->ultimate_alias_target ();
next = e->next_callee;
if (!e->inline_failed)
if (!e->inline_failed)
{
{
inlined |= inline_functions_by_afdo (e->callee);
inlined |= inline_functions_by_afdo (e->callee, speculative_calls);
continue;
continue;
}
}
if (!afdo_callsite_hot_enough_for_early_inline (e))
if (!afdo_callsite_hot_enough_for_early_inline (e))
continue;
{
/* If we do not want to inline, remove the speculation. */
if (e->speculative)
cgraph_edge::resolve_speculation (e);
continue;
}
struct cgraph_node *callee = e->callee->ultimate_alias_target ();
if (callee->definition
if (callee->definition
&& !ipa_fn_summaries->get (callee))
&& !ipa_fn_summaries->get (callee))
compute_fn_summary (callee, true);
compute_fn_summary (callee, true);
@@ -3147,6 +3157,9 @@ inline_functions_by_afdo (struct cgraph_node *node)
"Not inlining %C -> %C using auto-profile, %s.",
"Not inlining %C -> %C using auto-profile, %s.",
e->caller, e->callee,
e->caller, e->callee,
cgraph_inline_failed_string (e->inline_failed));
cgraph_inline_failed_string (e->inline_failed));
/* If we do not want to inline, remove the speculation. */
if (e->speculative)
cgraph_edge::resolve_speculation (e);
continue;
continue;
}
}
/* We can handle recursive inlining by first producing
/* We can handle recursive inlining by first producing
@@ -3158,6 +3171,9 @@ inline_functions_by_afdo (struct cgraph_node *node)
"Not inlining %C recursively"
"Not inlining %C recursively"
" using auto-profile.\n",
" using auto-profile.\n",
e->callee);
e->callee);
/* If we do not want to inline, remove the speculation. */
if (e->speculative)
cgraph_edge::resolve_speculation (e);
continue;
continue;
}
}
@@ -3173,8 +3189,10 @@ inline_functions_by_afdo (struct cgraph_node *node)
"Inlining using auto-profile %C into %C.\n",
"Inlining using auto-profile %C into %C.\n",
callee, e->caller);
callee, e->caller);
}
}
if (e->speculative)
remove_afdo_speculative_target (e);
inline_call (e, true, NULL, NULL, false);
inline_call (e, true, NULL, NULL, false);
inlined |= inline_functions_by_afdo (e->callee);
inlined |= inline_functions_by_afdo (e->callee, speculative_calls);
inlined = true;
inlined = true;
}
}
@@ -3262,10 +3280,20 @@ early_inliner (function *fun)
param_early_inliner_max_iterations))
param_early_inliner_max_iterations))
{
{
bool inlined = early_inline_small_functions (node);
bool inlined = early_inline_small_functions (node);
inlined |= inline_functions_by_afdo (node);
bool speculative_calls = false;
inlined |= inline_functions_by_afdo (node, &speculative_calls);
if (!inlined)
if (!inlined)
break;
break;
timevar_push (TV_INTEGRATION);
timevar_push (TV_INTEGRATION);
if (speculative_calls)
{
cgraph_edge *next;
for (cgraph_edge *e = node->callees; e; e = next)
{
next = e->next_callee;
cgraph_edge::redirect_call_stmt_to_callee (e);
}
}
todo |= optimize_inline_calls (current_function_decl);
todo |= optimize_inline_calls (current_function_decl);
/* Technically we ought to recompute inline parameters so the new
/* Technically we ought to recompute inline parameters so the new
@@ -13,20 +13,26 @@ struct wrapptr
int test (struct wrapptr *p)
int test (struct wrapptr *p)
{
{
int s = 0;
int s = 0;
int (*ret)(int) = p->ret;
for (int pos = 0; pos < 1000; pos++)
for (int pos = 0; pos < 1000; pos++)
s+p->ret(pos);
ret(pos);
if (s)
if (s)
__builtin_printf ("sum error\n");
__builtin_printf ("sum error\n");
}
}
int main()
int main()
{
{
struct wrapptr p={reta};
for (int i = 0; i < 10000; i++)
for (int i = 0; i < 10000; i++)
{
struct wrapptr p={reta};
test(&p);
test(&p);
}
return 0;
return 0;
}
}
/* { dg-final-use-autofdo { scan-tree-dump "Inlining using auto-profile test" "einline"} } */
/* { dg-final-use-autofdo { scan-tree-dump "Inlining using auto-profile test" "einline"} } */
/* { dg-final-use-autofdo { scan-ipa-dump "Checking indirect call -> direct call reta" "afdo"} } */
/* { dg-final-use-autofdo { scan-tree-dump "Checking indirect call -> direct call ret_" "einline"} } */
/* { dg-final-use-autofdo { scan-ipa-dump-times "looks good" 0 "afdo"} } */
/* { dg-final-use-autofdo { scan-tree-dump "looks good" "einline"} } */
/* If we inlined reta->test->main, it will contain array[pos]. */
/* If we inlined reta->test->main, it will contain array[pos]. */
/* { dg-final-use-autofdo { scan-ipa-dump "array.pos_" "afdo"} } */
/* { dg-final-use-autofdo { scan-tree-dump "array.pos_" "einline"} } */
@@ -31,5 +31,5 @@ main (void)
}
}
/* { dg-final-use-not-autofdo { scan-ipa-dump "Indirect call -> direct call.* add1 .will resolve by ipa-profile" "profile"} } */
/* { dg-final-use-not-autofdo { scan-ipa-dump "Indirect call -> direct call.* add1 .will resolve by ipa-profile" "profile"} } */
/* { dg-final-use-not-autofdo { scan-ipa-dump "Indirect call -> direct call.* sub1 .will resolve by ipa-profile" "profile"} } */
/* { dg-final-use-not-autofdo { scan-ipa-dump "Indirect call -> direct call.* sub1 .will resolve by ipa-profile" "profile"} } */
/* { dg-final-use-autofdo { scan-ipa-dump "Inlining add1/. into main/" "afdo"} } */
/* { dg-final-use-autofdo { scan-tree-dump "Inlining add1/. into main/" "einline"} } */
/* { dg-final-use-autofdo { scan-ipa-dump "Inlining sub1/. into main/" "afdo"} } */
/* { dg-final-use-autofdo { scan-tree-dump "Inlining sub1/. into main/" "einline"} } */
@@ -4846,7 +4846,9 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id,
goto egress;
goto egress;
cg_edge = id->dst_node->get_edge (stmt);
cg_edge = id->dst_node->get_edge (stmt);
gcc_checking_assert (cg_edge);
/* Edge should exist and speculations should be resolved at this
stage. */
gcc_checking_assert (cg_edge && !cg_edge->speculative);
/* First, see if we can figure out what function is being called.
/* First, see if we can figure out what function is being called.
If we cannot, then there is no hope of inlining the function. */
If we cannot, then there is no hope of inlining the function. */
if (cg_edge->indirect_unknown_callee)
if (cg_edge->indirect_unknown_callee)