CBMC
sese_regions.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Single-entry, single-exit region analysis
4 
5 Author: Diffblue Ltd.
6 
7 \*******************************************************************/
8 
11 
12 #include <sstream>
13 
14 #include "cfg_dominators.h"
15 #include "natural_loops.h"
16 #include "sese_regions.h"
17 
19 {
20  natural_loopst natural_loops;
21  natural_loops(goto_program);
22 
23  // A single-entry, single-exit region is one whose entry and exit nodes
24  // are in a dominator-postdominator relationship, and which are
25  // *cycle equivalent*, which means that any cycle in the CFG that includes the
26  // entry must include the exit and vice versa.
27 
28  // Because we have a natural-loop analysis but not a general cycle analysis,
29  // we conservatively approximate the latter condition by checking that there
30  // are no loops with multiple backedges (this implies there are effectively
31  // two loops being described as one), and that there are no backedges not
32  // associated with a natural loop (indicating a cycle with multiple entry
33  // edges). Those conditions being met, it suffices to check that the would-be
34  // region has its entry and exit nodes in the same natural loops; without them
35  // we conservatively decline to identify any regions.
36 
37  for(auto it = goto_program.instructions.begin();
38  it != goto_program.instructions.end();
39  ++it)
40  {
41  for(auto predecessor : it->incoming_edges)
42  {
43  if(
44  predecessor->location_number > it->location_number &&
45  !natural_loops.is_loop_header(it))
46  {
47  // No loop header associated with this backedge; conservatively decline
48  // to diagnose any SESE regions.
49  return;
50  }
51  }
52  }
53 
54  for(const auto &loop : natural_loops.loop_map)
55  {
56  std::size_t backedge_count = std::count_if(
57  loop.first->incoming_edges.begin(),
58  loop.first->incoming_edges.end(),
59  [&](const goto_programt::const_targett &predecessor) {
60  return loop.first->location_number < predecessor->location_number;
61  });
62  if(backedge_count > 1)
63  {
64  // Loop with multiple backedges; conservatively decline to diagnose any
65  // SESE regions.
66  return;
67  }
68  }
69 
70  // All checks passed, let's find some regions!
71  compute_sese_regions(goto_program, natural_loops);
72 }
73 
74 typedef std::
75  unordered_map<goto_programt::const_targett, std::size_t, const_target_hash>
77 
82 {
83  innermost_loop_mapt innermost_loop_ids;
84  std::vector<std::size_t> loop_size_by_id;
85 
86  std::size_t loop_id = 0;
87  for(const auto &header_and_loop : natural_loops.loop_map)
88  {
89  const auto &loop = header_and_loop.second;
90  loop_size_by_id.push_back(loop.size());
91 
92  for(const auto &instruction : loop)
93  {
94  auto emplace_result = innermost_loop_ids.emplace(instruction, loop_id);
95  if(!emplace_result.second)
96  {
97  // Is the existing loop for this instruction larger than this one? If so
98  // this is the inner of the two loops.
99  if(loop_size_by_id.at(emplace_result.first->second) > loop.size())
100  emplace_result.first->second = loop_id;
101  }
102  }
103 
104  ++loop_id;
105  }
106 
107  return innermost_loop_ids;
108 }
109 
110 static long get_innermost_loop(
111  const innermost_loop_mapt &innermost_loops,
112  const goto_programt::const_targett &target)
113 {
114  auto findit = innermost_loops.find(target);
115  if(findit == innermost_loops.end())
116  return -1;
117  else
118  return (long)findit->second;
119 }
120 
122  const goto_programt &goto_program,
123  const natural_loopst &natural_loops)
124 {
125  const auto &dominators = natural_loops.get_dominator_info();
126  cfg_post_dominatorst postdominators;
127  postdominators(goto_program);
128 
129  auto innermost_loop_ids = compute_innermost_loop_ids(natural_loops);
130 
131  for(auto it = goto_program.instructions.begin();
132  it != goto_program.instructions.end();
133  ++it)
134  {
135  // Only look for regions starting at nontrivial CFG edges:
136 
137  auto successors = goto_program.get_successors(it);
138  if(
139  successors.size() == 1 &&
140  (*successors.begin())->incoming_edges.size() == 1)
141  continue;
142 
143  const auto &instruction_postdoms = postdominators.get_node(it).dominators;
144 
145  // Ideally we would start with the immediate postdominator and walk down,
146  // but our current dominator analysis doesn't make it easy to determine an
147  // immediate dominator.
148 
149  // Ideally I would use `std::optional<std::size_t>` here, but it triggers a
150  // GCC-5 bug.
151  std::size_t closest_exit_index = dominators.cfg.size();
152  for(const auto &possible_exit : instruction_postdoms)
153  {
154  const auto possible_exit_index = dominators.get_node_index(possible_exit);
155  const auto &possible_exit_node = dominators.cfg[possible_exit_index];
156  const auto possible_exit_dominators =
157  possible_exit_node.dominators.size();
158 
159  if(
160  it != possible_exit && dominators.dominates(it, possible_exit_node) &&
161  get_innermost_loop(innermost_loop_ids, it) ==
162  get_innermost_loop(innermost_loop_ids, possible_exit))
163  {
164  // If there are several candidate region exit nodes, prefer the one with
165  // the least dominators, i.e. the closest to the region entrance.
166  if(
167  closest_exit_index == dominators.cfg.size() ||
168  dominators.cfg[closest_exit_index].dominators.size() >
169  possible_exit_dominators)
170  {
171  closest_exit_index = possible_exit_index;
172  }
173  }
174  }
175 
176  if(closest_exit_index < dominators.cfg.size())
177  {
178  auto emplace_result =
179  sese_regions.emplace(it, dominators.cfg[closest_exit_index].PC);
180  INVARIANT(
181  emplace_result.second, "should only visit each region entry once");
182  }
183  }
184 }
185 
186 static std::string trimmed_last_line(const std::string &input)
187 {
188  auto rhs_trim_idx = input.size() - 1;
189  while(rhs_trim_idx > 0 && isspace(input[rhs_trim_idx]))
190  --rhs_trim_idx;
191 
192  auto lhs_trim_idx = input.rfind('\n', rhs_trim_idx);
193  if(lhs_trim_idx == std::string::npos)
194  lhs_trim_idx = 0;
195 
196  while(lhs_trim_idx < input.size() && isspace(input[lhs_trim_idx]))
197  ++lhs_trim_idx;
198 
199  return input.substr(lhs_trim_idx, (rhs_trim_idx - lhs_trim_idx) + 1);
200 }
201 
202 typedef std::
203  unordered_map<goto_programt::const_targett, std::size_t, const_target_hash>
205 
206 static std::string instruction_ordinals(
207  const goto_programt::const_targett &instruction,
209  &program_relative_instruction_indices)
210 {
211  return "(" +
212  std::to_string(program_relative_instruction_indices.at(instruction)) +
213  ", " + std::to_string(instruction->location_number) + ")";
214 }
215 
216 static std::string brief_instruction_string(
217  const goto_programt &goto_program,
218  const goto_programt::const_targett &instruction,
219  const namespacet &ns,
221  &program_relative_instruction_indices)
222 {
223  std::ostringstream ostr;
224  instruction->output(ostr);
225  return instruction_ordinals(
226  instruction, program_relative_instruction_indices) +
227  " " + trimmed_last_line(ostr.str());
228 }
229 
231  std::ostream &out,
232  const goto_programt &goto_program,
233  const namespacet &ns) const
234 {
235  program_relative_instruction_indicest program_relative_instruction_indices;
236  std::size_t next_index = 0;
237  for(auto it = goto_program.instructions.begin();
238  it != goto_program.instructions.end();
239  ++it, ++next_index)
240  {
241  program_relative_instruction_indices.emplace(it, next_index);
242  }
243  out << "Function contains " << sese_regions.size()
244  << " single-entry, single-exit regions:\n";
245  for(const auto &entry_exit : sese_regions)
246  {
247  out << "Region starting at "
249  goto_program,
250  entry_exit.first,
251  ns,
252  program_relative_instruction_indices)
253  << " ends at "
255  goto_program,
256  entry_exit.second,
257  ns,
258  program_relative_instruction_indices)
259  << "\n";
260  }
261 }
Compute dominators for CFG of goto_function.
const cfgt::nodet & get_node(const T &program_point) const
Get the graph node (which gives dominators, predecessors and successors) for program_point.
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:622
instructionst::const_iterator const_targett
Definition: goto_program.h:615
std::list< Target > get_successors(Target target) const
Get control-flow successors of a given instruction.
loop_mapt loop_map
Definition: loop_analysis.h:88
bool is_loop_header(const T instruction) const
Returns true if instruction is the header of any loop.
Definition: loop_analysis.h:93
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
const cfg_dominators_templatet< P, T, false > & get_dominator_info() const
Definition: natural_loops.h:60
A concretized version of natural_loops_templatet<const goto_programt, goto_programt::const_targett>
Definition: natural_loops.h:88
void operator()(const goto_programt &goto_program)
void compute_sese_regions(const goto_programt &goto_program, const natural_loopst &natural_loops)
void output(std::ostream &out, const goto_programt &goto_program, const namespacet &ns) const
std::unordered_map< goto_programt::const_targett, goto_programt::const_targett, const_target_hash > sese_regions
Definition: sese_regions.h:39
int isspace(int c)
Definition: ctype.c:80
Compute natural loops in a goto_function.
static std::string instruction_ordinals(const goto_programt::const_targett &instruction, const program_relative_instruction_indicest &program_relative_instruction_indices)
static long get_innermost_loop(const innermost_loop_mapt &innermost_loops, const goto_programt::const_targett &target)
std::unordered_map< goto_programt::const_targett, std::size_t, const_target_hash > innermost_loop_mapt
static std::string brief_instruction_string(const goto_programt &goto_program, const goto_programt::const_targett &instruction, const namespacet &ns, const program_relative_instruction_indicest &program_relative_instruction_indices)
static innermost_loop_mapt compute_innermost_loop_ids(const natural_loopst &natural_loops)
Builds a map from instructions to the smallest (and therefore innermost) loop they are a member of.
static std::string trimmed_last_line(const std::string &input)
std::unordered_map< goto_programt::const_targett, std::size_t, const_target_hash > program_relative_instruction_indicest
Single-entry, single-exit region analysis.
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.