CBMC
ai.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Abstract Interpretation
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "ai.h"
13 
14 #include <memory>
15 #include <sstream>
16 #include <type_traits>
17 
18 #include <util/invariant.h>
19 
21  const namespacet &ns,
22  const goto_functionst &goto_functions,
23  std::ostream &out) const
24 {
25  for(const auto &gf_entry : goto_functions.function_map)
26  {
27  if(gf_entry.second.body_available())
28  {
29  out << "////\n";
30  out << "//// Function: " << gf_entry.first << "\n";
31  out << "////\n";
32  out << "\n";
33 
34  output(ns, gf_entry.first, gf_entry.second.body, out);
35  }
36  }
37 }
38 
40  const namespacet &ns,
41  const irep_idt &function_id,
42  const goto_programt &goto_program,
43  std::ostream &out) const
44 {
45  (void)function_id; // unused parameter
46 
47  forall_goto_program_instructions(i_it, goto_program)
48  {
49  out << "**** " << i_it->location_number << " " << i_it->source_location()
50  << "\n";
51 
52  abstract_state_before(i_it)->output(out, *this, ns);
53  out << "\n";
54  #if 1
55  i_it->output(out);
56  out << "\n";
57  #endif
58  }
59 }
60 
62  const namespacet &ns,
63  const goto_functionst &goto_functions) const
64 {
65  json_objectt result;
66 
67  for(const auto &gf_entry : goto_functions.function_map)
68  {
69  if(gf_entry.second.body_available())
70  {
71  result[id2string(gf_entry.first)] =
72  output_json(ns, gf_entry.first, gf_entry.second.body);
73  }
74  else
75  {
76  result[id2string(gf_entry.first)] = json_arrayt();
77  }
78  }
79 
80  return std::move(result);
81 }
82 
84  const namespacet &ns,
85  const irep_idt &function_id,
86  const goto_programt &goto_program) const
87 {
88  (void)function_id; // unused parameter
89 
90  json_arrayt contents;
91 
92  forall_goto_program_instructions(i_it, goto_program)
93  {
94  // Ideally we need output_instruction_json
95  std::ostringstream out;
96  i_it->output(out);
97 
98  json_objectt location{
99  {"locationNumber", json_numbert(std::to_string(i_it->location_number))},
100  {"sourceLocation", json_stringt(i_it->source_location().as_string())},
101  {"abstractState", abstract_state_before(i_it)->output_json(*this, ns)},
102  {"instruction", json_stringt(out.str())}};
103 
104  contents.push_back(std::move(location));
105  }
106 
107  return std::move(contents);
108 }
109 
111  const namespacet &ns,
112  const goto_functionst &goto_functions) const
113 {
114  xmlt program("program");
115 
116  for(const auto &gf_entry : goto_functions.function_map)
117  {
118  xmlt function(
119  "function",
120  {{"name", id2string(gf_entry.first)},
121  {"body_available", gf_entry.second.body_available() ? "true" : "false"}},
122  {});
123 
124  if(gf_entry.second.body_available())
125  {
126  function.new_element(
127  output_xml(ns, gf_entry.first, gf_entry.second.body));
128  }
129 
130  program.new_element(function);
131  }
132 
133  return program;
134 }
135 
137  const namespacet &ns,
138  const irep_idt &function_id,
139  const goto_programt &goto_program) const
140 {
141  (void)function_id; // unused parameter
142 
143  xmlt function_body;
144 
145  forall_goto_program_instructions(i_it, goto_program)
146  {
147  xmlt location(
148  "",
149  {{"location_number", std::to_string(i_it->location_number)},
150  {"source_location", i_it->source_location().as_string()}},
151  {abstract_state_before(i_it)->output_xml(*this, ns)});
152 
153  // Ideally we need output_instruction_xml
154  std::ostringstream out;
155  i_it->output(out);
156  location.set_attribute("instruction", out.str());
157 
158  function_body.new_element(location);
159  }
160 
161  return function_body;
162 }
163 
166 {
167  // find the 'entry function'
168 
169  goto_functionst::function_mapt::const_iterator
170  f_it=goto_functions.function_map.find(goto_functions.entry_point());
171 
172  if(f_it!=goto_functions.function_map.end())
173  return entry_state(f_it->second.body);
174 
175  // It is not clear if this is even a well-structured goto_functionst object
176  return nullptr;
177 }
178 
180 {
181  // The first instruction of 'goto_program' is the entry point
182  trace_ptrt p = history_factory->epoch(goto_program.instructions.begin());
183  get_state(p).make_entry();
184  return p;
185 }
186 
188  const irep_idt &function_id,
189  const goto_functionst::goto_functiont &goto_function)
190 {
191  initialize(function_id, goto_function.body);
192 }
193 
194 void ai_baset::initialize(const irep_idt &, const goto_programt &goto_program)
195 {
196  // Domains are created and set to bottom on access.
197  // So we do not need to set them to be bottom before hand.
198 }
199 
200 void ai_baset::initialize(const goto_functionst &goto_functions)
201 {
202  for(const auto &gf_entry : goto_functions.function_map)
203  initialize(gf_entry.first, gf_entry.second);
204 }
205 
207 {
208  // Nothing to do per default
209 }
210 
212 {
213  PRECONDITION(!working_set.empty());
214 
215  static_assert(
216  std::is_same<
217  working_sett,
218  std::set<trace_ptrt, ai_history_baset::compare_historyt>>::value,
219  "begin must return the minimal entry");
220  auto first = working_set.begin();
221 
222  trace_ptrt t = *first;
223 
224  working_set.erase(first);
225 
226  return t;
227 }
228 
230  trace_ptrt start_trace,
231  const irep_idt &function_id,
232  const goto_programt &goto_program,
233  const goto_functionst &goto_functions,
234  const namespacet &ns)
235 {
236  PRECONDITION(start_trace != nullptr);
237 
238  working_sett working_set;
239  put_in_working_set(working_set, start_trace);
240 
241  bool new_data=false;
242 
243  while(!working_set.empty())
244  {
245  trace_ptrt p = get_next(working_set);
246 
247  // goto_program is really only needed for iterator manipulation
248  if(visit(function_id, p, working_set, goto_program, goto_functions, ns))
249  new_data=true;
250  }
251 
252  return new_data;
253 }
254 
256  trace_ptrt start_trace,
257  const goto_functionst &goto_functions,
258  const namespacet &ns)
259 {
260  goto_functionst::function_mapt::const_iterator f_it =
261  goto_functions.function_map.find(goto_functions.entry_point());
262 
263  if(f_it != goto_functions.function_map.end())
264  fixedpoint(start_trace, f_it->first, f_it->second.body, goto_functions, ns);
265 }
266 
268  const irep_idt &function_id,
269  trace_ptrt p,
270  working_sett &working_set,
271  const goto_programt &goto_program,
272  const goto_functionst &goto_functions,
273  const namespacet &ns)
274 {
275  bool new_data=false;
276  locationt l = p->current_location();
278 
279  log.progress() << "ai_baset::visit " << l->location_number << " in "
280  << function_id << messaget::eom;
281 
282  // Function call and end are special cases
283  if(l->is_function_call())
284  {
286  goto_program.get_successors(l).size() == 1,
287  "function calls only have one successor");
288 
290  *(goto_program.get_successors(l).begin()) == std::next(l),
291  "function call successor / return location must be the next instruction");
292 
293  new_data = visit_function_call(
294  function_id, p, working_set, goto_program, goto_functions, ns);
295  }
296  else if(l->is_end_function())
297  {
299  goto_program.get_successors(l).empty(),
300  "The end function instruction should have no successors.");
301 
302  new_data = visit_end_function(
303  function_id, p, working_set, goto_program, goto_functions, ns);
304  }
305  else
306  {
307  // Successors can be empty, for example assume(0).
308  // Successors can contain duplicates, for example GOTO next;
309  for(const auto &to_l : goto_program.get_successors(l))
310  {
311  if(to_l == goto_program.instructions.end())
312  continue;
313 
314  new_data |= visit_edge(
315  function_id,
316  p,
317  function_id,
318  to_l,
320  ns,
321  working_set); // Local steps so no caller history needed
322  }
323  }
324 
325  return new_data;
326 }
327 
329  const irep_idt &function_id,
330  trace_ptrt p,
331  const irep_idt &to_function_id,
332  locationt to_l,
333  trace_ptrt caller_history,
334  const namespacet &ns,
335  working_sett &working_set)
336 {
338  log.progress() << "ai_baset::visit_edge from "
339  << p->current_location()->location_number << " to "
340  << to_l->location_number << "... ";
341 
342  // Has history taught us not to step here...
343  auto next =
344  p->step(to_l, *(storage->abstract_traces_before(to_l)), caller_history);
346  {
347  log.progress() << "blocked by history" << messaget::eom;
348  return false;
349  }
350  else if(next.first == ai_history_baset::step_statust::NEW)
351  {
352  log.progress() << "gives a new history... ";
353  }
354  else
355  {
356  log.progress() << "merges with existing history... ";
357  }
358  trace_ptrt to_p = next.second;
359 
360  // Abstract domains are mutable so we must copy before we transform
361  statet &current = get_state(p);
362 
363  std::unique_ptr<statet> tmp_state(make_temporary_state(current));
364  statet &new_values = *tmp_state;
365 
366  // Apply transformer
367  log.progress() << "applying transformer... ";
368  new_values.transform(function_id, p, to_function_id, to_p, *this, ns);
369 
370  // Expanding a domain means that it has to be analysed again
371  // Likewise if the history insists that it is a new trace
372  // (assuming it is actually reachable).
373  log.progress() << "merging... ";
374  bool return_value = false;
375  if(
376  merge(new_values, p, to_p) ||
377  (next.first == ai_history_baset::step_statust::NEW &&
378  !new_values.is_bottom()))
379  {
380  put_in_working_set(working_set, to_p);
381  log.progress() << "result queued." << messaget::eom;
382  return_value = true;
383  }
384  else
385  {
386  log.progress() << "domain unchanged." << messaget::eom;
387  }
388 
389  // Branch because printing some histories and domains can be expensive
390  // esp. if the output is then just discarded and this is a critical path.
391  if(message_handler.get_verbosity() >= messaget::message_levelt::M_DEBUG)
392  {
393  log.debug() << "p = ";
394  p->output(log.debug());
395  log.debug() << messaget::eom;
396 
397  log.debug() << "current = ";
398  current.output(log.debug(), *this, ns);
399  log.debug() << messaget::eom;
400 
401  log.debug() << "to_p = ";
402  to_p->output(log.debug());
403  log.debug() << messaget::eom;
404 
405  log.debug() << "new_values = ";
406  new_values.output(log.debug(), *this, ns);
407  log.debug() << messaget::eom;
408  }
409 
410  return return_value;
411 }
412 
414  const irep_idt &calling_function_id,
415  trace_ptrt p_call,
416  locationt l_return,
417  const irep_idt &,
418  working_sett &working_set,
419  const goto_programt &,
420  const goto_functionst &,
421  const namespacet &ns)
422 {
424  log.progress() << "ai_baset::visit_edge_function_call from "
425  << p_call->current_location()->location_number << " to "
426  << l_return->location_number << messaget::eom;
427 
428  // The default implementation is not interprocedural
429  // so the effects of the call are approximated but nothing else
430  return visit_edge(
431  calling_function_id,
432  p_call,
433  calling_function_id,
434  l_return,
436  no_caller_history, // Not needed as we are skipping the function call
437  ns,
438  working_set);
439 }
440 
442  const irep_idt &calling_function_id,
443  trace_ptrt p_call,
444  working_sett &working_set,
445  const goto_programt &caller,
446  const goto_functionst &goto_functions,
447  const namespacet &ns)
448 {
449  locationt l_call = p_call->current_location();
450 
451  PRECONDITION(l_call->is_function_call());
452 
454  log.progress() << "ai_baset::visit_function_call at "
455  << l_call->location_number << messaget::eom;
456 
457  locationt l_return = std::next(l_call);
458 
459  // operator() allows analysis of a single goto_program independently
460  // it generates a synthetic goto_functions object for this
461  if(!goto_functions.function_map.empty())
462  {
463  const exprt &callee_expression = l_call->call_function();
464 
465  if(callee_expression.id() == ID_symbol)
466  {
467  const irep_idt &callee_function_id =
468  to_symbol_expr(callee_expression).get_identifier();
469 
470  log.progress() << "Calling " << callee_function_id << messaget::eom;
471 
472  goto_functionst::function_mapt::const_iterator it =
473  goto_functions.function_map.find(callee_function_id);
474 
476  it != goto_functions.function_map.end(),
477  "Function " + id2string(callee_function_id) + "not in function map");
478 
479  const goto_functionst::goto_functiont &callee_fun = it->second;
480 
481  if(callee_fun.body_available())
482  {
484  calling_function_id,
485  p_call,
486  l_return,
487  callee_function_id,
488  working_set,
489  callee_fun.body,
490  goto_functions,
491  ns);
492  }
493  else
494  {
495  // Fall through to the default, body not available, case
496  }
497  }
498  else
499  {
500  // Function pointers are not currently supported and must be removed
502  callee_expression.id() == ID_symbol,
503  "Function pointers and indirect calls must be removed before "
504  "analysis.");
505  }
506  }
507 
508  // If the body is not available then we just do the edge from call to return
509  // in the caller. Domains should over-approximate what the function might do.
510  return visit_edge(
511  calling_function_id,
512  p_call,
513  calling_function_id,
514  l_return,
515  ai_history_baset::no_caller_history, // Would be the same as p_call...
516  ns,
517  working_set);
518 }
519 
521  const irep_idt &function_id,
522  trace_ptrt p,
523  working_sett &working_set,
524  const goto_programt &goto_program,
525  const goto_functionst &goto_functions,
526  const namespacet &ns)
527 {
528  locationt l = p->current_location();
529  PRECONDITION(l->is_end_function());
530 
532  log.progress() << "ai_baset::visit_end_function " << function_id
533  << messaget::eom;
534 
535  // Do nothing
536  return false;
537 }
538 
540  const irep_idt &calling_function_id,
541  trace_ptrt p_call,
542  locationt l_return,
543  const irep_idt &callee_function_id,
544  working_sett &working_set,
545  const goto_programt &callee,
546  const goto_functionst &goto_functions,
547  const namespacet &ns)
548 {
550  log.progress() << "ai_recursive_interproceduralt::visit_edge_function_call"
551  << " from " << p_call->current_location()->location_number
552  << " to " << l_return->location_number << messaget::eom;
553 
554  // This is the edge from call site to function head.
555  {
556  locationt l_begin = callee.instructions.begin();
557 
558  working_sett catch_working_set; // The trace from the next fixpoint
559 
560  // Do the edge from the call site to the beginning of the function
561  bool new_data = visit_edge(
562  calling_function_id,
563  p_call,
564  callee_function_id,
565  l_begin,
567  no_caller_history, // Not needed as p_call already has the info
568  ns,
569  catch_working_set);
570 
571  log.progress() << "Handle " << callee_function_id << " recursively"
572  << messaget::eom;
573 
574  // do we need to do/re-do the fixedpoint of the body?
575  if(new_data)
576  fixedpoint(
577  get_next(catch_working_set),
578  callee_function_id,
579  callee,
580  goto_functions,
581  ns);
582  }
583 
584  // This is the edge from function end to return site.
585  {
586  log.progress() << "Handle return edges" << messaget::eom;
587 
588  // get location at end of the procedure we have called
589  locationt l_end = --callee.instructions.end();
591  l_end->is_end_function(),
592  "The last instruction of a goto_program must be END_FUNCTION");
593 
594  // Find the histories for a location
595  auto traces = storage->abstract_traces_before(l_end);
596 
597  bool new_data = false;
598 
599  // The history used may mean there are multiple histories at the end of the
600  // function. Some of these can be progressed (for example, if the history
601  // tracks paths within functions), some can't be (for example, not all
602  // calling contexts will be appropriate). We rely on the history's step,
603  // called from visit_edge to prune as applicable.
604  for(auto p_end : *traces)
605  {
606  // do edge from end of function to instruction after call
607  const statet &end_state = get_state(p_end);
608 
609  if(!end_state.is_bottom())
610  {
611  // function exit point reachable in that history
612 
613  new_data |= visit_edge(
614  callee_function_id,
615  p_end,
616  calling_function_id,
617  l_return,
618  p_call, // To allow function-local history
619  ns,
620  working_set);
621  }
622  }
623 
624  return new_data;
625  }
626 }
Abstract Interpretation.
goto_programt::const_targett locationt
Definition: ai.h:124
virtual bool visit(const irep_idt &function_id, trace_ptrt p, working_sett &working_set, const goto_programt &goto_program, const goto_functionst &goto_functions, const namespacet &ns)
Perform one step of abstract interpretation from trace t Depending on the instruction type it may com...
Definition: ai.cpp:267
std::unique_ptr< ai_storage_baset > storage
Definition: ai.h:511
bool visit_edge(const irep_idt &function_id, trace_ptrt p, const irep_idt &to_function_id, locationt to_l, trace_ptrt caller_history, const namespacet &ns, working_sett &working_set)
Definition: ai.cpp:328
virtual void output(const namespacet &ns, const irep_idt &function_id, const goto_programt &goto_program, std::ostream &out) const
Output the abstract states for a single function.
Definition: ai.cpp:39
message_handlert & message_handler
Definition: ai.h:521
virtual bool visit_end_function(const irep_idt &function_id, trace_ptrt p, working_sett &working_set, const goto_programt &goto_program, const goto_functionst &goto_functions, const namespacet &ns)
Definition: ai.cpp:520
virtual bool visit_function_call(const irep_idt &function_id, trace_ptrt p_call, working_sett &working_set, const goto_programt &goto_program, const goto_functionst &goto_functions, const namespacet &ns)
Definition: ai.cpp:441
trace_ptrt entry_state(const goto_programt &goto_program)
Set the abstract state of the entry location of a single function to the entry state required by the ...
Definition: ai.cpp:179
virtual bool visit_edge_function_call(const irep_idt &calling_function_id, trace_ptrt p_call, locationt l_return, const irep_idt &callee_function_id, working_sett &working_set, const goto_programt &callee, const goto_functionst &goto_functions, const namespacet &ns)
Definition: ai.cpp:413
virtual void initialize(const irep_idt &function_id, const goto_programt &goto_program)
Initialize all the abstract states for a single function.
Definition: ai.cpp:194
virtual std::unique_ptr< statet > make_temporary_state(const statet &s)
Make a copy of a state.
Definition: ai.h:505
virtual bool fixedpoint(trace_ptrt starting_trace, const irep_idt &function_id, const goto_programt &goto_program, const goto_functionst &goto_functions, const namespacet &ns)
Run the fixedpoint algorithm until it reaches a fixed point.
Definition: ai.cpp:229
ai_history_baset::trace_ptrt trace_ptrt
Definition: ai.h:121
void put_in_working_set(working_sett &working_set, trace_ptrt t)
Definition: ai.h:419
trace_ptrt get_next(working_sett &working_set)
Get the next location from the work queue.
Definition: ai.cpp:211
trace_sett working_sett
The work queue, sorted using the history's ordering operator.
Definition: ai.h:414
virtual bool merge(const statet &src, trace_ptrt from, trace_ptrt to)
Merge the state src, flowing from tracet from to tracet to, into the state currently stored for trace...
Definition: ai.h:498
virtual cstate_ptrt abstract_state_before(locationt l) const
Get a copy of the abstract state before the given instruction, without needing to know what kind of d...
Definition: ai.h:221
virtual xmlt output_xml(const namespacet &ns, const irep_idt &function_id, const goto_programt &goto_program) const
Output the abstract states for a single function as XML.
Definition: ai.cpp:136
virtual jsont output_json(const namespacet &ns, const irep_idt &function_id, const goto_programt &goto_program) const
Output the abstract states for a single function as JSON.
Definition: ai.cpp:83
std::unique_ptr< ai_history_factory_baset > history_factory
For creating history objects.
Definition: ai.h:491
virtual void finalize()
Override this to add a cleanup or post-processing step after fixedpoint has run.
Definition: ai.cpp:206
virtual statet & get_state(trace_ptrt p)
Get the state for the given history, creating it with the factory if it doesn't exist.
Definition: ai.h:515
The interface offered by a domain, allows code to manipulate domains without knowing their exact type...
Definition: ai_domain.h:54
virtual bool is_bottom() const =0
virtual void transform(const irep_idt &function_from, trace_ptrt from, const irep_idt &function_to, trace_ptrt to, ai_baset &ai, const namespacet &ns)=0
how function calls are treated: a) there is an edge from each call site to the function head b) there...
virtual void make_entry()
Make this domain a reasonable entry-point state For most domains top is sufficient.
Definition: ai_domain.h:121
virtual void output(std::ostream &, const ai_baset &, const namespacet &) const
Definition: ai_domain.h:104
A history object is an abstraction / representation of the control-flow part of a set of traces.
Definition: ai_history.h:37
static const trace_ptrt no_caller_history
Definition: ai_history.h:121
bool visit_edge_function_call(const irep_idt &calling_function_id, trace_ptrt p_call, locationt l_return, const irep_idt &callee_function_id, working_sett &working_set, const goto_programt &callee, const goto_functionst &goto_functions, const namespacet &ns) override
Definition: ai.cpp:539
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
Base class for all expressions.
Definition: expr.h:56
A collection of goto functions.
function_mapt function_map
::goto_functiont goto_functiont
static irep_idt entry_point()
Get the identifier of the entry point to a goto model.
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:622
std::list< Target > get_successors(Target target) const
Get control-flow successors of a given instruction.
const irep_idt & id() const
Definition: irep.h:384
jsont & push_back(const jsont &json)
Definition: json.h:212
Definition: json.h:27
unsigned get_verbosity() const
Definition: message.h:54
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:155
static eomt eom
Definition: message.h:297
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
const irep_idt & get_identifier() const
Definition: std_expr.h:160
Definition: xml.h:21
xmlt & new_element(const std::string &key)
Definition: xml.h:95
void output(std::ostream &out, unsigned indent=0) const
Definition: xml.cpp:33
#define forall_goto_program_instructions(it, program)
const std::string & id2string(const irep_idt &d)
Definition: irep.h:40
double log(double x)
Definition: math.c:2776
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:534
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:272
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.