CBMC
symex_dereference.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Symbolic Execution of ANSI-C
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "goto_symex.h"
13 
14 #include <util/arith_tools.h>
15 #include <util/byte_operators.h>
16 #include <util/c_types.h>
17 #include <util/exception_utils.h>
18 #include <util/expr_util.h>
19 #include <util/fresh_symbol.h>
20 #include <util/invariant.h>
22 
24 
25 #include "expr_skeleton.h"
26 #include "path_storage.h"
27 #include "symex_assign.h"
29 
44  const exprt &expr,
45  statet &state,
46  bool keep_array)
47 {
48  exprt result;
49 
50  if(expr.id()==ID_byte_extract_little_endian ||
51  expr.id()==ID_byte_extract_big_endian)
52  {
53  // address_of(byte_extract(op, offset, t)) is
54  // address_of(op) + offset with adjustments for arrays
55 
57 
58  // recursive call
59  result = address_arithmetic(be.op(), state, keep_array);
60 
61  if(be.op().type().id() == ID_array && result.id() == ID_address_of)
62  {
64 
65  // turn &a of type T[i][j] into &(a[0][0])
66  for(const typet *t = &(to_type_with_subtype(a.type()).subtype());
67  t->id() == ID_array && expr.type() != *t;
68  t = &(to_array_type(*t).element_type()))
70  }
71 
72  // do (expr.type() *)(((char *)op)+offset)
73  result=typecast_exprt(result, pointer_type(char_type()));
74 
75  // there could be further dereferencing in the offset
76  exprt offset=be.offset();
77  dereference_rec(offset, state, false, false);
78 
79  result=plus_exprt(result, offset);
80 
81  // treat &array as &array[0]
82  const typet &expr_type = expr.type();
83  typet dest_type_subtype;
84 
85  if(expr_type.id()==ID_array && !keep_array)
86  dest_type_subtype = to_array_type(expr_type).element_type();
87  else
88  dest_type_subtype=expr_type;
89 
90  result=typecast_exprt(result, pointer_type(dest_type_subtype));
91  }
92  else if(expr.id()==ID_index ||
93  expr.id()==ID_member)
94  {
96  ode.build(expr, ns);
97 
98  const byte_extract_exprt be =
99  make_byte_extract(ode.root_object(), ode.offset(), expr.type());
100 
101  // recursive call
102  result = address_arithmetic(be, state, keep_array);
103 
104  do_simplify(result);
105  }
106  else if(expr.id()==ID_dereference)
107  {
108  // ANSI-C guarantees &*p == p no matter what p is,
109  // even if it's complete garbage
110  // just grab the pointer, but be wary of further dereferencing
111  // in the pointer itself
112  result=to_dereference_expr(expr).pointer();
113  dereference_rec(result, state, false, false);
114  }
115  else if(expr.id()==ID_if)
116  {
117  if_exprt if_expr=to_if_expr(expr);
118 
119  // the condition is not an address
120  dereference_rec(if_expr.cond(), state, false, false);
121 
122  // recursive call
123  if_expr.true_case() =
124  address_arithmetic(if_expr.true_case(), state, keep_array);
125  if_expr.false_case() =
126  address_arithmetic(if_expr.false_case(), state, keep_array);
127 
128  result=if_expr;
129  }
130  else if(expr.id()==ID_symbol ||
131  expr.id()==ID_string_constant ||
132  expr.id()==ID_label ||
133  expr.id()==ID_array)
134  {
135  // give up, just dereference
136  result=expr;
137  dereference_rec(result, state, false, false);
138 
139  // turn &array into &array[0]
140  if(result.type().id() == ID_array && !keep_array)
141  result = index_exprt(result, from_integer(0, c_index_type()));
142 
143  // handle field-sensitive SSA symbol
144  mp_integer offset=0;
145  if(is_ssa_expr(expr))
146  {
147  auto offset_opt = compute_pointer_offset(expr, ns);
148  PRECONDITION(offset_opt.has_value());
149  offset = *offset_opt;
150  }
151 
152  if(offset>0)
153  {
155  to_ssa_expr(expr).get_l1_object(),
156  from_integer(offset, c_index_type()),
157  expr.type());
158 
159  result = address_arithmetic(be, state, keep_array);
160 
161  do_simplify(result);
162  }
163  else
164  result=address_of_exprt(result);
165  }
166  else if(expr.id() == ID_typecast)
167  {
168  const typecast_exprt &tc_expr = to_typecast_expr(expr);
169 
170  result = address_arithmetic(tc_expr.op(), state, keep_array);
171 
172  // treat &array as &array[0]
173  const typet &expr_type = expr.type();
174  typet dest_type_subtype;
175 
176  if(expr_type.id() == ID_array && !keep_array)
177  dest_type_subtype = to_array_type(expr_type).element_type();
178  else
179  dest_type_subtype = expr_type;
180 
181  result = typecast_exprt(result, pointer_type(dest_type_subtype));
182  }
183  else
185  "goto_symext::address_arithmetic does not handle " + expr.id_string());
186 
187  const typet &expr_type = expr.type();
188  INVARIANT(
189  (expr_type.id() == ID_array && !keep_array) ||
190  pointer_type(expr_type) == result.type(),
191  "either non-persistent array or pointer to result");
192 
193  return result;
194 }
195 
197 goto_symext::cache_dereference(exprt &dereference_result, statet &state)
198 {
199  auto const cache_key = [&] {
200  auto cache_key =
201  state.field_sensitivity.apply(ns, state, dereference_result, false);
202  if(auto let_expr = expr_try_dynamic_cast<let_exprt>(dereference_result))
203  {
204  let_expr->value() = state.rename<L2>(let_expr->value(), ns).get();
205  }
206  else
207  {
208  cache_key = state.rename<L2>(cache_key, ns).get();
209  }
210  return cache_key;
211  }();
212 
213  if(auto cached = state.dereference_cache.find(cache_key))
214  {
215  return *cached;
216  }
217 
218  auto const &cache_symbol = get_fresh_aux_symbol(
219  cache_key.type(),
220  "symex",
221  "dereference_cache",
222  dereference_result.source_location(),
224  ns,
225  state.symbol_table);
226 
227  // we need to lift possible lets
228  // (come from the value set to avoid repeating complex pointer comparisons)
229  auto cache_value = cache_key;
230  lift_lets(state, cache_value);
231 
232  auto assign = symex_assignt{
234  state,
236  ns,
237  symex_config,
238  target};
239 
240  auto cache_symbol_expr = cache_symbol.symbol_expr();
241  assign.assign_symbol(
242  to_ssa_expr(state.rename<L1>(cache_symbol_expr, ns).get()),
243  expr_skeletont{},
244  cache_value,
245  {});
246 
247  state.dereference_cache.insert(cache_key, cache_symbol_expr);
248  return cache_symbol_expr;
249 }
250 
263  exprt &expr,
264  statet &state,
265  bool write,
266  bool is_in_binding_expression)
267 {
268  if(expr.id()==ID_dereference)
269  {
270  bool expr_is_not_null = false;
271 
272  if(state.threads.size() == 1)
273  {
274  const irep_idt &expr_function = state.source.function_id;
275  if(!expr_function.empty())
276  {
277  const dereference_exprt to_check =
279 
280  expr_is_not_null = path_storage.safe_pointers.at(expr_function)
281  .is_safe_dereference(to_check, state.source.pc);
282  }
283  }
284 
285  exprt tmp1;
286  tmp1.swap(to_dereference_expr(expr).pointer());
287 
288  // first make sure there are no dereferences in there
289  dereference_rec(tmp1, state, false, is_in_binding_expression);
290 
291  // Depending on the nature of the pointer expression, the recursive deref
292  // operation might have introduced a construct such as
293  // (x == &o1 ? o1 : o2).field, in which case we should simplify to push the
294  // member operator inside the if, then apply field-sensitivity to yield
295  // (x == &o1 ? o1..field : o2..field). value_set_dereferencet can then
296  // apply the dereference operation to each of o1..field and o2..field
297  // independently, as it special-cases the ternary conditional operator.
298  // There may also be index operators in tmp1 which can now be resolved to
299  // constant array cell references, so we replace symbols with constants
300  // first, hoping for a transformation such as
301  // (x == &o1 ? o1 : o2)[idx] =>
302  // (x == &o1 ? o1 : o2)[2] =>
303  // (x == &o1 ? o1[[2]] : o2[[2]])
304  // Note we don't L2 rename non-constant symbols at this point, because the
305  // value-set works in terms of L1 names and we don't want to ask it to
306  // dereference an L2 pointer, which it would not have an entry for.
307 
308  tmp1 = state.rename<L1_WITH_CONSTANT_PROPAGATION>(tmp1, ns).get();
309 
310  do_simplify(tmp1);
311 
313  {
314  // make sure simplify has not re-introduced any dereferencing that
315  // had previously been cleaned away
316  INVARIANT(
317  !has_subexpr(tmp1, ID_dereference),
318  "simplify re-introduced dereferencing");
319  }
320 
321  tmp1 = state.field_sensitivity.apply(ns, state, std::move(tmp1), false);
322 
323  // we need to set up some elaborate call-backs
324  symex_dereference_statet symex_dereference_state(state, ns);
325 
327  ns,
328  state.symbol_table,
329  symex_dereference_state,
331  expr_is_not_null,
333 
334  // std::cout << "**** " << format(tmp1) << '\n';
335  exprt tmp2 =
336  dereference.dereference(tmp1, symex_config.show_points_to_sets);
337  // std::cout << "**** " << format(tmp2) << '\n';
338 
339 
340  // this may yield a new auto-object
341  trigger_auto_object(tmp2, state);
342 
343  // Check various conditions for when we should try to cache the expression.
344  // 1. Caching dereferences must be enabled.
345  // 2. Do not cache inside LHS of writes.
346  // 3. Do not cache inside quantifiers (references variables outside their
347  // scope).
348  // 4. Only cache "complicated" expressions, i.e. of the form
349  // [let p = <expr> in ]
350  // (p == &something ? something : ...))
351  // Otherwise we should just return it unchanged.
352  if(
353  symex_config.cache_dereferences && !write && !is_in_binding_expression &&
354  (tmp2.id() == ID_if || tmp2.id() == ID_let))
355  {
356  expr = cache_dereference(tmp2, state);
357  }
358  else
359  {
360  expr = std::move(tmp2);
361  }
362  }
363  else if(
364  expr.id() == ID_index && to_index_expr(expr).array().id() == ID_member &&
365  to_array_type(to_index_expr(expr).array().type()).size().is_zero())
366  {
367  // This is an expression of the form x.a[i],
368  // where a is a zero-sized array. This gets
369  // re-written into *(&x.a+i)
370 
371  index_exprt index_expr=to_index_expr(expr);
372 
373  address_of_exprt address_of_expr(index_expr.array());
374  address_of_expr.type()=pointer_type(expr.type());
375 
376  dereference_exprt tmp{plus_exprt{address_of_expr, index_expr.index()}};
378 
379  // recursive call
380  dereference_rec(tmp, state, write, is_in_binding_expression);
381 
382  expr.swap(tmp);
383  }
384  else if(expr.id()==ID_index &&
385  to_index_expr(expr).array().type().id()==ID_pointer)
386  {
387  // old stuff, will go away
388  UNREACHABLE;
389  }
390  else if(expr.id()==ID_address_of)
391  {
392  address_of_exprt &address_of_expr=to_address_of_expr(expr);
393 
394  exprt &object=address_of_expr.object();
395 
396  expr = address_arithmetic(
397  object, state, to_pointer_type(expr.type()).base_type().id() == ID_array);
398  }
399  else if(expr.id()==ID_typecast)
400  {
401  exprt &tc_op=to_typecast_expr(expr).op();
402 
403  // turn &array into &array[0] when casting to pointer-to-element-type
404  if(
405  tc_op.id() == ID_address_of &&
406  to_address_of_expr(tc_op).object().type().id() == ID_array &&
407  expr.type() ==
408  pointer_type(to_array_type(to_address_of_expr(tc_op).object().type())
409  .element_type()))
410  {
412  to_address_of_expr(tc_op).object(), from_integer(0, c_index_type())));
413 
414  dereference_rec(expr, state, write, is_in_binding_expression);
415  }
416  else
417  {
418  dereference_rec(tc_op, state, write, is_in_binding_expression);
419  }
420  }
421  else
422  {
423  bool is_binding_expression =
425  Forall_operands(it, expr)
426  {
428  *it, state, write, is_in_binding_expression || is_binding_expression);
429  }
430  }
431 }
432 
433 static exprt
434 apply_to_objects_in_dereference(exprt e, const std::function<exprt(exprt)> &f)
435 {
436  if(auto deref = expr_try_dynamic_cast<dereference_exprt>(e))
437  {
438  deref->op() = f(std::move(deref->op()));
439  return *deref;
440  }
441 
442  for(auto &sub : e.operands())
443  sub = apply_to_objects_in_dereference(std::move(sub), f);
444  return e;
445 }
446 
484 void goto_symext::dereference(exprt &expr, statet &state, bool write)
485 {
486  PRECONDITION(!state.call_stack().empty());
487 
488  // Symbols whose address is taken need to be renamed to level 1
489  // in order to distinguish addresses of local variables
490  // from different frames.
491  expr = apply_to_objects_in_dereference(std::move(expr), [&](exprt e) {
492  return state.field_sensitivity.apply(
493  ns, state, state.rename<L1>(std::move(e), ns).get(), false);
494  });
495 
496  // start the recursion!
497  dereference_rec(expr, state, write, false);
498  // dereferencing may introduce new symbol_exprt
499  // (like __CPROVER_memory)
500  expr = state.rename<L1>(std::move(expr), ns).get();
501 
502  // Dereferencing is likely to introduce new member-of-if constructs --
503  // for example, "x->field" may have become "(x == &o1 ? o1 : o2).field."
504  // Run expression simplification, which converts that to
505  // (x == &o1 ? o1.field : o2.field))
506  // before applying field sensitivity. Field sensitivity can then turn such
507  // field-of-symbol expressions into atomic SSA expressions instead of having
508  // to rewrite all of 'o1' otherwise.
509  // Even without field sensitivity this can be beneficial: for example,
510  // "(b ? s1 : s2).member := X" results in
511  // (b ? s1 : s2) := (b ? s1 : s2) with (member := X)
512  // and then
513  // s1 := b ? ((b ? s1 : s2) with (member := X)) : s1
514  // when all we need is
515  // s1 := s1 with (member := X) [and guard b]
516  // s2 := s2 with (member := X) [and guard !b]
517  do_simplify(expr);
518 
520  {
521  // make sure simplify has not re-introduced any dereferencing that
522  // had previously been cleaned away
523  INVARIANT(
524  !has_subexpr(expr, ID_dereference),
525  "simplify re-introduced dereferencing");
526  }
527 
528  expr = state.field_sensitivity.apply(ns, state, std::move(expr), write);
529 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
byte_extract_exprt make_byte_extract(const exprt &_op, const exprt &_offset, const typet &_type)
Construct a byte_extract_exprt with endianness and byte width matching the current configuration.
Expression classes for byte-level operators.
const byte_extract_exprt & to_byte_extract_expr(const exprt &expr)
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:235
bitvector_typet char_type()
Definition: c_types.cpp:106
bitvector_typet c_index_type()
Definition: c_types.cpp:16
Operator to return the address of an object.
Definition: pointer_expr.h:540
exprt & object()
Definition: pointer_expr.h:549
const typet & element_type() const
The type of the elements of the array.
Definition: std_types.h:827
Expression of type type extracted from some object op starting at position offset (given in number of...
Operator to dereference a pointer.
Definition: pointer_expr.h:834
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
bool empty() const
Definition: dstring.h:89
Expression in which some part is missing and can be substituted for another expression.
Definition: expr_skeleton.h:26
Base class for all expressions.
Definition: expr.h:56
source_locationt & add_source_location()
Definition: expr.h:236
const source_locationt & source_location() const
Definition: expr.h:231
typet & type()
Return the type of the expression.
Definition: expr.h:84
operandst & operands()
Definition: expr.h:94
exprt apply(const namespacet &ns, goto_symex_statet &state, exprt expr, bool write) const
Turn an expression expr into a field-sensitive SSA expression.
sharing_mapt< exprt, symbol_exprt, false, irep_hash > dereference_cache
This is used for eliminating repeated complicated dereferences.
Definition: goto_state.h:43
Central data structure: state.
call_stackt & call_stack()
renamedt< exprt, level > rename(exprt expr, const namespacet &ns)
Rewrites symbol expressions in exprt, applying a suffix to each symbol reflecting its most recent ver...
symbol_tablet symbol_table
contains symbols that are minted during symbolic execution, such as dynamically created objects etc.
field_sensitivityt field_sensitivity
symex_targett::sourcet source
std::vector< threadt > threads
irep_idt language_mode
language_mode: ID_java, ID_C or another language identifier if we know the source language in use,...
Definition: goto_symex.h:241
path_storaget & path_storage
Symbolic execution paths to be resumed later.
Definition: goto_symex.h:816
symex_target_equationt & target
The equation that this execution is building up.
Definition: goto_symex.h:266
shadow_memoryt shadow_memory
Shadow memory instrumentation API.
Definition: goto_symex.h:845
namespacet ns
Initialized just before symbolic execution begins, to point to both outer_symbol_table and the symbol...
Definition: goto_symex.h:258
void trigger_auto_object(const exprt &, statet &)
void lift_lets(statet &, exprt &)
Execute any let expressions in expr using symex_assignt::assign_symbol.
exprt address_arithmetic(const exprt &, statet &, bool keep_array)
Transforms an lvalue expression by replacing any dereference operations it contains with explicit ref...
virtual void do_simplify(exprt &expr)
Definition: goto_symex.cpp:32
symbol_exprt cache_dereference(exprt &dereference_result, statet &state)
messaget log
The messaget to write log messages to.
Definition: goto_symex.h:278
const symex_configt symex_config
The configuration to use for this symbolic execution.
Definition: goto_symex.h:185
virtual void dereference(exprt &, statet &, bool write)
Replace all dereference operations within expr with explicit references to the objects they may refer...
void dereference_rec(exprt &expr, statet &state, bool write, bool is_in_quantifier)
If expr is a dereference_exprt, replace it with explicit references to the objects it may point to.
The trinary if-then-else operator.
Definition: std_expr.h:2370
exprt & true_case()
Definition: std_expr.h:2397
exprt & false_case()
Definition: std_expr.h:2407
exprt & cond()
Definition: std_expr.h:2387
Array index operator.
Definition: std_expr.h:1465
exprt & array()
Definition: std_expr.h:1495
exprt & index()
Definition: std_expr.h:1505
const std::string & id_string() const
Definition: irep.h:387
const irep_idt & id() const
Definition: irep.h:384
void swap(irept &irep)
Definition: irep.h:430
message_handlert & get_message_handler()
Definition: message.h:184
Split an expression into a base object and a (byte) offset.
Definition: pointer_expr.h:181
void build(const exprt &expr, const namespacet &ns)
Given an expression expr, attempt to find the underlying object it represents by skipping over type c...
static const exprt & root_object(const exprt &expr)
std::unordered_map< irep_idt, local_safe_pointerst > safe_pointers
Map function identifiers to local_safe_pointerst instances.
Definition: path_storage.h:100
The plus expression Associativity is not specified.
Definition: std_expr.h:1002
const typet & base_type() const
The type of the data what we point to.
Definition: pointer_expr.h:35
void insert(const key_type &k, valueU &&m)
Insert element, element must not exist in map.
Definition: sharing_map.h:1348
std::optional< std::reference_wrapper< const mapped_type > > find(const key_type &k) const
Find element.
Definition: sharing_map.h:1451
Expression to hold a symbol (variable)
Definition: std_expr.h:131
Functor for symex assignment.
Definition: symex_assign.h:28
Callback object that goto_symext::dereference_rec provides to value_set_dereferencet to provide value...
const typet & subtype() const
Definition: type.h:187
Semantic type conversion.
Definition: std_expr.h:2068
The type of an expression, extends irept.
Definition: type.h:29
const exprt & op() const
Definition: std_expr.h:391
Thrown when we encounter an instruction, parameters to an instruction etc.
Wrapper for a function dereferencing pointer expressions using a value set.
#define Forall_operands(it, expr)
Definition: expr.h:27
Expression skeleton.
bool has_subexpr(const exprt &expr, const std::function< bool(const exprt &)> &pred)
returns true if the expression has a subexpression that satisfies pred
Definition: expr_util.cpp:141
Deprecated expression utility functions.
symbolt & get_fresh_aux_symbol(const typet &type, const std::string &name_prefix, const std::string &basename_prefix, const source_locationt &source_location, const irep_idt &symbol_mode, const namespacet &ns, symbol_table_baset &symbol_table)
Installs a fresh-named symbol with respect to the given namespace ns with the requested name pattern ...
Fresh auxiliary symbol creation.
Symbolic Execution.
Storage of symbolic execution paths to resume.
const dereference_exprt & to_dereference_expr(const exprt &expr)
Cast an exprt to a dereference_exprt.
Definition: pointer_expr.h:890
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: pointer_expr.h:93
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
Definition: pointer_expr.h:577
std::optional< mp_integer > compute_pointer_offset(const exprt &expr, const namespacet &ns)
Pointer Logic.
@ L2
Definition: renamed.h:26
@ L1_WITH_CONSTANT_PROPAGATION
Definition: renamed.h:25
@ L1
Definition: renamed.h:24
exprt get_original_name(exprt expr)
Undo all levels of renaming.
BigInt mp_integer
Definition: smt_terms.h:17
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:525
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
const ssa_exprt & to_ssa_expr(const exprt &expr)
Cast a generic exprt to an ssa_exprt.
Definition: ssa_expr.h:145
bool is_ssa_expr(const exprt &expr)
Definition: ssa_expr.h:125
const if_exprt & to_if_expr(const exprt &expr)
Cast an exprt to an if_exprt.
Definition: std_expr.h:2450
bool can_cast_expr< binding_exprt >(const exprt &base)
Definition: std_expr.h:3150
const typecast_exprt & to_typecast_expr(const exprt &expr)
Cast an exprt to a typecast_exprt.
Definition: std_expr.h:2102
bool can_cast_expr< let_exprt >(const exprt &base)
Definition: std_expr.h:3304
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1533
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition: std_types.h:888
bool show_points_to_sets
Definition: symex_config.h:49
bool cache_dereferences
Whether or not to replace multiple occurrences of the same dereference with a single symbol that cont...
Definition: symex_config.h:64
bool run_validation_checks
Should the additional validation checks be run? If this flag is set the checks for renaming (both lev...
Definition: symex_config.h:44
goto_programt::const_targett pc
Definition: symex_target.h:42
Symbolic Execution of assignments.
static exprt apply_to_objects_in_dereference(exprt e, const std::function< exprt(exprt)> &f)
Symbolic Execution of ANSI-C.
const type_with_subtypet & to_type_with_subtype(const typet &type)
Definition: type.h:208
ssize_t write(int fildes, const void *buf, size_t nbyte)
Definition: unistd.c:195
Pointer Dereferencing.