CBMC
contracts_wrangler.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Parse and annotate contracts from configuration files
4 
5 Author: Qinheping Hu
6 
7 Date: June 2023
8 
9 \*******************************************************************/
10 
13 
14 #include "contracts_wrangler.h"
15 
16 #include <util/c_types.h>
17 #include <util/expr_iterator.h>
18 #include <util/format_expr.h>
19 #include <util/pointer_expr.h>
20 #include <util/simplify_expr.h>
21 #include <util/std_types.h>
22 
24 
25 #include <ansi-c/ansi_c_parser.h>
28 
30  goto_modelt &goto_model,
31  const std::string &file_name,
32  message_handlert &message_handler)
33  : ns(goto_model.symbol_table),
34  goto_model(goto_model),
35  symbol_table(goto_model.symbol_table),
36  goto_functions(goto_model.goto_functions),
37  message_handler(message_handler)
38 {
39  jsont configuration;
40 
41  if(parse_json(file_name, message_handler, configuration))
42  throw deserialization_exceptiont(file_name + " is not a valid JSON file");
43 
44  configure_functions(configuration);
45 
46  // Mangle loop contracts function by function.
47  // We match function with a regex. There should one and only one function
48  // with name matched by the regex.
49  for(const auto &function_entry : this->functions)
50  {
51  bool function_found = false;
52 
53  for(const auto &function : goto_model.goto_functions.function_map)
54  {
55  // We find a matched function.
56  if(std::regex_match(function.first.c_str(), function_entry.first))
57  {
58  if(function_found)
60  "function regex " + function_entry.second.regex_str +
61  " matches more than one function");
62 
63  function_found = true;
64 
65  // Mangle all loop contracts in the function.
66  for(const auto &loop_entry : function_entry.second.loop_contracts)
67  {
68  mangle(loop_entry, function.first);
69  }
70  }
71  }
72 
73  if(!function_found)
75  "function regex " + function_entry.second.regex_str +
76  " matches no function");
77  }
78 }
79 
81  std::string function_name,
82  const std::size_t num_of_args)
83 {
84  // Already exist.
85  if(symbol_table.has_symbol(CPROVER_PREFIX + function_name))
86  return;
87 
88  code_typet::parameterst parameters;
89  for(unsigned i = 0; i < num_of_args; i++)
90  {
91  parameters.emplace_back(pointer_type(void_type()));
92  }
93  symbolt fun_symbol(
94  CPROVER_PREFIX + function_name,
95  code_typet(parameters, empty_typet()),
96  ID_C);
97  symbol_table.add(fun_symbol);
98 }
99 
101  const loop_contracts_clauset &loop_contracts,
102  const irep_idt &function_id)
103 {
105  // Loop contracts mangling consists of four steps.
106  //
107  // 1. Construct a fake function with a fake loop that contains the loop
108  // contracts for parsing the contracts.
109  // 2. Parse the fake function and extract the contracts as exprt.
110  // 3. Replace symbols in the extracted exprt with the symbols in the
111  // symbol table of the goto model according to the symbol map provided by
112  // the user.
113  // 4. Typecheck all contracts exprt.
114  // 5. Annotate all contracts exprt to the corresponding loop.
115 
116  // Construct a fake function to parse.
117  // void function_id()
118  // {
119  // while(1==1)
120  // __CPROVER_assigns(assigns_str) // optional
121  // __CPROVER_loop_invariant(invariants_str)
122  // __CPROVER_decreases(decreases_str) // optional
123  // }
124  std::ostringstream pr;
125  pr << "void _fn() { while(1==1)";
126  if(!loop_contracts.assigns.empty())
127  {
128  pr << CPROVER_PREFIX << "assigns(" << loop_contracts.assigns << ") ";
129  }
130  pr << CPROVER_PREFIX << "loop_invariant(" << loop_contracts.invariants
131  << ") ";
132  if(!loop_contracts.decreases.empty())
133  {
134  pr << CPROVER_PREFIX << "decreases(" << loop_contracts.decreases << ") ";
135  }
136  pr << " {}}" << std::endl;
137 
138  log.debug() << "Constructing fake function:\n" << pr.str() << log.eom;
139 
140  // Parse the fake function.
141  std::istringstream is(pr.str());
142  ansi_c_parsert ansi_c_parser{message_handler};
143  ansi_c_parser.set_line_no(0);
144  ansi_c_parser.set_file("<predicate>");
145  ansi_c_parser.in = &is;
146  ansi_c_parser.for_has_scope = config.ansi_c.for_has_scope;
147  ansi_c_parser.ts_18661_3_Floatn_types = config.ansi_c.ts_18661_3_Floatn_types;
148  ansi_c_parser.float16_type = config.ansi_c.float16_type;
149  ansi_c_parser.bf16_type = config.ansi_c.bf16_type;
150  ansi_c_parser.cpp98 = false; // it's not C++
151  ansi_c_parser.cpp11 = false; // it's not C++
152  ansi_c_parser.mode = config.ansi_c.mode;
153  ansi_c_scanner_init(ansi_c_parser);
154  ansi_c_parser.parse();
155 
156  // Extract the invariants from prase_tree.
157  exprt &inv_expr(static_cast<exprt &>(ansi_c_parser.parse_tree.items.front()
158  .declarator()
159  .value()
160  .operands()[0]
161  .add(ID_C_spec_loop_invariant))
162  .operands()[0]);
163 
164  log.debug() << "Extracted loop invariants: " << inv_expr.pretty() << "\n"
165  << log.eom;
166 
167  // Extract the assigns from parse_tree.
168  std::optional<exprt> assigns_expr;
169  if(!loop_contracts.assigns.empty())
170  {
171  assigns_expr = static_cast<exprt &>(ansi_c_parser.parse_tree.items.front()
172  .declarator()
173  .value()
174  .operands()[0]
175  .add(ID_C_spec_assigns))
176  .operands()[0];
177  }
178 
179  // Extract the decreases from parse_tree.
180  exprt::operandst decreases_exprs;
181  if(!loop_contracts.decreases.empty())
182  {
183  for(auto op : static_cast<exprt &>(ansi_c_parser.parse_tree.items.front()
184  .declarator()
185  .value()
186  .operands()[0]
187  .add(ID_C_spec_decreases))
188  .operands())
189  {
190  decreases_exprs.emplace_back(op);
191  }
192  }
193 
194  // Replace symbols in the clauses with the symbols in the symbol table.
195  log.debug() << "Start to replace symbols" << log.eom;
196  loop_contracts.replace_symbol(inv_expr);
197  if(assigns_expr.has_value())
198  {
199  loop_contracts.replace_symbol(assigns_expr.value());
200  }
201  for(exprt &decrease_expr : decreases_exprs)
202  {
203  loop_contracts.replace_symbol(decrease_expr);
204  }
205 
206  // Add builtin functions that might be used in contracts to the symbol table.
207  add_builtin_pointer_function_symbol("loop_entry", 1);
208  add_builtin_pointer_function_symbol("same_object", 2);
209  add_builtin_pointer_function_symbol("OBJECT_SIZE", 1);
210  add_builtin_pointer_function_symbol("POINTER_OBJECT", 1);
211  add_builtin_pointer_function_symbol("POINTER_OFFSET", 1);
212 
213  // Typecheck clauses.
215  ansi_c_parser.parse_tree,
216  symbol_table,
217  symbol_table.lookup_ref(function_id).module.c_str(),
218  log.get_message_handler());
219 
220  // Typecheck and annotate invariants.
221  {
222  log.debug() << "Start to typecheck invariants." << log.eom;
223  ansi_c_typecheck.typecheck_expr(inv_expr);
224  if(has_subexpr(inv_expr, ID_old))
226  "old is not allowed in loop invariants.");
227 
228  invariant_mapt inv_map;
229  inv_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))] =
230  and_exprt(simplify_expr(inv_expr, ns), true_exprt());
232  }
233 
234  // Typecheck and annotate assigns.
235  log.debug() << "Start to typecheck assigns." << log.eom;
236  if(assigns_expr.has_value())
237  {
238  ansi_c_typecheck.typecheck_spec_assigns(assigns_expr.value().operands());
239 
240  invariant_mapt assigns_map;
241  assigns_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))] =
242  assigns_expr.value();
243  annotate_assigns(assigns_map, goto_model);
244  }
245 
246  // Typecheck an annotate decreases.
247  log.debug() << "Start to typecheck decreases." << log.eom;
248  if(!decreases_exprs.empty())
249  {
250  std::map<loop_idt, std::vector<exprt>> decreases_map;
251  decreases_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))] =
252  std::vector<exprt>();
253  for(exprt &decrease_expr : decreases_exprs)
254  {
255  ansi_c_typecheck.typecheck_expr(decrease_expr);
256  decreases_map[loop_idt(function_id, std::stoi(loop_contracts.identifier))]
257  .emplace_back(decrease_expr);
258  }
259  annotate_decreases(decreases_map, goto_model);
260  }
261 
262  log.debug() << "Mangling finished." << log.eom;
263  // Remove added symbol.
264  symbol_table.remove(CPROVER_PREFIX "loop_entry");
265  symbol_table.remove(CPROVER_PREFIX "same_object");
266  symbol_table.remove(CPROVER_PREFIX "OBJECT_SIZE");
267  symbol_table.remove(CPROVER_PREFIX "POINTER_OBJECT");
268  symbol_table.remove(CPROVER_PREFIX "POINTER_OFFSET");
269 }
270 
272 {
273  auto functions = config["functions"];
274 
275  if(functions.is_null())
276  return;
277 
278  if(!functions.is_array())
279  throw deserialization_exceptiont("functions entry must be sequence");
280 
281  for(const auto &function : to_json_array(functions))
282  {
283  if(!function.is_object())
284  throw deserialization_exceptiont("function entry must be object");
285 
286  for(const auto &function_entry : to_json_object(function))
287  {
288  const auto function_name = function_entry.first;
289  const auto &items = function_entry.second;
290 
291  if(!items.is_array())
292  throw deserialization_exceptiont("loops entry must be sequence");
293 
294  this->functions.emplace_back(function_name, functiont{});
295  functiont &function_config = this->functions.back().second;
296  function_config.regex_str = function_name;
297 
298  for(const auto &function_item : to_json_array(items))
299  {
300  if(!function_item.is_object())
301  throw deserialization_exceptiont("loop entry must be object");
302 
303  std::string loop_id = "";
304  std::string invariants_str = "";
305  std::string assigns_str = "";
306  std::string decreases_str = "";
307  unchecked_replace_symbolt replace_symbol;
308 
309  for(const auto &loop_item : to_json_object(function_item))
310  {
311  if(!loop_item.second.is_string())
312  throw deserialization_exceptiont("loop item must be string");
313 
314  if(loop_item.first == "loop_id")
315  {
316  loop_id = loop_item.second.value;
317  continue;
318  }
319 
320  if(loop_item.first == "assigns")
321  {
322  assigns_str = loop_item.second.value;
323  continue;
324  }
325 
326  if(loop_item.first == "decreases")
327  {
328  decreases_str = loop_item.second.value;
329  continue;
330  }
331 
332  if(loop_item.first == "invariants")
333  {
334  invariants_str = loop_item.second.value;
335  continue;
336  }
337 
338  if(loop_item.first == "symbol_map")
339  {
340  std::string symbol_map_str = loop_item.second.value;
341 
342  // Remove all space in symbol_map_str
343  symbol_map_str.erase(
344  std::remove_if(
345  symbol_map_str.begin(), symbol_map_str.end(), isspace),
346  symbol_map_str.end());
347 
348  for(const auto &symbol_map_entry :
349  split_string(symbol_map_str, ';'))
350  {
351  const auto symbol_map_pair = split_string(symbol_map_entry, ',');
352  const auto &symbol = symbol_table.lookup(symbol_map_pair.back());
353  if(symbol == nullptr)
355  "symbol with id \"" + symbol_map_pair.front() +
356  "\" does not exist in the symbol table");
357  // Create a dummy symbol. The type will be discarded when inserted
358  // into replace_symbol.
359  const symbol_exprt old_expr(
360  symbol_map_pair.front(), bool_typet());
361  replace_symbol.insert(old_expr, symbol->symbol_expr());
362  }
363 
364  continue;
365  }
366 
367  throw deserialization_exceptiont("unexpected loop item");
368  }
369 
370  if(loop_id.empty())
371  {
373  "loop entry must have one identifier");
374  }
375 
376  if(invariants_str.empty())
377  {
379  "loop entry must have one invariant string");
380  }
381 
382  function_config.loop_contracts.emplace_back(
383  loop_id, invariants_str, assigns_str, decreases_str, replace_symbol);
384  }
385  }
386  }
387 }
void ansi_c_scanner_init(ansi_c_parsert &)
bool ansi_c_typecheck(ansi_c_parse_treet &ansi_c_parse_tree, symbol_table_baset &symbol_table, const std::string &module, message_handlert &message_handler)
ANSI-C Language Type Checking.
configt config
Definition: config.cpp:25
empty_typet void_type()
Definition: c_types.cpp:245
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:235
Boolean AND.
Definition: std_expr.h:2120
The Boolean type.
Definition: std_types.h:36
Base type of functions.
Definition: std_types.h:583
std::vector< parametert > parameterst
Definition: std_types.h:585
struct configt::ansi_ct ansi_c
goto_modelt & goto_model
symbol_tablet & symbol_table
message_handlert & message_handler
void configure_functions(const jsont &)
void add_builtin_pointer_function_symbol(std::string function_name, const std::size_t num_of_args)
Add builtin function symbol with function_name into symbol table.
contracts_wranglert(goto_modelt &goto_model, const std::string &file_name, message_handlert &message_handler)
void mangle(const loop_contracts_clauset &loop_contracts, const irep_idt &function_id)
Mangle loop_contracts in the function with function_id
Thrown when failing to deserialize a value from some low level format, like JSON or raw bytes.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
const char * c_str() const
Definition: dstring.h:116
The empty type.
Definition: std_types.h:51
Base class for all expressions.
Definition: expr.h:56
std::vector< exprt > operandst
Definition: expr.h:58
operandst & operands()
Definition: expr.h:94
function_mapt function_map
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:34
Thrown when user-provided input cannot be processed.
std::string pretty(unsigned indent=0, unsigned max_indent=0) const
Definition: irep.cpp:482
Definition: json.h:27
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:155
Expression to hold a symbol (variable)
Definition: std_expr.h:131
bool remove(const irep_idt &name)
Remove a symbol from the symbol table.
const symbolt * lookup(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
Symbol table entry.
Definition: symbol.h:28
irep_idt module
Name of module the symbol belongs to.
Definition: symbol.h:43
The Boolean constant true.
Definition: std_expr.h:3055
void insert(const symbol_exprt &old_expr, const exprt &new_expr)
Parse and annotate contracts.
#define CPROVER_PREFIX
int isspace(int c)
Definition: ctype.c:80
Forward depth-first search iterators These iterators' copy operations are expensive,...
bool has_subexpr(const exprt &expr, const std::function< bool(const exprt &)> &pred)
returns true if the expression has a subexpression that satisfies pred
Definition: expr_util.cpp:141
json_arrayt & to_json_array(jsont &json)
Definition: json.h:424
json_objectt & to_json_object(jsont &json)
Definition: json.h:442
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Definition: json_parser.cpp:27
double log(double x)
Definition: math.c:2776
API to expression classes for Pointers.
Unused function removal.
exprt simplify_expr(exprt src, const namespacet &ns)
Pre-defined types.
void split_string(const std::string &s, char delim, std::vector< std::string > &result, bool strip, bool remove_empty)
bool for_has_scope
Definition: config.h:151
bool float16_type
Definition: config.h:154
bool bf16_type
Definition: config.h:155
bool ts_18661_3_Floatn_types
Definition: config.h:152
flavourt mode
Definition: config.h:253
std::string regex_str
std::vector< loop_contracts_clauset > loop_contracts
unchecked_replace_symbolt replace_symbol
Loop id used to identify loops.
Definition: loop_ids.h:28
void annotate_decreases(const std::map< loop_idt, std::vector< exprt >> &decreases_map, goto_modelt &goto_model)
Annotate the decreases in decreases_map to their corresponding loops.
Definition: utils.cpp:761
void annotate_invariants(const invariant_mapt &invariant_map, goto_modelt &goto_model)
Annotate the invariants in invariant_map to their corresponding loops.
Definition: utils.cpp:700
void annotate_assigns(const std::map< loop_idt, std::set< exprt >> &assigns_map, goto_modelt &goto_model)
Annotate the assigns in assigns_map to their corresponding loops.
Definition: utils.cpp:720
std::map< loop_idt, exprt > invariant_mapt
Definition: utils.h:29