CBMC
type2name.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Type Naming for C
4 
5 Author: Daniel Kroening, kroening@cs.cmu.edu
6 
7 \*******************************************************************/
8 
11 
12 #include "type2name.h"
13 
14 #include <util/arith_tools.h>
15 #include <util/c_types.h>
16 #include <util/invariant.h>
17 #include <util/namespace.h>
19 #include <util/std_expr.h>
20 #include <util/symbol.h>
21 
22 #include <regex>
23 
24 typedef std::unordered_map<irep_idt, std::pair<size_t, bool>> symbol_numbert;
25 
26 static std::string type2name(
27  const typet &type,
28  const namespacet &ns,
29  symbol_numbert &symbol_number);
30 
31 static std::string type2name_tag(
32  const tag_typet &type,
33  const namespacet &ns,
34  symbol_numbert &symbol_number)
35 {
36  const irep_idt &identifier = type.get_identifier();
37 
38  const symbolt *symbol;
39 
40  if(ns.lookup(identifier, symbol))
41  return "SYM#"+id2string(identifier)+"#";
42 
43  DATA_INVARIANT(symbol, "symbol not found");
44  DATA_INVARIANT(symbol->is_type, "not a type symbol");
45 
46  if(symbol->type.id()!=ID_struct &&
47  symbol->type.id()!=ID_union)
48  return type2name(symbol->type, ns, symbol_number);
49 
50  // assign each symbol a number when seen for the first time
51  std::pair<symbol_numbert::iterator, bool> entry=
52  symbol_number.insert(std::make_pair(
53  identifier,
54  std::make_pair(symbol_number.size(), true)));
55 
56  std::string result = "SYM" +
58  '#' + std::to_string(entry.first->second.first);
59 
60  // new entry, add definition
61  if(entry.second)
62  {
63  result+="={";
64  result+=type2name(symbol->type, ns, symbol_number);
65  result+='}';
66 
67  entry.first->second.second=false;
68  }
69 
70  return result;
71 }
72 
73 static std::string pointer_offset_bits_as_string(
74  const typet &type,
75  const namespacet &ns)
76 {
77  auto bits = pointer_offset_bits(type, ns);
78  CHECK_RETURN(bits.has_value());
79  return integer2string(*bits);
80 }
81 
82 static std::string type2name(
83  const typet &type,
84  const namespacet &ns,
85  symbol_numbert &symbol_number)
86 {
87  std::string result;
88 
89  // qualifiers first
90  if(type.get_bool(ID_C_constant))
91  result+='c';
92 
93  if(type.get_bool(ID_C_restricted))
94  result+='r';
95 
96  if(type.get_bool(ID_C_volatile))
97  result+='v';
98 
99  if(type.get_bool(ID_C_transparent_union))
100  result+='t';
101 
102  if(type.get_bool(ID_C_noreturn))
103  result+='n';
104 
105  // this isn't really a qualifier, but the linker needs to
106  // distinguish these - should likely be fixed in the linker instead
107  if(!type.source_location().get_function().empty())
108  result+='l';
109 
110  if(type.id().empty())
111  throw "empty type encountered";
112  else if(type.id()==ID_empty)
113  result+='V';
114  else if(type.id()==ID_signedbv)
115  result+="S" + pointer_offset_bits_as_string(type, ns);
116  else if(type.id()==ID_unsignedbv)
117  result+="U" + pointer_offset_bits_as_string(type, ns);
118  else if(type.id()==ID_bool ||
119  type.id()==ID_c_bool)
120  result+='B';
121  else if(type.id()==ID_integer)
122  result+='I';
123  else if(type.id()==ID_real)
124  result+='R';
125  else if(type.id()==ID_complex)
126  result+='C';
127  else if(type.id()==ID_floatbv)
128  result+="F" + pointer_offset_bits_as_string(type, ns);
129  else if(type.id()==ID_fixedbv)
130  result+="X" + pointer_offset_bits_as_string(type, ns);
131  else if(type.id()==ID_natural)
132  result+='N';
133  else if(type.id()==ID_pointer)
134  {
135  if(type.get_bool(ID_C_reference))
136  result+='&';
137  else
138  result+='*';
139  }
140  else if(type.id()==ID_code)
141  {
142  const code_typet &t=to_code_type(type);
143  const code_typet::parameterst parameters=t.parameters();
144  result+=type2name(t.return_type(), ns, symbol_number)+"(";
145 
146  for(code_typet::parameterst::const_iterator
147  it=parameters.begin();
148  it!=parameters.end();
149  it++)
150  {
151  if(it!=parameters.begin())
152  result+='|';
153  result+=type2name(it->type(), ns, symbol_number);
154  }
155 
156  if(t.has_ellipsis())
157  {
158  if(!parameters.empty())
159  result+='|';
160  result+="...";
161  }
162 
163  result+=")->";
164  result+=type2name(t.return_type(), ns, symbol_number);
165  }
166  else if(type.id()==ID_array)
167  {
168  const exprt &size = to_array_type(type).size();
169 
170  if(size.id() == ID_symbol)
171  result += "ARR" + id2string(to_symbol_expr(size).get_identifier());
172  else
173  {
174  const auto size_int = numeric_cast<mp_integer>(size);
175 
176  if(!size_int.has_value())
177  result += "ARR?";
178  else
179  result += "ARR" + integer2string(*size_int);
180  }
181  }
182  else if(
183  type.id() == ID_c_enum_tag || type.id() == ID_struct_tag ||
184  type.id() == ID_union_tag)
185  {
186  result += type2name_tag(to_tag_type(type), ns, symbol_number);
187  }
188  else if(type.id()==ID_struct ||
189  type.id()==ID_union)
190  {
191  const auto &struct_union_type = to_struct_union_type(type);
192 
193  if(struct_union_type.is_incomplete())
194  {
195  if(type.id() == ID_struct)
196  result += "ST?";
197  else if(type.id() == ID_union)
198  result += "UN?";
199  }
200  else
201  {
202  if(type.id() == ID_struct)
203  result += "ST";
204  if(type.id() == ID_union)
205  result += "UN";
206  result += '[';
207  bool first = true;
208  for(const auto &c : struct_union_type.components())
209  {
210  if(!first)
211  result += '|';
212  else
213  first = false;
214 
215  result += type2name(c.type(), ns, symbol_number);
216  const irep_idt &component_name = c.get_name();
217  CHECK_RETURN(!component_name.empty());
218  result += "'" + id2string(component_name) + "'";
219  }
220  result += ']';
221  }
222  }
223  else if(type.id()==ID_c_enum)
224  {
225  const c_enum_typet &t=to_c_enum_type(type);
226 
227  if(t.is_incomplete())
228  result += "EN?";
229  else
230  {
231  result += "EN";
232  const c_enum_typet::memberst &members = t.members();
233  result += '[';
234  for(c_enum_typet::memberst::const_iterator it = members.begin();
235  it != members.end();
236  ++it)
237  {
238  if(it != members.begin())
239  result += '|';
240  result += id2string(it->get_value());
241  result += "'" + id2string(it->get_identifier()) + "'";
242  }
243  }
244  }
245  else if(type.id()==ID_c_bit_field)
246  result+="BF"+pointer_offset_bits_as_string(type, ns);
247  else if(type.id()==ID_vector)
248  {
249  const constant_exprt &size = to_vector_type(type).size();
250  const auto size_int = numeric_cast_v<mp_integer>(size);
251  result += "VEC" + integer2string(size_int);
252  }
253  else
254  throw "unknown type '"+type.id_string()+"' encountered";
255 
256  if(type.has_subtype())
257  {
258  result+='{';
259  result +=
260  type2name(to_type_with_subtype(type).subtype(), ns, symbol_number);
261  result+='}';
262  }
263  else if(type.has_subtypes())
264  {
265  result+='$';
266  for(const typet &subtype : to_type_with_subtypes(type).subtypes())
267  {
268  result += type2name(subtype, ns, symbol_number);
269  result+='|';
270  }
271  result[result.size()-1]='$';
272  }
273 
274  return result;
275 }
276 
277 std::string type2name(const typet &type, const namespacet &ns)
278 {
279  symbol_numbert symbol_number;
280  return type2name(type, ns, symbol_number);
281 }
282 
287 std::string type_name2type_identifier(const std::string &name)
288 {
289  const auto replace_special_characters = [](const std::string &name) {
290  std::string result{};
291  for(char c : name)
292  {
293  switch(c)
294  {
295  case '*':
296  result += "_ptr_";
297  break;
298  case '{':
299  result += "_start_sub_";
300  break;
301  case '}':
302  result += "_end_sub_";
303  break;
304  default:
305  result += c;
306  break;
307  }
308  }
309  return result;
310  };
311  const auto replace_invalid_characters_with_underscore =
312  [](const std::string &identifier) {
313  static const std::regex non_alpha_numeric{"[^A-Za-z0-9\x80-\xff]+"};
314  return std::regex_replace(identifier, non_alpha_numeric, "_");
315  };
316  const auto strip_leading_digits = [](const std::string &identifier) {
317  static const std::regex identifier_regex{
318  "[A-Za-z\x80-\xff_][A-Za-z0-9_\x80-\xff]*"};
319  std::smatch match_results;
320  bool found = std::regex_search(identifier, match_results, identifier_regex);
321  POSTCONDITION(found);
322  return match_results.str(0);
323  };
324  return strip_leading_digits(replace_invalid_characters_with_underscore(
325  replace_special_characters(name)));
326 }
327 
328 std::string type_to_partial_identifier(const typet &type, const namespacet &ns)
329 {
330  return type_name2type_identifier(type2name(type, ns));
331 }
const c_enum_typet & to_c_enum_type(const typet &type)
Cast a typet to a c_enum_typet.
Definition: c_types.h:335
const exprt & size() const
Definition: std_types.h:840
The type of C enums.
Definition: c_types.h:239
memberst & members()
Definition: c_types.h:283
std::vector< c_enum_membert > memberst
Definition: c_types.h:275
bool is_incomplete() const
enum types may be incomplete
Definition: c_types.h:294
Base type of functions.
Definition: std_types.h:583
std::vector< parametert > parameterst
Definition: std_types.h:585
const typet & return_type() const
Definition: std_types.h:689
bool has_ellipsis() const
Definition: std_types.h:655
const parameterst & parameters() const
Definition: std_types.h:699
A constant literal expression.
Definition: std_expr.h:2987
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
bool empty() const
Definition: dstring.h:89
Base class for all expressions.
Definition: expr.h:56
bool get_bool(const irep_idt &name) const
Definition: irep.cpp:57
const std::string & id_string() const
Definition: irep.h:387
const irep_idt & id() const
Definition: irep.h:384
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:148
const irep_idt & get_function() const
irep_idt get_tag() const
Definition: std_types.h:168
Symbol table entry.
Definition: symbol.h:28
bool is_type
Definition: symbol.h:61
typet type
Type of symbol.
Definition: symbol.h:31
A tag-based type, i.e., typet with an identifier.
Definition: std_types.h:396
const irep_idt & get_identifier() const
Definition: std_types.h:410
The type of an expression, extends irept.
Definition: type.h:29
const source_locationt & source_location() const
Definition: type.h:72
bool has_subtypes() const
Definition: type.h:61
bool has_subtype() const
Definition: type.h:64
const constant_exprt & size() const
Definition: std_types.cpp:275
const std::string & id2string(const irep_idt &d)
Definition: irep.h:40
const std::string integer2string(const mp_integer &n, unsigned base)
Definition: mp_arith.cpp:103
std::optional< mp_integer > pointer_offset_bits(const typet &type, const namespacet &ns)
Pointer Logic.
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:534
#define POSTCONDITION(CONDITION)
Definition: invariant.h:479
API to expression classes.
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:272
const vector_typet & to_vector_type(const typet &type)
Cast a typet to a vector_typet.
Definition: std_types.h:1104
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition: std_types.h:788
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition: std_types.h:888
const struct_union_typet & to_struct_union_type(const typet &type)
Cast a typet to a struct_union_typet.
Definition: std_types.h:214
const tag_typet & to_tag_type(const typet &type)
Cast a typet to a tag_typet.
Definition: std_types.h:434
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
Symbol table entry.
static std::string pointer_offset_bits_as_string(const typet &type, const namespacet &ns)
Definition: type2name.cpp:73
std::string type_to_partial_identifier(const typet &type, const namespacet &ns)
Constructs a string describing the given type, which can be used as part of a C identifier.
Definition: type2name.cpp:328
static std::string type2name(const typet &type, const namespacet &ns, symbol_numbert &symbol_number)
Definition: type2name.cpp:82
static std::string type2name_tag(const tag_typet &type, const namespacet &ns, symbol_numbert &symbol_number)
Definition: type2name.cpp:31
std::string type_name2type_identifier(const std::string &name)
type2name generates strings that aren't valid C identifiers If we want utilities like dump_c to work ...
Definition: type2name.cpp:287
std::unordered_map< irep_idt, std::pair< size_t, bool > > symbol_numbert
Definition: type2name.cpp:24
Type Naming for C.
const type_with_subtypest & to_type_with_subtypes(const typet &type)
Definition: type.h:252
const type_with_subtypet & to_type_with_subtype(const typet &type)
Definition: type.h:208