CBMC
format_strings.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Format String Parser
4 
5 Author: CM Wintersteiger
6 
7 \*******************************************************************/
8 
11 
12 #include "format_strings.h"
13 
14 #include <util/c_types.h>
15 #include <util/exception_utils.h>
16 #include <util/invariant.h>
17 #include <util/std_expr.h>
18 
19 #include <cctype>
20 
22  std::string::const_iterator &it,
23  format_tokent &curtok)
24 {
25  while(*it=='#' || *it=='0' ||
26  *it=='-' || *it==' ' || *it=='+')
27  {
28  switch(*it)
29  {
30  case '#':
31  curtok.flags.push_back(format_tokent::flag_typet::ALTERNATE); break;
32  case '0':
33  curtok.flags.push_back(format_tokent::flag_typet::ZERO_PAD); break;
34  case '-':
35  curtok.flags.push_back(format_tokent::flag_typet::LEFT_ADJUST); break;
36  case ' ':
37  curtok.flags.push_back(format_tokent::flag_typet::SIGNED_SPACE); break;
38  case '+':
39  curtok.flags.push_back(format_tokent::flag_typet::SIGN); break;
40  default:
42  std::string("unsupported format specifier flag: '") + *it + "'");
43  }
44  it++;
45  }
46 }
47 
49  std::string::const_iterator &it,
50  format_tokent &curtok)
51 {
52  if(*it=='*')
53  {
55  it++;
56  }
57 
58  std::string tmp;
59  for( ; isdigit(*it); it++) tmp+=*it;
60  curtok.field_width=string2integer(tmp);
61 }
62 
64  std::string::const_iterator &it,
65  format_tokent &curtok)
66 {
67  if(*it=='.')
68  {
69  it++;
70 
71  if(*it=='*')
72  {
74  it++;
75  }
76  else
77  {
78  std::string tmp;
79  for( ; isdigit(*it); it++) tmp+=*it;
80  curtok.precision=string2integer(tmp);
81  }
82  }
83 }
84 
86  std::string::const_iterator &it,
87  format_tokent &curtok)
88 {
89  if(*it=='h')
90  {
91  it++;
92  if(*it=='h')
93  it++;
95  }
96  else if(*it=='l')
97  {
98  it++;
99  if(*it=='l')
100  it++;
102  }
103  else if(*it=='L')
104  {
105  it++;
107  }
108  else if(*it=='j')
109  {
110  it++;
112  }
113  else if(*it=='t')
114  {
115  it++;
117  }
118 }
119 
121  const std::string &arg_string,
122  std::string::const_iterator &it,
123  format_tokent &curtok)
124 {
125  switch(*it)
126  {
127  case 'd':
128  case 'i':
131  break;
132  case 'o':
135  break;
136  case 'u':
139  break;
140  case 'x':
141  case 'X':
144  break;
145  case 'e':
146  case 'E': curtok.type=format_tokent::token_typet::FLOAT; break;
147  case 'f':
148  case 'F': curtok.type=format_tokent::token_typet::FLOAT; break;
149  case 'g':
150  case 'G': curtok.type=format_tokent::token_typet::FLOAT; break;
151  case 'a':
152  case 'A': curtok.type=format_tokent::token_typet::FLOAT; break;
153  case 'c': curtok.type=format_tokent::token_typet::CHAR; break;
154  case 's': curtok.type=format_tokent::token_typet::STRING; break;
155  case 'p': curtok.type=format_tokent::token_typet::POINTER; break;
156  case '%':
158  curtok.value="%";
159  break;
160  case '[': // pattern matching in, e.g., fscanf.
161  {
162  std::string tmp;
163  it++;
164  if(*it=='^') // if it's there, it must be first
165  {
166  tmp+='^'; it++;
167  if(*it==']') // if it's there, it must be here
168  {
169  tmp+=']'; it++;
170  }
171  }
172 
173  for( ; it!=arg_string.end() && *it!=']'; it++)
174  tmp+=*it;
175 
176  break;
177  }
178 
179  default:
181  std::string("unsupported format conversion specifier: '") + *it + "'");
182  }
183  it++;
184 }
185 
186 format_token_listt parse_format_string(const std::string &arg_string)
187 {
188  format_token_listt token_list;
189 
190  std::string::const_iterator it=arg_string.begin();
191 
192  while(it!=arg_string.end())
193  {
194  if(*it=='%')
195  {
196  token_list.push_back(format_tokent());
197  format_tokent &curtok=token_list.back();
198  it++;
199 
200  parse_flags(it, curtok);
201  parse_field_width(it, curtok);
202  parse_precision(it, curtok);
203  parse_length_modifier(it, curtok);
204  parse_conversion_specifier(arg_string, it, curtok);
205  }
206  else
207  {
208  if(token_list.empty() ||
209  token_list.back().type!=format_tokent::token_typet::TEXT)
210  token_list.push_back(format_tokent(format_tokent::token_typet::TEXT));
211 
212  std::string tmp;
213  for( ; it!=arg_string.end() && *it!='%'; it++)
214  tmp+=*it;
215 
216  INVARIANT(
217  !token_list.empty() &&
218  token_list.back().type == format_tokent::token_typet::TEXT,
219  "must already have a TEXT token at the back of the token list");
220 
221  token_list.back().value=tmp;
222  }
223  }
224 
225  return token_list;
226 }
227 
228 std::optional<typet> get_type(const format_tokent &token)
229 {
230  switch(token.type)
231  {
233  switch(token.length_modifier)
234  {
237  return signed_char_type();
238  else
239  return unsigned_char_type();
240 
243  return signed_short_int_type();
244  else
245  return unsigned_short_int_type();
246 
249  return signed_long_int_type();
250  else
251  return unsigned_long_int_type();
252 
255  return signed_long_long_int_type();
256  else
258 
264  return signed_int_type();
265  else
266  return unsigned_int_type();
267  }
268 
270  switch(token.length_modifier)
271  {
280  return float_type();
281  }
282 
284  switch(token.length_modifier)
285  {
294  return char_type();
295  }
296 
298  return pointer_type(void_type());
299 
301  switch(token.length_modifier)
302  {
304  return array_typet(wchar_t_type(), nil_exprt());
312  return array_typet(char_type(), nil_exprt());
313  }
314 
317  return {};
318  }
319 
320  UNREACHABLE;
321 }
floatbv_typet float_type()
Definition: c_types.cpp:177
signedbv_typet signed_long_int_type()
Definition: c_types.cpp:72
signedbv_typet signed_char_type()
Definition: c_types.cpp:134
unsignedbv_typet unsigned_int_type()
Definition: c_types.cpp:36
unsignedbv_typet unsigned_long_long_int_type()
Definition: c_types.cpp:93
unsignedbv_typet unsigned_long_int_type()
Definition: c_types.cpp:86
empty_typet void_type()
Definition: c_types.cpp:245
signedbv_typet signed_int_type()
Definition: c_types.cpp:22
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:235
unsignedbv_typet unsigned_char_type()
Definition: c_types.cpp:127
bitvector_typet char_type()
Definition: c_types.cpp:106
signedbv_typet signed_long_long_int_type()
Definition: c_types.cpp:79
bitvector_typet wchar_t_type()
Definition: c_types.cpp:141
floatbv_typet long_double_type()
Definition: c_types.cpp:193
floatbv_typet double_type()
Definition: c_types.cpp:185
signedbv_typet signed_short_int_type()
Definition: c_types.cpp:29
unsignedbv_typet unsigned_short_int_type()
Definition: c_types.cpp:43
Arrays with given size.
Definition: std_types.h:807
mp_integer precision
token_typet type
mp_integer field_width
representationt representation
length_modifierst length_modifier
std::list< flag_typet > flags
irep_idt value
The NIL expression.
Definition: std_expr.h:3073
Thrown when we encounter an instruction, parameters to an instruction etc.
int isdigit(int c)
Definition: ctype.c:24
void parse_field_width(std::string::const_iterator &it, format_tokent &curtok)
std::optional< typet > get_type(const format_tokent &token)
void parse_conversion_specifier(const std::string &arg_string, std::string::const_iterator &it, format_tokent &curtok)
void parse_flags(std::string::const_iterator &it, format_tokent &curtok)
void parse_length_modifier(std::string::const_iterator &it, format_tokent &curtok)
format_token_listt parse_format_string(const std::string &arg_string)
void parse_precision(std::string::const_iterator &it, format_tokent &curtok)
Format String Parser.
std::list< format_tokent > format_token_listt
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:54
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:525
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:423
API to expression classes.