CBMC
string_constraint_generator_code_points.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints for Java functions dealing with
4  code points
5 
6 Author: Romain Brenguier, romain.brenguier@diffblue.com
7 
8 \*******************************************************************/
9 
12 
14 
15 #include <util/mathematical_expr.h>
16 
22 std::pair<exprt, string_constraintst>
24  const array_string_exprt &res,
25  const exprt &code_point)
26 {
27  string_constraintst constraints;
29  const typet &type = code_point.type();
30  PRECONDITION(type.id() == ID_signedbv);
31 
32  // We add axioms:
33  // a1 : code_point<0x010000 => |res|=1
34  // a2 : code_point>=0x010000 => |res|=2
35  // a3 : code_point<0x010000 => res[0]=code_point
36  // a4 : code_point>=0x010000 => res[0]=0xD800+(code_point-0x10000)/0x0400
37  // a5 : code_point>=0x010000 => res[1]=0xDC00+(code_point-0x10000)/0x0400
38  // For more explenations about this conversion, see:
39  // https://en.wikipedia.org/wiki/UTF-16
40 
41  exprt hex010000 = from_integer(0x010000, type);
42  exprt hexD800 = from_integer(0xD800, type);
43  exprt hexDC00 = from_integer(0xDC00, type);
44  exprt hex0400 = from_integer(0x0400, type);
45 
46  binary_relation_exprt small(code_point, ID_lt, hex010000);
48  constraints.existential.push_back(a1);
49 
50  implies_exprt a2(
52  constraints.existential.push_back(a2);
53 
54  typecast_exprt code_point_as_char(code_point, char_type);
55  implies_exprt a3(small, equal_exprt(res[0], code_point_as_char));
56  constraints.existential.push_back(a3);
57 
58  plus_exprt first_char(
59  hexD800, div_exprt(minus_exprt(code_point, hex010000), hex0400));
60  implies_exprt a4(
61  not_exprt(small),
62  equal_exprt(res[0], typecast_exprt(first_char, char_type)));
63  constraints.existential.push_back(a4);
64 
65  plus_exprt second_char(hexDC00, mod_exprt(code_point, hex0400));
66  implies_exprt a5(
67  not_exprt(small),
68  equal_exprt(res[1], typecast_exprt(second_char, char_type)));
69  constraints.existential.push_back(a5);
70 
71  return {from_integer(0, get_return_code_type()), constraints};
72 }
73 
80 static exprt is_high_surrogate(const exprt &chr)
81 {
82  return and_exprt(
83  binary_relation_exprt(chr, ID_ge, from_integer(0xD800, chr.type())),
84  binary_relation_exprt(chr, ID_le, from_integer(0xDBFF, chr.type())));
85 }
86 
93 static exprt is_low_surrogate(const exprt &chr)
94 {
95  return and_exprt(
96  binary_relation_exprt(chr, ID_ge, from_integer(0xDC00, chr.type())),
97  binary_relation_exprt(chr, ID_le, from_integer(0xDFFF, chr.type())));
98 }
99 
109 exprt pair_value(exprt char1, exprt char2, typet return_type)
110 {
111  exprt hex010000 = from_integer(0x010000, return_type);
112  exprt hex0800 = from_integer(0x0800, return_type);
113  exprt hex0400 = from_integer(0x0400, return_type);
114  mult_exprt m1(mod_exprt(char1, hex0800), hex0400);
115  mod_exprt m2(char2, hex0400);
116  return plus_exprt(hex010000, plus_exprt(m1, m2));
117 }
118 
123 std::pair<exprt, string_constraintst>
126 {
127  string_constraintst constraints;
128  const typet &return_type = f.type();
129  PRECONDITION(return_type.id() == ID_signedbv);
130  PRECONDITION(f.arguments().size() == 2);
132  const exprt &pos = f.arguments()[1];
133 
134  const symbol_exprt result = fresh_symbol("char", return_type);
135  const exprt index1 = from_integer(1, str.length_type());
136  const exprt &char1 = str[pos];
137  const exprt &char2 = str[plus_exprt(pos, index1)];
138  const typecast_exprt char1_as_int(char1, return_type);
139  const typecast_exprt char2_as_int(char2, return_type);
140  const exprt pair = pair_value(char1_as_int, char2_as_int, return_type);
141  const exprt is_low = is_low_surrogate(str[plus_exprt(pos, index1)]);
142  const and_exprt return_pair(is_high_surrogate(str[pos]), is_low);
143 
144  constraints.existential.push_back(
145  implies_exprt(return_pair, equal_exprt(result, pair)));
146  constraints.existential.push_back(
147  implies_exprt(not_exprt(return_pair), equal_exprt(result, char1_as_int)));
148  return {result, constraints};
149 }
150 
155 std::pair<exprt, string_constraintst>
158 {
160  PRECONDITION(args.size() == 2);
161  typet return_type = f.type();
162  PRECONDITION(return_type.id() == ID_signedbv);
163  symbol_exprt result = fresh_symbol("char", return_type);
165  string_constraintst constraints;
166 
167  const exprt &char1 = str[minus_exprt(
168  args[1], from_integer(2, array_pool.get_or_create_length(str).type()))];
169  const exprt &char2 = str[minus_exprt(
170  args[1], from_integer(1, array_pool.get_or_create_length(str).type()))];
171  const typecast_exprt char1_as_int(char1, return_type);
172  const typecast_exprt char2_as_int(char2, return_type);
173 
174  const exprt pair = pair_value(char1_as_int, char2_as_int, return_type);
175  const and_exprt return_pair(
176  is_high_surrogate(char1), is_low_surrogate(char2));
177 
178  constraints.existential.push_back(
179  implies_exprt(return_pair, equal_exprt(result, pair)));
180  constraints.existential.push_back(
181  implies_exprt(not_exprt(return_pair), equal_exprt(result, char2_as_int)));
182  return {result, constraints};
183 }
184 
190 std::pair<exprt, string_constraintst>
193 {
194  PRECONDITION(f.arguments().size() == 3);
195  string_constraintst constraints;
196  const exprt &begin = f.arguments()[1];
197  const exprt &end = f.arguments()[2];
198  const typet &return_type = f.type();
199  const symbol_exprt result = fresh_symbol("code_point_count", return_type);
200  const minus_exprt length(end, begin);
201  const div_exprt minimum(length, from_integer(2, length.type()));
202  constraints.existential.push_back(
203  binary_relation_exprt(result, ID_le, length));
204  constraints.existential.push_back(
205  binary_relation_exprt(result, ID_ge, minimum));
206 
207  return {result, constraints};
208 }
209 
216 std::pair<exprt, string_constraintst>
219 {
220  PRECONDITION(f.arguments().size() == 3);
221  string_constraintst constraints;
222  const exprt &index = f.arguments()[1];
223  const exprt &offset = f.arguments()[2];
224  const typet &return_type = f.type();
225  const symbol_exprt result = fresh_symbol("offset_by_code_point", return_type);
226 
227  const exprt minimum = plus_exprt(index, offset);
228  const exprt maximum = plus_exprt(minimum, offset);
229  constraints.existential.push_back(
230  binary_relation_exprt(result, ID_le, maximum));
231  constraints.existential.push_back(
232  binary_relation_exprt(result, ID_ge, minimum));
233 
234  return {result, constraints};
235 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
array_string_exprt get_string_expr(array_poolt &array_pool, const exprt &expr)
Fetch the string_exprt corresponding to the given refined_string_exprt.
Definition: array_pool.cpp:199
bitvector_typet char_type()
Definition: c_types.cpp:106
Boolean AND.
Definition: std_expr.h:2120
exprt get_or_create_length(const array_string_exprt &s)
Get the length of an array_string_exprt from the array_pool.
Definition: array_pool.cpp:26
const typet & length_type() const
Definition: string_expr.h:70
exprt & content()
Definition: string_expr.h:75
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:762
Division.
Definition: std_expr.h:1152
Equality.
Definition: std_expr.h:1361
Base class for all expressions.
Definition: expr.h:56
typet & type()
Return the type of the expression.
Definition: expr.h:84
Application of (mathematical) function.
Boolean implication.
Definition: std_expr.h:2183
const irep_idt & id() const
Definition: irep.h:384
Binary minus.
Definition: std_expr.h:1061
Modulo defined as lhs-(rhs * truncate(lhs/rhs)).
Definition: std_expr.h:1223
Binary multiplication Associativity is not specified.
Definition: std_expr.h:1107
Boolean negation.
Definition: std_expr.h:2327
The plus expression Associativity is not specified.
Definition: std_expr.h:1002
std::pair< exprt, string_constraintst > add_axioms_for_offset_by_code_point(const function_application_exprt &f)
Add axioms corresponding the String.offsetByCodePointCount java function.
std::pair< exprt, string_constraintst > add_axioms_for_code_point(const array_string_exprt &res, const exprt &code_point)
add axioms for the conversion of an integer representing a java code point to a utf-16 string
std::pair< exprt, string_constraintst > add_axioms_for_code_point_count(const function_application_exprt &f)
Add axioms corresponding the String.codePointCount java function.
std::pair< exprt, string_constraintst > add_axioms_for_code_point_at(const function_application_exprt &f)
add axioms corresponding to the String.codePointAt java function
std::pair< exprt, string_constraintst > add_axioms_for_code_point_before(const function_application_exprt &f)
add axioms corresponding to the String.codePointBefore java function
Expression to hold a symbol (variable)
Definition: std_expr.h:131
const typet & subtype() const
Definition: type.h:187
Semantic type conversion.
Definition: std_expr.h:2068
The type of an expression, extends irept.
Definition: type.h:29
literalt pos(literalt a)
Definition: literal.h:194
API to expression classes for 'mathematical' expressions.
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
Generates string constraints to link results from string functions with their arguments.
exprt maximum(const exprt &a, const exprt &b)
exprt minimum(const exprt &a, const exprt &b)
signedbv_typet get_return_code_type()
static exprt is_low_surrogate(const exprt &chr)
the output is true when the character is a low surrogate for UTF-16 encoding, see https://en....
static exprt is_high_surrogate(const exprt &chr)
the output is true when the character is a high surrogate for UTF-16 encoding, see https://en....
exprt pair_value(exprt char1, exprt char2, typet return_type)
the output corresponds to the unicode character given by the pair of characters of inputs assuming it...
equal_exprt equal_to(exprt lhs, exprt rhs)
Definition: string_expr.h:55
Collection of constraints of different types: existential formulas, universal formulas,...
std::vector< exprt > existential
const type_with_subtypet & to_type_with_subtype(const typet &type)
Definition: type.h:208