CBMC
java_bytecode_language.h
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 
10 #ifndef CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_LANGUAGE_H
11 #define CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_LANGUAGE_H
12 
13 #include <util/json.h>
14 #include <util/prefix_filter.h>
15 #include <util/symbol.h> // IWYU pragma: keep
16 
17 #include <langapi/language.h>
18 
19 #include "ci_lazy_methods.h"
20 #include "ci_lazy_methods_needed.h"
21 #include "code_with_references.h" // IWYU pragma: keep
22 #include "java_class_loader.h"
26 #include "select_pointer_type.h"
27 #include "synthetic_methods_map.h"
28 
29 #include <memory>
30 
31 // clang-format off
32 #define JAVA_BYTECODE_LANGUAGE_OPTIONS /*NOLINT*/ \
33  "(disable-uncaught-exception-check)" \
34  "(throw-assertion-error)" \
35  "(assert-no-exceptions-thrown)" \
36  "(java-assume-inputs-non-null)" \
37  "(java-assume-inputs-interval):" \
38  "(java-assume-inputs-integral)" \
39  "(throw-runtime-exceptions)" \
40  "(max-nondet-array-length):" \
41  "(max-nondet-tree-depth):" \
42  "(java-max-vla-length):" \
43  "(java-cp-include-files):" \
44  "(ignore-manifest-main-class)" \
45  "(context-include):" \
46  "(context-exclude):" \
47  "(no-lazy-methods)" \
48  "(lazy-methods-extra-entry-point):" \
49  "(java-load-class):" \
50  "(java-no-load-class):" \
51  "(static-values):" \
52  "(java-lift-clinit-calls)"
53 
54 #define JAVA_BYTECODE_LANGUAGE_OPTIONS_HELP \
55  " {y--disable-uncaught-exception-check} \t " \
56  "ignore uncaught exceptions and errors\n" \
57  " {y--throw-assertion-error} \t " \
58  "throw java.lang.AssertionError on violated assert statements instead of " \
59  "failing at the location of the assert statement\n" \
60  " {y--throw-runtime-exceptions} \t " \
61  "make implicit runtime exceptions explicit\n" \
62  " {y--assert-no-exceptions-thrown} \t " \
63  "transform `throw` instructions into `assert FALSE` followed by " \
64  "`assume FALSE`.\n" \
65  " {y--max-nondet-array-length} {uN} \t " \
66  "limit nondet (e.g. input) array size to <= {uN}\n" \
67  " {y--max-nondet-tree-depth} {uN} \t " \
68  "limit size of nondet (e.g. input) object tree; at level {uN} references " \
69  "are set to null\n" \
70  " {y--java-assume-inputs-non-null} \t " \
71  "never initialize reference-typed parameter to the entry point with null\n" \
72  " {y--java-assume-inputs-interval} {y[}{uL}{y:}{uU}|{uL}{y:}|{y:}{uU}{y]} " \
73  "\t " \
74  "force numerical primitive-typed inputs (byte, short, int, long, float, " \
75  "double) to be initialized within the given range; lower bound {uL} and " \
76  "upper bound {uU} must be integers; does not work for arrays\n" \
77  " {y--java-assume-inputs-integral} \t " \
78  "force float and double inputs to have integer values; does not work for " \
79  "arrays\n" \
80  " {y--java-max-vla-length} {uN} \t " \
81  "limit the length of user-code-created arrays\n" \
82  " {y--java-cp-include-files} {ur} \t " \
83  "regexp or JSON list of files to load (with '@' prefix)\n" \
84  " {y--java-load-class} {uCLASS} \t also load code from class {uCLASS}\n" \
85  " {y--java-no-load-class} {uCLASS} \t never load code from class " \
86  "{uCLASS}\n" \
87  " {y--ignore-manifest-main-class} \t " \
88  "ignore Main-Class entries in JAR manifest files. If this option is " \
89  "specified and the options {y--function} and {y--main-class} are not, we " \
90  "can be certain that all classes in the JAR file are loaded.\n" \
91  " {y--context-include} {ui} \t " \
92  "only analyze code matching specification {ui}\n" \
93  " {y--context-exclude} {ue} \t " \
94  "only analyze code does not match specification {ue}. All other methods " \
95  "are excluded, i.e. we load their signatures and meta-information, but not " \
96  "their bodies. A specification is any prefix of a package, class or method " \
97  "name, e.g. \"org.cprover.\" or \"org.cprover.MyClass.\" or " \
98  "\"org.cprover.MyClass.methodToStub:(I)Z\". These options can be given " \
99  "multiple times. The default for context-include is 'all included'; " \
100  "default for context-exclude is 'nothing excluded'.\n" \
101  " {y--no-lazy-methods} \t " \
102  "load and translate all methods given on the command line and in " \
103  "{y--classpath}. Default is to load methods that appear to be reachable " \
104  "from the {y--function} entry point or main class Note that " \
105  "{y--show-symbol-table}, {y--show-goto-functions} and " \
106  "{y--show-properties} output are restricted to loaded methods by default.\n" \
107  " {y--lazy-methods-extra-entry-point} {uMETHODNAME} \t " \
108  "treat {uMETHODNAME} as a possible program entry point for the purpose of " \
109  "lazy method loading {uMETHODNAME} can be a regex that will be matched " \
110  "against all symbols. If missing a java:: prefix will be added. If no " \
111  "descriptor is found, all overloads of a method will also be added.\n" \
112  " {y--static-values} {uf} \t " \
113  "Load initial values of static fields from the given JSON file {uf}. We " \
114  "assign static fields to these values instead of calling the normal " \
115  "static initializer (clinit) method. The argument can be a relative or " \
116  "absolute path to the file.\n" \
117  " {y--java-lift-clinit-calls} \t " \
118  "Lifts clinit calls in function bodies to the top of the function. This " \
119  "may reduce the overall cost of static initialisation, but may be unsound " \
120  "if there are cyclic dependencies between static initializers due to " \
121  "potentially changing their order of execution, or if static initializers " \
122  "have side-effects such as updating another class' static field.\n" \
123 
124 #ifdef _WIN32
125  #define JAVA_CLASSPATH_SEPARATOR ";"
126 #else
127  #define JAVA_CLASSPATH_SEPARATOR ":"
128 #endif
129 
130 #define HELP_JAVA_CLASSPATH \
131  " {y-classpath} {udirs/jars}, {y-cp} {udirs/jars}, " \
132  "{y--classpath} {udirs/jars} \t " \
133  "set class search path of directories and jar files to a " \
134  JAVA_CLASSPATH_SEPARATOR "-separated list of directories and JAR " \
135  "archives to search for class files\n" \
136  " {y--main-class} {uclass-name} \t set the name of the main class\n"
137 
138 #define HELP_JAVA_METHOD_NAME \
139  " {umethod-name} \t " \
140  "fully qualified name of method used as entry point, e.g. " \
141  "mypackage.Myclass.foo:(I)Z\n"
142 
143 #define HELP_JAVA_CLASS_NAME \
144  " {uclass-name} \t " \
145  "name of class. The entry point is the method specified by --function, " \
146  "or otherwise, the public static void main(String[]) method of the given " \
147  "class.\n"
148 
149 #define OPT_JAVA_JAR \
150  "(jar):"
151 
152 #define HELP_JAVA_JAR \
153  " {y-jar} {ujarfile} \t " \
154  "JAR file to be checked. The entry point is the method specified by " \
155  "{y--function} or otherwise, the public static void main(String[]) method " \
156  "of the class specified by {y--main-class} or the main class specified in " \
157  "the JAR manifest (checked in this order).\n"
158 
159 #define OPT_JAVA_GOTO_BINARY \
160  "(gb):"
161 
162 #define HELP_JAVA_GOTO_BINARY \
163  " {y--gb} {ugoto-binary} \t " \
164  "goto-binary file to be checked. The entry point is the method specified " \
165  "by {y--function}, or otherwise, the public static void main(String[]) of " \
166  "the class specified by {y--main-class} (checked in this order).\n"
167 // clang-format on
168 
170 {
174 };
175 
187 {
188 public:
190 
191  std::unordered_multimap<irep_idt, symbolt> &
192  get(const symbol_table_baset &symbol_table);
193 
194  void reinitialize();
195 
196 private:
197  bool initialized = false;
198  std::unordered_multimap<irep_idt, symbolt> map;
199 };
200 
202 {
204 
206 
212  bool throw_assertion_error = false;
213  bool threading_support = false;
214  bool nondet_static = false;
216 
220 
225 
227  std::vector<irep_idt> java_load_classes;
232  std::optional<json_objectt> static_values_json;
233 
235  std::unordered_set<std::string> no_load_classes;
236 
237  std::vector<load_extra_methodst> extra_methods;
238 
246  std::optional<prefix_filtert> method_context;
247 
252 
254  std::optional<std::string> main_jar;
255 };
256 
257 #define JAVA_CLASS_MODEL_SUFFIX "@class_model"
258 
260 {
261 public:
262  void set_language_options(const optionst &, message_handlert &) override;
263 
264  virtual bool preprocess(
265  std::istream &instream,
266  const std::string &path,
267  std::ostream &outstream,
268  message_handlert &message_handler) override;
269 
270  // This is an extension to languaget
271  // required because parsing of Java programs can be initiated without
272  // opening a file first or providing a path to a file
273  // as dictated by \ref languaget.
274  virtual bool parse(message_handlert &);
275 
276  bool parse(
277  std::istream &instream,
278  const std::string &path,
279  message_handlert &message_handler) override;
280 
282  symbol_table_baset &symbol_table,
283  message_handlert &message_handler) override;
284 
285  bool typecheck(
286  symbol_table_baset &context,
287  const std::string &module,
288  message_handlert &message_handler) override;
289 
290  virtual bool final(symbol_table_baset &context) override;
291 
292  void show_parse(std::ostream &out, message_handlert &) override;
293 
294  virtual ~java_bytecode_languaget();
296  std::unique_ptr<select_pointer_typet> pointer_type_selector)
299  {
300  }
301 
304  std::unique_ptr<select_pointer_typet>(new select_pointer_typet()))
305  {
306  }
307 
308  bool from_expr(
309  const exprt &expr,
310  std::string &code,
311  const namespacet &ns) override;
312 
313  bool from_type(
314  const typet &type,
315  std::string &code,
316  const namespacet &ns) override;
317 
318  bool to_expr(
319  const std::string &code,
320  const std::string &module,
321  exprt &expr,
322  const namespacet &ns,
323  message_handlert &message_handler) override;
324 
325  std::unique_ptr<languaget> new_language() override
326  {
327  return std::make_unique<java_bytecode_languaget>();
328  }
329 
330  std::string id() const override { return "java"; }
331  std::string description() const override { return "Java Bytecode"; }
332  std::set<std::string> extensions() const override;
333 
334  void modules_provided(std::set<std::string> &modules) override;
335  virtual void
336  methods_provided(std::unordered_set<irep_idt> &methods) const override;
337  virtual void convert_lazy_method(
338  const irep_idt &function_id,
339  symbol_table_baset &symbol_table,
340  message_handlert &message_handler) override;
341 
342 protected:
344  const irep_idt &function_id,
345  symbol_table_baset &symbol_table,
347  message_handlert &message_handler)
348  {
350  function_id,
351  symbol_table,
352  std::optional<ci_lazy_methods_neededt>(),
354  message_handler);
355  }
357  const irep_idt &function_id,
358  symbol_table_baset &symbol_table,
359  std::optional<ci_lazy_methods_neededt> needed_lazy_methods,
361  message_handlert &);
363  const irep_idt &function_id,
364  symbol_table_baset &symbol_table,
365  std::optional<ci_lazy_methods_neededt> needed_lazy_methods,
367  message_handlert &);
368 
371 
372  std::optional<java_bytecode_language_optionst> language_options;
374  std::vector<irep_idt> main_jar_classes;
379 
380 private:
381  virtual std::vector<load_extra_methodst>
382  build_extra_entry_points(const optionst &) const;
383  const std::unique_ptr<const select_pointer_typet> pointer_type_selector;
384 
391 
396  std::unordered_map<std::string, object_creation_referencet> references;
397 
400 };
401 
402 std::unique_ptr<languaget> new_java_bytecode_language();
403 
404 void parse_java_language_options(const cmdlinet &cmd, optionst &options);
405 
406 prefix_filtert get_context(const optionst &options);
407 
408 #endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_LANGUAGE_H
Collect methods needed to be loaded using the lazy method.
Context-insensitive lazy methods container.
Non-graph-based representation of the class hierarchy.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
Base class for all expressions.
Definition: expr.h:56
virtual void methods_provided(std::unordered_set< irep_idt > &methods) const override
Provide feedback to language_filest so that when asked for a lazy method, it can delegate to this ins...
synthetic_methods_mapt synthetic_methods
Maps synthetic method names on to the particular type of synthetic method (static initializer,...
std::vector< irep_idt > main_jar_classes
bool to_expr(const std::string &code, const std::string &module, exprt &expr, const namespacet &ns, message_handlert &message_handler) override
Parses the given string into an expression.
std::string id() const override
stub_global_initializer_factoryt stub_global_initializer_factory
std::set< std::string > extensions() const override
bool from_type(const typet &type, std::string &code, const namespacet &ns) override
Formats the given type in a language-specific way.
std::unique_ptr< languaget > new_language() override
void set_language_options(const optionst &, message_handlert &) override
Consume options that are java bytecode specific.
virtual bool parse(message_handlert &)
We set the main class (i.e. class to start the class loading analysis, see java_class_loadert) when w...
java_bytecode_languaget(std::unique_ptr< select_pointer_typet > pointer_type_selector)
std::string description() const override
const std::unique_ptr< const select_pointer_typet > pointer_type_selector
java_object_factory_parameterst object_factory_parameters
void show_parse(std::ostream &out, message_handlert &) override
void initialize_class_loader(message_handlert &)
java_string_library_preprocesst string_preprocess
void modules_provided(std::set< std::string > &modules) override
java_class_loadert java_class_loader
virtual void convert_lazy_method(const irep_idt &function_id, symbol_table_baset &symbol_table, message_handlert &message_handler) override
Promote a lazy-converted method (one whose type is known but whose body hasn't been converted) into a...
virtual std::vector< load_extra_methodst > build_extra_entry_points(const optionst &) const
This method should be overloaded to provide alternative approaches for specifying extra entry points.
std::optional< java_bytecode_language_optionst > language_options
const select_pointer_typet & get_pointer_type_selector() const
void parse_from_main_class(message_handlert &)
bool convert_single_method_code(const irep_idt &function_id, symbol_table_baset &symbol_table, std::optional< ci_lazy_methods_neededt > needed_lazy_methods, lazy_class_to_declared_symbols_mapt &class_to_declared_symbols, message_handlert &)
Convert a method (one whose type is known but whose body hasn't been converted) but don't run typeche...
bool from_expr(const exprt &expr, std::string &code, const namespacet &ns) override
Formats the given expression in a language-specific way.
void convert_single_method(const irep_idt &function_id, symbol_table_baset &symbol_table, lazy_class_to_declared_symbols_mapt &class_to_declared_symbols, message_handlert &message_handler)
std::unordered_map< std::string, object_creation_referencet > references
Map used in all calls to functions that deterministically create objects (currently only assign_from_...
bool typecheck(symbol_table_baset &context, const std::string &module, message_handlert &message_handler) override
virtual bool preprocess(std::istream &instream, const std::string &path, std::ostream &outstream, message_handlert &message_handler) override
ANSI-C preprocessing.
bool generate_support_functions(symbol_table_baset &symbol_table, message_handlert &message_handler) override
Create language-specific support functions, such as __CPROVER_start, __CPROVER_initialize and languag...
bool do_ci_lazy_method_conversion(symbol_table_baset &, message_handlert &)
Uses a simple context-insensitive ('ci') analysis to determine which methods may be reachable from th...
Class responsible to load .class files.
Map classes to the symbols they declare but is only computed once it is needed and the map is then ke...
std::unordered_multimap< irep_idt, symbolt > & get(const symbol_table_baset &symbol_table)
std::unordered_multimap< irep_idt, symbolt > map
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:94
Provides filtering of strings vai inclusion/exclusion lists of prefixes.
Definition: prefix_filter.h:20
The symbol table base class interface.
The type of an expression, extends irept.
Definition: type.h:29
void parse_java_language_options(const cmdlinet &cmd, optionst &options)
Parse options that are java bytecode specific.
@ LAZY_METHODS_MODE_EAGER
@ LAZY_METHODS_MODE_EXTERNAL_DRIVER
@ LAZY_METHODS_MODE_CONTEXT_INSENSITIVE
prefix_filtert get_context(const optionst &options)
std::unique_ptr< languaget > new_java_bytecode_language()
std::unordered_multimap< irep_idt, symbolt > class_to_declared_symbols(const symbol_table_baset &symbol_table)
Produce code for simple implementation of String Java libraries.
Abstract interface to support a programming language.
Prefix Filtering.
Handle selection of correct pointer type (for example changing abstract classes to concrete versions)...
std::optional< std::string > main_jar
If set then a JAR file has been given via the -jar option.
bool assert_no_exceptions_thrown
Transform athrow bytecode instructions into assert FALSE followed by assume FALSE.
bool should_lift_clinit_calls
Should we lift clinit calls in function bodies to the top? For example, turning if(x) A....
std::optional< prefix_filtert > method_context
If set, method bodies are only elaborated if they pass the filter.
bool assume_inputs_non_null
assume inputs variables to be non-null
size_t max_user_array_length
max size for user code created arrays
std::vector< irep_idt > java_load_classes
list of classes to force load even without reference from the entry point
std::unordered_set< std::string > no_load_classes
List of classes to never load.
std::vector< load_extra_methodst > extra_methods
std::optional< json_objectt > static_values_json
JSON which contains initial values of static fields (right after the static initializer of the class ...
Symbol table entry.
Synthetic methods are particular methods internally generated by the Java frontend,...
std::unordered_map< irep_idt, synthetic_method_typet > synthetic_methods_mapt
Maps method names on to a synthetic method kind.