CBMC
java_bytecode_convert_class.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: JAVA Bytecode Language Conversion
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
13 
14 #ifdef DEBUG
15 #include <iostream>
16 #endif
17 
18 #include "java_root_class.h"
19 
20 #include <util/arith_tools.h>
21 #include <util/expr_initializer.h>
22 #include <util/namespace.h>
23 #include <util/prefix.h>
24 #include <util/std_expr.h>
25 #include <util/symbol_table_base.h>
26 
27 #include "ci_lazy_methods.h"
30 #include "java_types.h"
31 #include "java_utils.h"
32 
34 {
35 public:
37  symbol_table_baset &_symbol_table,
38  message_handlert &_message_handler,
39  size_t _max_array_length,
41  java_string_library_preprocesst &_string_preprocess,
42  const std::unordered_set<std::string> &no_load_classes)
43  : log(_message_handler),
44  symbol_table(_symbol_table),
45  max_array_length(_max_array_length),
47  string_preprocess(_string_preprocess),
49  {
50  }
51 
67  void operator()(
69  {
70  PRECONDITION(!parse_trees.empty());
71  const java_bytecode_parse_treet &parse_tree = parse_trees.front();
72 
73  // Add array types to the symbol table
75 
76  const bool loading_success =
77  parse_tree.loading_successful &&
78  !no_load_classes.count(id2string(parse_tree.parsed_class.name));
79  if(loading_success)
80  {
81  overlay_classest overlay_classes;
82  for(auto overlay_class_it = std::next(parse_trees.begin());
83  overlay_class_it != parse_trees.end();
84  ++overlay_class_it)
85  {
86  overlay_classes.push_front(std::cref(overlay_class_it->parsed_class));
87  }
88  convert(parse_tree.parsed_class, overlay_classes);
89  }
90 
91  // Add as string type if relevant
92  const irep_idt &class_name = parse_tree.parsed_class.name;
95  else if(!loading_success)
96  // Generate stub if couldn't load from bytecode and wasn't string type
98  class_name,
102  }
103 
108 
109 private:
112  const size_t max_array_length;
115 
116  // conversion
117  typedef std::list<std::reference_wrapper<const classt>> overlay_classest;
118  void convert(const classt &c, const overlay_classest &overlay_classes);
119  void convert(symbolt &class_symbol, const fieldt &f);
120 
136  static bool is_overlay_method(const methodt &method)
137  {
138  return method.has_annotation(ID_overlay_method);
139  }
140 
161  static bool is_ignored_method(
162  const irep_idt &class_name, const methodt &method)
163  {
164  static irep_idt org_cprover_CProver_name = "org.cprover.CProver";
165  return
166  (class_name == org_cprover_CProver_name &&
167  cprover_methods_to_ignore.count(id2string(method.name)) != 0) ||
168  method.has_annotation(ID_ignored_method);
169  }
170 
171  bool check_field_exists(
172  const fieldt &field,
173  const irep_idt &qualified_fieldname,
174  const struct_union_typet::componentst &fields) const;
175 
176  std::unordered_set<std::string> no_load_classes;
177 };
178 
189 static std::optional<std::string> extract_generic_superclass_reference(
190  const std::optional<std::string> &signature)
191 {
192  if(signature.has_value())
193  {
194  // skip the (potential) list of generic parameters at the beginning of the
195  // signature
196  const size_t start =
197  signature.value().front() == '<'
198  ? find_closing_delimiter(signature.value(), 0, '<', '>') + 1
199  : 0;
200 
201  // extract the superclass reference
202  const size_t end =
203  find_closing_semi_colon_for_reference_type(signature.value(), start);
204  const std::string superclass_ref =
205  signature.value().substr(start, (end - start) + 1);
206 
207  // if the superclass is generic then the reference is of form
208  // `Lsuperclass-name<generic-types;>;` if it is implicitly generic, then the
209  // reference is of the form
210  // `Lsuperclass-name<Tgeneric-types;>.Innerclass-Name`
211  if(superclass_ref.find('<') != std::string::npos)
212  return superclass_ref;
213  }
214  return {};
215 }
216 
229 static std::optional<std::string> extract_generic_interface_reference(
230  const std::optional<std::string> &signature,
231  const std::string &interface_name)
232 {
233  if(signature.has_value())
234  {
235  // skip the (potential) list of generic parameters at the beginning of the
236  // signature
237  size_t start =
238  signature.value().front() == '<'
239  ? find_closing_delimiter(signature.value(), 0, '<', '>') + 1
240  : 0;
241 
242  // skip the superclass reference (if there is at least one interface
243  // reference in the signature, then there is a superclass reference)
244  start =
245  find_closing_semi_colon_for_reference_type(signature.value(), start) + 1;
246 
247  // if the interface name includes package name, convert dots to slashes
248  std::string interface_name_slash_to_dot = interface_name;
249  std::replace(
250  interface_name_slash_to_dot.begin(),
251  interface_name_slash_to_dot.end(),
252  '.',
253  '/');
254 
255  start =
256  signature.value().find("L" + interface_name_slash_to_dot + "<", start);
257  if(start != std::string::npos)
258  {
259  const size_t &end =
260  find_closing_semi_colon_for_reference_type(signature.value(), start);
261  return signature.value().substr(start, (end - start) + 1);
262  }
263  }
264  return {};
265 }
266 
271  const classt &c,
272  const overlay_classest &overlay_classes)
273 {
274  std::string qualified_classname="java::"+id2string(c.name);
275  if(symbol_table.has_symbol(qualified_classname))
276  {
277  log.debug() << "Skip class " << c.name << " (already loaded)"
278  << messaget::eom;
279  return;
280  }
281 
282  java_class_typet class_type;
283  if(c.signature.has_value() && c.signature.value()[0]=='<')
284  {
285  java_generic_class_typet generic_class_type;
286 #ifdef DEBUG
287  std::cout << "INFO: found generic class signature "
288  << c.signature.value()
289  << " in parsed class "
290  << c.name << "\n";
291 #endif
292  try
293  {
294  const std::vector<typet> &generic_types=java_generic_type_from_string(
295  id2string(c.name),
296  c.signature.value());
297  for(const typet &t : generic_types)
298  {
299  generic_class_type.generic_types()
300  .push_back(to_java_generic_parameter(t));
301  }
302  class_type=generic_class_type;
303  }
305  {
306  log.debug() << "Class: " << c.name
307  << "\n could not parse signature: " << c.signature.value()
308  << "\n " << e.what()
309  << "\n ignoring that the class is generic" << messaget::eom;
310  }
311  }
312 
313  class_type.set_tag(c.name);
314  class_type.set_inner_name(c.inner_name);
315  class_type.set_abstract(c.is_abstract);
316  class_type.set_is_annotation(c.is_annotation);
317  class_type.set_interface(c.is_interface);
318  class_type.set_synthetic(c.is_synthetic);
319  class_type.set_final(c.is_final);
320  class_type.set_is_inner_class(c.is_inner_class);
321  class_type.set_is_static_class(c.is_static_class);
323  class_type.set_outer_class(c.outer_class);
324  class_type.set_super_class(c.super_class);
325  if(c.is_enum)
326  {
328  {
329  log.warning() << "Java Enum " << c.name
330  << " won't work properly because max "
331  << "array length (" << max_array_length
332  << ") is less than the "
333  << "enum size (" << c.enum_elements << ")" << messaget::eom;
334  }
335  class_type.set(
336  ID_java_enum_static_unwind,
338  class_type.set_is_enumeration(true);
339  }
340 
341  if(c.is_public)
342  class_type.set_access(ID_public);
343  else if(c.is_protected)
344  class_type.set_access(ID_protected);
345  else if(c.is_private)
346  class_type.set_access(ID_private);
347  else
348  class_type.set_access(ID_default);
349 
350  if(!c.super_class.empty())
351  {
352  const struct_tag_typet base("java::" + id2string(c.super_class));
353 
354  // if the superclass is generic then the class has the superclass reference
355  // including the generic info in its signature
356  // e.g., signature for class 'A<T>' that extends
357  // 'Generic<Integer>' is '<T:Ljava/lang/Object;>LGeneric<LInteger;>;'
358  const std::optional<std::string> &superclass_ref =
360  if(superclass_ref.has_value())
361  {
362  try
363  {
364  const java_generic_struct_tag_typet generic_base(
365  base, superclass_ref.value(), qualified_classname);
366  class_type.add_base(generic_base);
367  }
369  {
370  log.debug() << "Superclass: " << c.super_class
371  << " of class: " << c.name
372  << "\n could not parse signature: "
373  << superclass_ref.value() << "\n " << e.what()
374  << "\n ignoring that the superclass is generic"
375  << messaget::eom;
376  class_type.add_base(base);
377  }
378  }
379  else
380  {
381  class_type.add_base(base);
382  }
383  java_class_typet::componentt base_class_field;
384  base_class_field.type() = class_type.bases().at(0).type();
385  base_class_field.set_name("@" + id2string(c.super_class));
386  base_class_field.set_base_name("@" + id2string(c.super_class));
387  base_class_field.set_pretty_name("@" + id2string(c.super_class));
388  class_type.components().push_back(base_class_field);
389  }
390 
391  // interfaces are recorded as bases
392  for(const auto &interface : c.implements)
393  {
394  const struct_tag_typet base("java::" + id2string(interface));
395 
396  // if the interface is generic then the class has the interface reference
397  // including the generic info in its signature
398  // e.g., signature for class 'A implements GenericInterface<Integer>' is
399  // 'Ljava/lang/Object;LGenericInterface<LInteger;>;'
400  const std::optional<std::string> interface_ref =
402  if(interface_ref.has_value())
403  {
404  try
405  {
406  const java_generic_struct_tag_typet generic_base(
407  base, interface_ref.value(), qualified_classname);
408  class_type.add_base(generic_base);
409  }
411  {
412  log.debug() << "Interface: " << interface << " of class: " << c.name
413  << "\n could not parse signature: " << interface_ref.value()
414  << "\n " << e.what()
415  << "\n ignoring that the interface is generic"
416  << messaget::eom;
417  class_type.add_base(base);
418  }
419  }
420  else
421  {
422  class_type.add_base(base);
423  }
424  }
425 
426  // now do lambda method handles (bootstrap methods)
427  for(const auto &lambda_entry : c.lambda_method_handle_map)
428  {
429  // if the handle is of unknown type, we still need to store it to preserve
430  // the correct indexing (invokedynamic instructions will retrieve
431  // method handles by index)
432  lambda_entry.second.is_unknown_handle()
433  ? class_type.add_unknown_lambda_method_handle()
434  : class_type.add_lambda_method_handle(
435  lambda_entry.second.get_method_descriptor(),
436  lambda_entry.second.handle_type);
437  }
438 
439  // Load annotations
440  if(!c.annotations.empty())
441  convert_annotations(c.annotations, class_type.get_annotations());
442 
443  // the base name is the name of the class without the package
444  const irep_idt base_name = [](const std::string &full_name) {
445  const size_t last_dot = full_name.find_last_of('.');
446  return last_dot == std::string::npos
447  ? full_name
448  : std::string(full_name, last_dot + 1, std::string::npos);
449  }(id2string(c.name));
450 
451  // produce class symbol
452  class_type.set_name(qualified_classname);
453  type_symbolt new_symbol{qualified_classname, class_type, ID_java};
454  new_symbol.base_name = base_name;
455  new_symbol.pretty_name=c.name;
456 
457  symbolt *class_symbol;
458 
459  // add before we do members
460  log.debug() << "Adding symbol: class '" << c.name << "'" << messaget::eom;
461  if(symbol_table.move(new_symbol, class_symbol))
462  {
463  log.error() << "failed to add class symbol " << new_symbol.name
464  << messaget::eom;
465  throw 0;
466  }
467 
468  // Now do fields
469  const class_typet::componentst &fields =
470  to_class_type(class_symbol->type).components();
471  // Include fields from overlay classes as they will be required by the
472  // methods defined there
473  for(auto overlay_class : overlay_classes)
474  {
475  for(const auto &field : overlay_class.get().fields)
476  {
477  std::string field_id = qualified_classname + "." + id2string(field.name);
478  if(check_field_exists(field, field_id, fields))
479  {
480  std::string err =
481  "Duplicate field definition for " + field_id + " in overlay class";
482  // TODO: This could just be a warning if the types match
483  log.error() << err << messaget::eom;
484  throw err.c_str();
485  }
486  log.debug() << "Adding symbol from overlay class: field '" << field.name
487  << "'" << messaget::eom;
488  convert(*class_symbol, field);
489  POSTCONDITION(check_field_exists(field, field_id, fields));
490  }
491  }
492  for(const auto &field : c.fields)
493  {
494  std::string field_id = qualified_classname + "." + id2string(field.name);
495  if(check_field_exists(field, field_id, fields))
496  {
497  // TODO: This could be a warning if the types match
498  log.error() << "Field definition for " << field_id
499  << " already loaded from overlay class" << messaget::eom;
500  continue;
501  }
502  log.debug() << "Adding symbol: field '" << field.name << "'"
503  << messaget::eom;
504  convert(*class_symbol, field);
505  POSTCONDITION(check_field_exists(field, field_id, fields));
506  }
507 
508  // Now do methods
509  std::set<irep_idt> overlay_methods;
510  for(auto overlay_class : overlay_classes)
511  {
512  for(const methodt &method : overlay_class.get().methods)
513  {
514  const irep_idt method_identifier =
515  qualified_classname + "." + id2string(method.name)
516  + ":" + method.descriptor;
517  if(is_ignored_method(c.name, method))
518  {
519  log.debug() << "Ignoring method: '" << method_identifier << "'"
520  << messaget::eom;
521  continue;
522  }
523  if(method_bytecode.contains_method(method_identifier))
524  {
525  // This method has already been discovered and added to method_bytecode
526  // while parsing an overlay class that appears later in the classpath
527  // (we're working backwards)
528  // Warn the user if the definition already found was not an overlay,
529  // otherwise simply don't load this earlier definition
530  if(overlay_methods.count(method_identifier) == 0)
531  {
532  // This method was defined in a previous class definition without
533  // being marked as an overlay method
534  log.warning()
535  << "Method " << method_identifier
536  << " exists in an overlay class without being marked as an "
537  "overlay and also exists in another overlay class that appears "
538  "earlier in the classpath"
539  << messaget::eom;
540  }
541  continue;
542  }
543  // Always run the lazy pre-stage, as it symbol-table
544  // registers the function.
545  log.debug() << "Adding symbol from overlay class: method '"
546  << method_identifier << "'" << messaget::eom;
547  java_bytecode_convert_method_lazy(
548  *class_symbol,
549  method_identifier,
550  method,
551  symbol_table,
552  log.get_message_handler());
553  method_bytecode.add(qualified_classname, method_identifier, method);
554  if(is_overlay_method(method))
555  overlay_methods.insert(method_identifier);
556  }
557  }
558  for(const methodt &method : c.methods)
559  {
560  const irep_idt method_identifier=
561  qualified_classname + "." + id2string(method.name)
562  + ":" + method.descriptor;
563  if(is_ignored_method(c.name, method))
564  {
565  log.debug() << "Ignoring method: '" << method_identifier << "'"
566  << messaget::eom;
567  continue;
568  }
569  if(method_bytecode.contains_method(method_identifier))
570  {
571  // This method has already been discovered while parsing an overlay
572  // class
573  // If that definition is an overlay then we simply don't load this
574  // original definition and we remove it from the list of overlays
575  if(overlay_methods.erase(method_identifier) == 0)
576  {
577  // This method was defined in a previous class definition without
578  // being marked as an overlay method
579  log.warning()
580  << "Method " << method_identifier
581  << " exists in an overlay class without being marked as an overlay "
582  "and also exists in the underlying class"
583  << messaget::eom;
584  }
585  continue;
586  }
587  // Always run the lazy pre-stage, as it symbol-table
588  // registers the function.
589  log.debug() << "Adding symbol: method '" << method_identifier << "'"
590  << messaget::eom;
591  java_bytecode_convert_method_lazy(
592  *class_symbol,
593  method_identifier,
594  method,
595  symbol_table,
596  log.get_message_handler());
597  method_bytecode.add(qualified_classname, method_identifier, method);
598  if(is_overlay_method(method))
599  {
600  log.warning()
601  << "Method " << method_identifier
602  << " marked as an overlay where defined in the underlying class"
603  << messaget::eom;
604  }
605  }
606  if(!overlay_methods.empty())
607  {
608  log.error()
609  << "Overlay methods defined in overlay classes did not exist in the "
610  "underlying class:\n";
611  for(const irep_idt &method_id : overlay_methods)
612  log.error() << " " << method_id << "\n";
613  log.error() << messaget::eom;
614  }
615 
616  // is this a root class?
617  if(c.super_class.empty())
618  java_root_class(*class_symbol);
619 }
620 
621 bool java_bytecode_convert_classt::check_field_exists(
622  const java_bytecode_parse_treet::fieldt &field,
623  const irep_idt &qualified_fieldname,
624  const struct_union_typet::componentst &fields) const
625 {
626  if(field.is_static)
627  return symbol_table.has_symbol(qualified_fieldname);
628 
629  auto existing_field = std::find_if(
630  fields.begin(),
631  fields.end(),
632  [&field](const struct_union_typet::componentt &f)
633  {
634  return f.get_name() == field.name;
635  });
636  return existing_field != fields.end();
637 }
638 
642 void java_bytecode_convert_classt::convert(
643  symbolt &class_symbol,
644  const fieldt &f)
645 {
646  typet field_type;
647  if(f.signature.has_value())
648  {
649  field_type = *java_type_from_string_with_exception(
650  f.descriptor, f.signature, id2string(class_symbol.name));
651 
653  if(is_java_generic_parameter(field_type))
654  {
655 #ifdef DEBUG
656  std::cout << "fieldtype: generic "
657  << to_java_generic_parameter(field_type).type_variable()
658  .get_identifier()
659  << " name " << f.name << "\n";
660 #endif
661  }
662 
665  else if(is_java_generic_type(field_type))
666  {
667  java_generic_typet &with_gen_type=
668  to_java_generic_type(field_type);
669 #ifdef DEBUG
670  std::cout << "fieldtype: generic container type "
671  << std::to_string(with_gen_type.generic_type_arguments().size())
672  << " type " << with_gen_type.id()
673  << " name " << f.name
674  << " subtype id " << with_gen_type.subtype().id() << "\n";
675 #endif
676  field_type=with_gen_type;
677  }
678  }
679  else
680  field_type = *java_type_from_string(f.descriptor);
681 
682  // determine access
683  irep_idt access;
684 
685  if(f.is_private)
686  access = ID_private;
687  else if(f.is_protected)
688  access = ID_protected;
689  else if(f.is_public)
690  access = ID_public;
691  else
692  access = ID_default;
693 
694  auto &class_type = to_java_class_type(class_symbol.type);
695 
696  // is this a static field?
697  if(f.is_static)
698  {
699  const irep_idt field_identifier =
700  id2string(class_symbol.name) + "." + id2string(f.name);
701 
702  class_type.static_members().emplace_back();
703  auto &component = class_type.static_members().back();
704 
705  component.set_name(field_identifier);
706  component.set_base_name(f.name);
707  component.set_pretty_name(f.name);
708  component.set_access(access);
709  component.set_is_final(f.is_final);
710  component.type() = field_type;
711 
712  // Create the symbol
713  symbolt new_symbol{
714  id2string(class_symbol.name) + "." + id2string(f.name),
715  field_type,
716  ID_java};
717 
718  new_symbol.is_static_lifetime=true;
719  new_symbol.is_lvalue=true;
720  new_symbol.is_state_var=true;
721  new_symbol.base_name=f.name;
722  // Provide a static field -> class link, like
723  // java_bytecode_convert_method::convert does for method -> class.
724  set_declaring_class(new_symbol, class_symbol.name);
725  new_symbol.type.set(ID_C_field, f.name);
726  new_symbol.type.set(ID_C_constant, f.is_final);
727  new_symbol.pretty_name=id2string(class_symbol.pretty_name)+
728  "."+id2string(f.name);
729 
730  // These annotations use `ID_C_access` instead of `ID_access` like methods
731  // to avoid type clashes in expressions like `some_static_field = 0`, where
732  // with ID_access the constant '0' would need to have an access modifier
733  // too, or else appear to have incompatible type.
734  if(f.is_public)
735  new_symbol.type.set(ID_C_access, ID_public);
736  else if(f.is_protected)
737  new_symbol.type.set(ID_C_access, ID_protected);
738  else if(f.is_private)
739  new_symbol.type.set(ID_C_access, ID_private);
740  else
741  new_symbol.type.set(ID_C_access, ID_default);
742 
743  const namespacet ns(symbol_table);
744  const auto value = zero_initializer(field_type, class_symbol.location, ns);
745  if(!value.has_value())
746  {
747  log.error().source_location = class_symbol.location;
748  log.error() << "failed to zero-initialize " << f.name << messaget::eom;
749  throw 0;
750  }
751  new_symbol.value = *value;
752 
753  // Load annotations
754  if(!f.annotations.empty())
755  {
756  convert_annotations(
757  f.annotations,
758  type_checked_cast<annotated_typet>(new_symbol.type).get_annotations());
759  }
760 
761  // Do we have the static field symbol already?
762  const auto s_it=symbol_table.symbols.find(new_symbol.name);
763  if(s_it!=symbol_table.symbols.end())
764  symbol_table.erase(s_it); // erase, we stubbed it
765 
766  const bool failed = symbol_table.add(new_symbol);
767  CHECK_RETURN_WITH_DIAGNOSTICS(!failed, "failed to add static field symbol");
768  }
769  else
770  {
771  class_type.components().emplace_back();
772  auto &component = class_type.components().back();
773 
774  component.set_name(f.name);
775  component.set_base_name(f.name);
776  component.set_pretty_name(f.name);
777  component.set_access(access);
778  component.set_is_final(f.is_final);
779  component.type() = field_type;
780 
781  // Load annotations
782  if(!f.annotations.empty())
783  {
784  convert_annotations(
785  f.annotations,
786  static_cast<annotated_typet &>(component.type()).get_annotations());
787  }
788  }
789 }
790 
791 void add_java_array_types(symbol_table_baset &symbol_table)
792 {
793  const std::string letters="ijsbcfdza";
794 
795  for(const char l : letters)
796  {
797  struct_tag_typet struct_tag_type =
798  to_struct_tag_type(java_array_type(l).subtype());
799 
800  const irep_idt &struct_tag_type_identifier =
801  struct_tag_type.get_identifier();
802  if(symbol_table.has_symbol(struct_tag_type_identifier))
803  return;
804 
805  java_class_typet class_type;
806  // we have the base class, java.lang.Object, length and data
807  // of appropriate type
808  class_type.set_tag(struct_tag_type_identifier);
809  // Note that non-array types don't have "java::" at the beginning of their
810  // tag, and their name is "java::" + their tag. Since arrays do have
811  // "java::" at the beginning of their tag we set the name to be the same as
812  // the tag.
813  class_type.set_name(struct_tag_type_identifier);
814 
815  class_type.components().reserve(3);
816  java_class_typet::componentt base_class_component(
817  "@java.lang.Object", struct_tag_typet("java::java.lang.Object"));
818  base_class_component.set_pretty_name("@java.lang.Object");
819  base_class_component.set_base_name("@java.lang.Object");
820  class_type.components().push_back(base_class_component);
821 
822  java_class_typet::componentt length_component("length", java_int_type());
823  length_component.set_pretty_name("length");
824  length_component.set_base_name("length");
825  class_type.components().push_back(length_component);
826 
827  java_class_typet::componentt data_component(
828  "data", java_reference_type(java_type_from_char(l)));
829  data_component.set_pretty_name("data");
830  data_component.set_base_name("data");
831  class_type.components().push_back(data_component);
832 
833  if(l == 'a')
834  {
835  // This is a reference array (java::array[reference]). Add extra fields to
836  // carry the innermost element type and array dimension.
837  java_class_typet::componentt array_element_classid_component(
838  JAVA_ARRAY_ELEMENT_CLASSID_FIELD_NAME, string_typet());
839  array_element_classid_component.set_pretty_name(
840  JAVA_ARRAY_ELEMENT_CLASSID_FIELD_NAME);
841  array_element_classid_component.set_base_name(
842  JAVA_ARRAY_ELEMENT_CLASSID_FIELD_NAME);
843  class_type.components().push_back(array_element_classid_component);
844 
845  java_class_typet::componentt array_dimension_component(
846  JAVA_ARRAY_DIMENSION_FIELD_NAME, java_int_type());
847  array_dimension_component.set_pretty_name(
848  JAVA_ARRAY_DIMENSION_FIELD_NAME);
849  array_dimension_component.set_base_name(JAVA_ARRAY_DIMENSION_FIELD_NAME);
850  class_type.components().push_back(array_dimension_component);
851  }
852 
853  class_type.add_base(struct_tag_typet("java::java.lang.Object"));
854 
855  INVARIANT(
856  is_valid_java_array(class_type),
857  "Constructed a new type representing a Java Array "
858  "object that doesn't match expectations");
859 
860  type_symbolt symbol{struct_tag_type_identifier, class_type, ID_java};
861  symbol.base_name = struct_tag_type.get(ID_C_base_name);
862  symbol_table.add(symbol);
863 
864  // Also provide a clone method:
865  // ----------------------------
866 
867  const irep_idt clone_name =
868  id2string(struct_tag_type_identifier) + ".clone:()Ljava/lang/Object;";
869  java_method_typet::parametert this_param(
870  java_reference_type(struct_tag_type));
871  this_param.set_identifier(id2string(clone_name)+"::this");
872  this_param.set_base_name(ID_this);
873  this_param.set_this();
874  const java_method_typet clone_type({this_param}, java_lang_object_type());
875 
876  parameter_symbolt this_symbol;
877  this_symbol.name=this_param.get_identifier();
878  this_symbol.base_name=this_param.get_base_name();
879  this_symbol.pretty_name=this_symbol.base_name;
880  this_symbol.type=this_param.type();
881  this_symbol.mode=ID_java;
882  symbol_table.add(this_symbol);
883 
884  const irep_idt local_name=
885  id2string(clone_name)+"::cloned_array";
886  auxiliary_symbolt local_symbol;
887  local_symbol.name=local_name;
888  local_symbol.base_name="cloned_array";
889  local_symbol.pretty_name=local_symbol.base_name;
890  local_symbol.type = java_reference_type(struct_tag_type);
891  local_symbol.mode=ID_java;
892  symbol_table.add(local_symbol);
893  const auto local_symexpr = local_symbol.symbol_expr();
894 
895  code_declt declare_cloned(local_symexpr);
896 
897  source_locationt location;
898  location.set_function(local_name);
899  side_effect_exprt java_new_array(
900  ID_java_new_array, java_reference_type(struct_tag_type), location);
901  dereference_exprt old_array{this_symbol.symbol_expr()};
902  dereference_exprt new_array{local_symexpr};
903  member_exprt old_length(
904  old_array, length_component.get_name(), length_component.type());
905  java_new_array.copy_to_operands(old_length);
906  code_frontend_assignt create_blank(local_symexpr, java_new_array);
907 
908  codet copy_type_information = code_skipt();
909  if(l == 'a')
910  {
911  // Reference arrays carry additional type information in their classids
912  // which should be copied:
913  const auto &array_dimension_component =
914  class_type.get_component(JAVA_ARRAY_DIMENSION_FIELD_NAME);
915  const auto &array_element_classid_component =
916  class_type.get_component(JAVA_ARRAY_ELEMENT_CLASSID_FIELD_NAME);
917 
918  member_exprt old_array_dimension(old_array, array_dimension_component);
919  member_exprt old_array_element_classid(
920  old_array, array_element_classid_component);
921 
922  member_exprt new_array_dimension(new_array, array_dimension_component);
923  member_exprt new_array_element_classid(
924  new_array, array_element_classid_component);
925 
926  copy_type_information = code_blockt{
927  {code_frontend_assignt(new_array_dimension, old_array_dimension),
928  code_frontend_assignt(
929  new_array_element_classid, old_array_element_classid)}};
930  }
931 
932  member_exprt old_data(
933  old_array, data_component.get_name(), data_component.type());
934  member_exprt new_data(
935  new_array, data_component.get_name(), data_component.type());
936 
937  /*
938  // TODO use this instead of a loop.
939  codet array_copy;
940  array_copy.set_statement(ID_array_copy);
941  array_copy.add_to_operands(std::move(new_data), std::move(old_data));
942  clone_body.add_to_operands(std::move(array_copy));
943  */
944 
945  // Begin for-loop to replace:
946 
947  const irep_idt index_name=
948  id2string(clone_name)+"::index";
949  auxiliary_symbolt index_symbol;
950  index_symbol.name=index_name;
951  index_symbol.base_name="index";
952  index_symbol.pretty_name=index_symbol.base_name;
953  index_symbol.type = length_component.type();
954  index_symbol.mode=ID_java;
955  symbol_table.add(index_symbol);
956  const auto &index_symexpr=index_symbol.symbol_expr();
957 
958  code_declt declare_index(index_symexpr);
959 
960  dereference_exprt old_cell(
961  plus_exprt(old_data, index_symexpr),
962  to_type_with_subtype(old_data.type()).subtype());
963  dereference_exprt new_cell(
964  plus_exprt(new_data, index_symexpr),
965  to_type_with_subtype(new_data.type()).subtype());
966 
967  const code_fort copy_loop = code_fort::from_index_bounds(
968  from_integer(0, index_symexpr.type()),
969  old_length,
970  index_symexpr,
971  code_frontend_assignt(std::move(new_cell), std::move(old_cell)),
972  location);
973 
974  member_exprt new_base_class(
975  new_array, base_class_component.get_name(), base_class_component.type());
976  address_of_exprt retval(new_base_class);
977  code_returnt return_inst(retval);
978 
979  const code_blockt clone_body({declare_cloned,
980  create_blank,
981  copy_type_information,
982  declare_index,
983  copy_loop,
984  return_inst});
985 
986  symbolt clone_symbol{clone_name, clone_type, ID_java};
987  clone_symbol.pretty_name =
988  id2string(struct_tag_type_identifier) + ".clone:()";
989  clone_symbol.base_name="clone";
990  clone_symbol.value=clone_body;
991  symbol_table.add(clone_symbol);
992  }
993 }
994 
995 bool java_bytecode_convert_class(
996  const java_class_loadert::parse_tree_with_overlayst &parse_trees,
997  symbol_table_baset &symbol_table,
998  message_handlert &message_handler,
999  size_t max_array_length,
1000  method_bytecodet &method_bytecode,
1001  java_string_library_preprocesst &string_preprocess,
1002  const std::unordered_set<std::string> &no_load_classes)
1003 {
1004  java_bytecode_convert_classt java_bytecode_convert_class(
1005  symbol_table,
1006  message_handler,
1007  max_array_length,
1008  method_bytecode,
1009  string_preprocess,
1010  no_load_classes);
1011 
1012  try
1013  {
1014  java_bytecode_convert_class(parse_trees);
1015  return false;
1016  }
1017 
1018  catch(int)
1019  {
1020  }
1021 
1022  catch(const char *e)
1023  {
1024  messaget log{message_handler};
1025  log.error() << e << messaget::eom;
1026  }
1027 
1028  catch(const std::string &e)
1029  {
1030  messaget log{message_handler};
1031  log.error() << e << messaget::eom;
1032  }
1033 
1034  return true;
1035 }
1036 
1037 static std::string get_final_name_component(const std::string &name)
1038 {
1039  return name.substr(name.rfind("::") + 2);
1040 }
1041 
1042 static std::string get_without_final_name_component(const std::string &name)
1043 {
1044  return name.substr(0, name.rfind("::"));
1045 }
1046 
1059 static void find_and_replace_parameter(
1060  java_generic_parametert &parameter,
1061  const std::vector<java_generic_parametert> &replacement_parameters)
1062 {
1063  // get the name of the parameter, e.g., `T` from `java::Class::T`
1064  const std::string &parameter_full_name =
1065  id2string(parameter.type_variable_ref().get_identifier());
1066  const std::string parameter_name =
1067  get_final_name_component(parameter_full_name);
1068 
1069  // check if there is a replacement parameter with the same name
1070  const auto replacement_parameter_it = std::find_if(
1071  replacement_parameters.begin(),
1072  replacement_parameters.end(),
1073  [&parameter_name](const java_generic_parametert &replacement_param) {
1074  return parameter_name ==
1075  get_final_name_component(
1076  id2string(replacement_param.type_variable().get_identifier()));
1077  });
1078  if(replacement_parameter_it == replacement_parameters.end())
1079  return;
1080 
1081  // A replacement parameter was found, update the identifier
1082  const std::string &replacement_parameter_full_name =
1083  id2string(replacement_parameter_it->type_variable().get_identifier());
1084 
1085  // Check the replacement parameter comes from an outer class
1086  PRECONDITION(has_prefix(
1087  replacement_parameter_full_name,
1088  get_without_final_name_component(parameter_full_name)));
1089 
1090  parameter.type_variable_ref().set_identifier(replacement_parameter_full_name);
1091 }
1092 
1098 static void find_and_replace_parameters(
1099  typet &type,
1100  const std::vector<java_generic_parametert> &replacement_parameters)
1101 {
1102  if(is_java_generic_parameter(type))
1103  {
1104  find_and_replace_parameter(
1105  to_java_generic_parameter(type), replacement_parameters);
1106  }
1107  else if(is_java_generic_type(type))
1108  {
1109  java_generic_typet &generic_type = to_java_generic_type(type);
1110  std::vector<reference_typet> &arguments =
1111  generic_type.generic_type_arguments();
1112  for(auto &argument : arguments)
1113  {
1114  find_and_replace_parameters(argument, replacement_parameters);
1115  }
1116  }
1117  else if(is_java_generic_struct_tag_type(type))
1118  {
1119  java_generic_struct_tag_typet &generic_base =
1120  to_java_generic_struct_tag_type(type);
1121  std::vector<reference_typet> &gen_types = generic_base.generic_types();
1122  for(auto &gen_type : gen_types)
1123  {
1124  find_and_replace_parameters(gen_type, replacement_parameters);
1125  }
1126  }
1127 }
1128 
1132 void convert_annotations(
1133  const java_bytecode_parse_treet::annotationst &parsed_annotations,
1134  std::vector<java_annotationt> &java_annotations)
1135 {
1136  for(const auto &annotation : parsed_annotations)
1137  {
1138  java_annotations.emplace_back(annotation.type);
1139  std::vector<java_annotationt::valuet> &values =
1140  java_annotations.back().get_values();
1141  std::transform(
1142  annotation.element_value_pairs.begin(),
1143  annotation.element_value_pairs.end(),
1144  std::back_inserter(values),
1145  [](const decltype(annotation.element_value_pairs)::value_type &value) {
1146  return java_annotationt::valuet(value.element_name, value.value);
1147  });
1148  }
1149 }
1150 
1155 void convert_java_annotations(
1156  const std::vector<java_annotationt> &java_annotations,
1157  java_bytecode_parse_treet::annotationst &annotations)
1158 {
1159  for(const auto &java_annotation : java_annotations)
1160  {
1161  annotations.emplace_back(java_bytecode_parse_treet::annotationt());
1162  auto &annotation = annotations.back();
1163  annotation.type = java_annotation.get_type();
1164 
1165  std::transform(
1166  java_annotation.get_values().begin(),
1167  java_annotation.get_values().end(),
1168  std::back_inserter(annotation.element_value_pairs),
1169  [](const java_annotationt::valuet &value)
1170  -> java_bytecode_parse_treet::annotationt::element_value_pairt {
1171  return {value.get_name(), value.get_value()};
1172  });
1173  }
1174 }
1175 
1180 void mark_java_implicitly_generic_class_type(
1181  const irep_idt &class_name,
1182  symbol_table_baset &symbol_table)
1183 {
1184  const std::string qualified_class_name = "java::" + id2string(class_name);
1185  PRECONDITION(symbol_table.has_symbol(qualified_class_name));
1186  // This will have its type changed
1187  symbolt &class_symbol = symbol_table.get_writeable_ref(qualified_class_name);
1188  const java_class_typet &class_type = to_java_class_type(class_symbol.type);
1189 
1190  // the class must be an inner non-static class, i.e., have a field this$*
1191  // TODO this should be simplified once static inner classes are marked
1192  // during parsing
1193  bool no_this_field = std::none_of(
1194  class_type.components().begin(),
1195  class_type.components().end(),
1196  [](const struct_union_typet::componentt &component)
1197  {
1198  return id2string(component.get_name()).substr(0, 5) == "this$";
1199  });
1200  if(no_this_field)
1201  {
1202  return;
1203  }
1204 
1205  // create a vector of all generic type parameters of all outer classes, in
1206  // the order from the outer-most inwards
1207  std::vector<java_generic_parametert> implicit_generic_type_parameters;
1208  std::string::size_type outer_class_delimiter =
1209  qualified_class_name.rfind('$');
1210  while(outer_class_delimiter != std::string::npos)
1211  {
1212  std::string outer_class_name =
1213  qualified_class_name.substr(0, outer_class_delimiter);
1214  if(symbol_table.has_symbol(outer_class_name))
1215  {
1216  const symbolt &outer_class_symbol =
1217  symbol_table.lookup_ref(outer_class_name);
1218  const java_class_typet &outer_class_type =
1219  to_java_class_type(outer_class_symbol.type);
1220  if(is_java_generic_class_type(outer_class_type))
1221  {
1222  for(const java_generic_parametert &outer_generic_type_parameter :
1223  to_java_generic_class_type(outer_class_type).generic_types())
1224  {
1225  // Create a new generic type parameter with name in the form:
1226  // java::Outer$Inner::Outer::T
1227  irep_idt identifier = qualified_class_name + "::" +
1228  id2string(strip_java_namespace_prefix(
1229  outer_generic_type_parameter.get_name()));
1230  java_generic_parameter_tagt bound = to_java_generic_parameter_tag(
1231  outer_generic_type_parameter.base_type());
1232  bound.type_variable_ref().set_identifier(identifier);
1233  implicit_generic_type_parameters.emplace_back(identifier, bound);
1234  }
1235  }
1236  outer_class_delimiter = outer_class_name.rfind('$');
1237  }
1238  else
1239  {
1240  throw missing_outer_class_symbol_exceptiont(
1241  outer_class_name, qualified_class_name);
1242  }
1243  }
1244 
1245  // if there are any implicit generic type parameters, mark the class
1246  // implicitly generic and update identifiers of type parameters used in fields
1247  if(!implicit_generic_type_parameters.empty())
1248  {
1249  java_implicitly_generic_class_typet new_class_type(
1250  class_type, implicit_generic_type_parameters);
1251 
1252  // Prepend existing parameters so choose those above any inherited
1253  if(is_java_generic_class_type(class_type))
1254  {
1255  const java_generic_class_typet::generic_typest &class_type_params =
1256  to_java_generic_class_type(class_type).generic_types();
1257  implicit_generic_type_parameters.insert(
1258  implicit_generic_type_parameters.begin(),
1259  class_type_params.begin(),
1260  class_type_params.end());
1261  }
1262 
1263  for(auto &field : new_class_type.components())
1264  {
1265  find_and_replace_parameters(
1266  field.type(), implicit_generic_type_parameters);
1267  }
1268 
1269  for(auto &base : new_class_type.bases())
1270  {
1271  find_and_replace_parameters(
1272  base.type(), implicit_generic_type_parameters);
1273  }
1274 
1275  class_symbol.type = new_class_type;
1276  }
1277 }
Collect methods needed to be loaded using the lazy method.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:38
bool empty() const
Definition: dstring.h:89
void set(const irep_idt &name, const irep_idt &value)
Definition: irep.h:408
static bool is_overlay_method(const methodt &method)
Check if a method is an overlay method by searching for ID_overlay_method in its list of annotations.
void operator()(const java_class_loadert::parse_tree_with_overlayst &parse_trees)
Converts all the class parse trees into a class symbol and adds it to the symbol table.
java_bytecode_parse_treet::methodt methodt
java_bytecode_parse_treet::fieldt fieldt
std::unordered_set< std::string > no_load_classes
java_string_library_preprocesst & string_preprocess
java_bytecode_parse_treet::annotationt annotationt
java_bytecode_parse_treet::classt classt
void convert(const classt &c, const overlay_classest &overlay_classes)
Convert a class, adding symbols to the symbol table for its members.
std::list< std::reference_wrapper< const classt > > overlay_classest
bool check_field_exists(const fieldt &field, const irep_idt &qualified_fieldname, const struct_union_typet::componentst &fields) const
java_bytecode_convert_classt(symbol_table_baset &_symbol_table, message_handlert &_message_handler, size_t _max_array_length, method_bytecodet &method_bytecode, java_string_library_preprocesst &_string_preprocess, const std::unordered_set< std::string > &no_load_classes)
static bool is_ignored_method(const irep_idt &class_name, const methodt &method)
Check if a method is an ignored method, by one of two mechanisms:
std::list< java_bytecode_parse_treet > parse_tree_with_overlayst
A list of parse trees supporting overlay classes.
void set_is_annotation(bool is_annotation)
marks class an annotation
Definition: java_types.h:445
void set_inner_name(const irep_idt &name)
Set the name of a java inner class.
Definition: java_types.h:575
void set_final(bool is_final)
Definition: java_types.h:387
void set_is_anonymous_class(const bool &is_anonymous_class)
Definition: java_types.h:377
void set_abstract(bool abstract)
marks class abstract
Definition: java_types.h:421
void set_super_class(const irep_idt &super_class)
Definition: java_types.h:357
void set_synthetic(bool synthetic)
marks class synthetic
Definition: java_types.h:433
void set_is_inner_class(const bool &is_inner_class)
Definition: java_types.h:337
void set_is_static_class(const bool &is_static_class)
Definition: java_types.h:367
void set_is_enumeration(const bool is_enumeration)
marks class as an enumeration
Definition: java_types.h:409
void set_access(const irep_idt &access)
Definition: java_types.h:327
const componentst & components() const
Definition: java_types.h:223
void set_interface(bool interface)
marks class an interface
Definition: java_types.h:457
void set_outer_class(const irep_idt &outer_class)
Definition: java_types.h:347
Class to hold a class with generics, extends the java class type with a vector of java generic type p...
Definition: java_types.h:972
const generic_typest & generic_types() const
Definition: java_types.h:981
Class to hold type with generic type arguments, for example java.util.List in either a reference of t...
Definition: java_types.h:857
void add_string_type(const irep_idt &class_name, symbol_table_baset &symbol_table)
Add to the symbol table type declaration for a String-like Java class.
bool is_known_string_type(irep_idt class_name)
Check whether a class name is known as a string type.
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:155
mstreamt & warning() const
Definition: message.h:404
mstreamt & debug() const
Definition: message.h:429
message_handlert & get_message_handler()
Definition: message.h:184
static eomt eom
Definition: message.h:297
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:493
void add_base(const struct_tag_typet &base)
Add a base class/struct.
Definition: std_types.cpp:98
const basest & bases() const
Get the collection of base classes/structs.
Definition: std_types.h:262
void set_tag(const irep_idt &tag)
Definition: std_types.h:169
std::vector< componentt > componentst
Definition: std_types.h:140
The symbol table base class interface.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
Symbol table entry.
Definition: symbol.h:28
The type of an expression, extends irept.
Definition: type.h:29
An exception that is raised for unsupported class signature.
Definition: java_types.h:1131
Expression Initialization.
const std::string & id2string(const irep_idt &d)
Definition: irep.h:40
void add_java_array_types(symbol_table_baset &symbol_table)
Register in the symbol_table new symbols for the objects java::array[X] where X is byte,...
static std::optional< std::string > extract_generic_interface_reference(const std::optional< std::string > &signature, const std::string &interface_name)
Auxiliary function to extract the generic interface reference of an interface with the specified name...
static std::optional< std::string > extract_generic_superclass_reference(const std::optional< std::string > &signature)
Auxiliary function to extract the generic superclass reference from the class signature.
JAVA Bytecode Language Conversion.
JAVA Bytecode Language Conversion.
Produce code for simple implementation of String Java libraries.
size_t find_closing_semi_colon_for_reference_type(const std::string src, size_t starting_point)
Finds the closing semi-colon ending a ClassTypeSignature that starts at starting_point.
Definition: java_types.cpp:516
std::vector< typet > java_generic_type_from_string(const std::string &class_name, const std::string &src)
Converts the content of a generic class type into a vector of Java types, that is each type variable ...
Definition: java_types.cpp:749
const java_generic_parametert & to_java_generic_parameter(const typet &type)
Definition: java_types.h:826
const std::unordered_set< std::string > cprover_methods_to_ignore
Methods belonging to the class org.cprover.CProver that should be ignored (not converted),...
Definition: java_utils.cpp:529
void generate_class_stub(const irep_idt &class_name, symbol_table_baset &symbol_table, message_handlert &message_handler, const struct_union_typet::componentst &componentst)
Definition: java_utils.cpp:162
size_t find_closing_delimiter(const std::string &src, size_t open_pos, char open_char, char close_char)
Finds the corresponding closing delimiter to the given opening delimiter.
Definition: java_utils.cpp:302
#define PRECONDITION(CONDITION)
Definition: invariant.h:463
API to expression classes.
void replace(const union_find_replacet &replace_map, string_not_contains_constraintt &constraint)
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
std::optional< std::string > signature
bool has_annotation(const irep_idt &annotation_id) const
Author: Diffblue Ltd.