diff --git a/regression/cbmc-java/invalid_classpath/Test.class b/regression/cbmc-java/invalid_classpath/Test.class new file mode 100644 index 00000000000..aaa01c760f2 Binary files /dev/null and b/regression/cbmc-java/invalid_classpath/Test.class differ diff --git a/regression/cbmc-java/invalid_classpath/Test.java b/regression/cbmc-java/invalid_classpath/Test.java new file mode 100644 index 00000000000..e3957a5157d --- /dev/null +++ b/regression/cbmc-java/invalid_classpath/Test.java @@ -0,0 +1,3 @@ +public class Test { + public void main() {} +} diff --git a/regression/cbmc-java/invalid_classpath/test-jar.desc b/regression/cbmc-java/invalid_classpath/test-jar.desc new file mode 100644 index 00000000000..ad020054856 --- /dev/null +++ b/regression/cbmc-java/invalid_classpath/test-jar.desc @@ -0,0 +1,10 @@ +CORE symex-driven-lazy-loading-expected-failure +Test.class +--classpath ./NotHere.jar +^EXIT=6$ +^SIGNAL=0$ +^the program has no entry point$ +^failed to load class `Test'$ +-- +-- +symex-driven lazy loading should emit "the program has no entry point" but currently doesn't diff --git a/regression/cbmc-java/invalid_classpath/test-path.desc b/regression/cbmc-java/invalid_classpath/test-path.desc new file mode 100644 index 00000000000..282ae0209f8 --- /dev/null +++ b/regression/cbmc-java/invalid_classpath/test-path.desc @@ -0,0 +1,10 @@ +CORE symex-driven-lazy-loading-expected-failure +Test.class +--classpath ./NotHere +^EXIT=6$ +^SIGNAL=0$ +^the program has no entry point$ +^failed to load class `Test'$ +-- +-- +symex-driven lazy loading should emit "the program has no entry point" but currently doesn't diff --git a/regression/cbmc-java/overlay-class/Test.class b/regression/cbmc-java/overlay-class/Test.class new file mode 100644 index 00000000000..0e10ee8742c Binary files /dev/null and b/regression/cbmc-java/overlay-class/Test.class differ diff --git a/regression/cbmc-java/overlay-class/Test.java b/regression/cbmc-java/overlay-class/Test.java new file mode 100644 index 00000000000..acce06c7d4d --- /dev/null +++ b/regression/cbmc-java/overlay-class/Test.java @@ -0,0 +1,14 @@ +public class Test +{ + public int x; + + public static void main(String[] args) + { + assert(false); + } + + public static void notOverlain() + { + assert(true); + } +} diff --git a/regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayClassImplementation.class b/regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayClassImplementation.class new file mode 100644 index 00000000000..49c5688dd41 Binary files /dev/null and b/regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayClassImplementation.class differ diff --git a/regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayClassImplementation.java b/regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayClassImplementation.java new file mode 100644 index 00000000000..99021b8f6cc --- /dev/null +++ b/regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayClassImplementation.java @@ -0,0 +1,4 @@ +package com.diffblue; + +public @interface OverlayClassImplementation { +} diff --git a/regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayMethodImplementation.class b/regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayMethodImplementation.class new file mode 100644 index 00000000000..cf6e1900736 Binary files /dev/null and b/regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayMethodImplementation.class differ diff --git a/regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayMethodImplementation.java b/regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayMethodImplementation.java new file mode 100644 index 00000000000..d3d7211f594 --- /dev/null +++ b/regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayMethodImplementation.java @@ -0,0 +1,4 @@ +package com.diffblue; + +public @interface OverlayMethodImplementation { +} diff --git a/regression/cbmc-java/overlay-class/correct-overlay/Test.class b/regression/cbmc-java/overlay-class/correct-overlay/Test.class new file mode 100644 index 00000000000..c6caf42f66e Binary files /dev/null and b/regression/cbmc-java/overlay-class/correct-overlay/Test.class differ diff --git a/regression/cbmc-java/overlay-class/correct-overlay/Test.java b/regression/cbmc-java/overlay-class/correct-overlay/Test.java new file mode 100644 index 00000000000..7618d61f874 --- /dev/null +++ b/regression/cbmc-java/overlay-class/correct-overlay/Test.java @@ -0,0 +1,14 @@ +import com.diffblue.OverlayClassImplementation; +import com.diffblue.OverlayMethodImplementation; + +@OverlayClassImplementation +public class Test +{ + public int x; + + @OverlayMethodImplementation + public static void main(String[] args) + { + assert(true); + } +} diff --git a/regression/cbmc-java/overlay-class/correct-test.desc b/regression/cbmc-java/overlay-class/correct-test.desc new file mode 100644 index 00000000000..fea8647abf4 --- /dev/null +++ b/regression/cbmc-java/overlay-class/correct-test.desc @@ -0,0 +1,17 @@ +CORE +Test.class +--classpath `./format_classpath.sh . annotations correct-overlay` --verbosity 10 +^Getting class `Test' from file \.[\\/]Test\.class$ +^Getting class `Test' from file correct-overlay[\\/]Test\.class$ +^Adding symbol from overlay class: field 'x'$ +^Adding symbol from overlay class: method 'java::Test\.:\(\)V'$ +^Field definition for java::Test\.x already loaded from overlay class$ +^Adding symbol from overlay class: method 'java::Test\.main:\(\[Ljava/lang/String;\)V'$ +^Method java::Test\.:\(\)V exists in an overlay class without being marked as an overlay and also exists in the underlying class +^Adding symbol: method 'java::Test\.notOverlain:\(\)V'$ +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +^Skipping class Test marked with OverlayClassImplementation but found before original definition$ +^Skipping duplicate definition of class Test not marked with OverlayClassImplementation$ diff --git a/regression/cbmc-java/overlay-class/duplicate-test.desc b/regression/cbmc-java/overlay-class/duplicate-test.desc new file mode 100644 index 00000000000..1ac968acb9c --- /dev/null +++ b/regression/cbmc-java/overlay-class/duplicate-test.desc @@ -0,0 +1,17 @@ +CORE +Test.class +--classpath `./format_classpath.sh . annotations . correct-overlay` --verbosity 10 +^Getting class `Test' from file \.[\\/]Test\.class$ +^Getting class `Test' from file correct-overlay[\\/]Test\.class$ +^Skipping duplicate definition of class Test not marked with OverlayClassImplementation$ +^Adding symbol from overlay class: field 'x'$ +^Adding symbol from overlay class: method 'java::Test\.:\(\)V'$ +^Field definition for java::Test\.x already loaded from overlay class$ +^Adding symbol from overlay class: method 'java::Test\.main:\(\[Ljava/lang/String;\)V'$ +^Method java::Test\.:\(\)V exists in an overlay class without being marked as an overlay and also exists in the underlying class +^Adding symbol: method 'java::Test\.notOverlain:\(\)V'$ +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +^Skipping class Test marked with OverlayClassImplementation but found before original definition$ diff --git a/regression/cbmc-java/overlay-class/format_classpath.sh b/regression/cbmc-java/overlay-class/format_classpath.sh new file mode 100755 index 00000000000..b905ab54537 --- /dev/null +++ b/regression/cbmc-java/overlay-class/format_classpath.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +unameOut="$(uname -s)" +case "${unameOut}" in + CYGWIN*) separator=";";; + MINGW*) separator=";";; + MSYS*) separator=";";; + Windows*) separator=";";; + *) separator=":" +esac + +echo -n `IFS=$separator; echo "$*"` diff --git a/regression/cbmc-java/overlay-class/misordered-test.desc b/regression/cbmc-java/overlay-class/misordered-test.desc new file mode 100644 index 00000000000..acd447a376f --- /dev/null +++ b/regression/cbmc-java/overlay-class/misordered-test.desc @@ -0,0 +1,16 @@ +CORE +Test.class +--classpath `./format_classpath.sh annotations correct-overlay .` --verbosity 10 +^Getting class `Test' from file correct-overlay[\\/]Test\.class$ +^Getting class `Test' from file \.[\\/]Test\.class$ +^Skipping class Test marked with OverlayClassImplementation but found before original definition$ +^Adding symbol: field 'x'$ +^Adding symbol: method 'java::Test\.main:\(\[Ljava/lang/String;\)V'$ +^Adding symbol: method 'java::Test\.notOverlain:\(\)V'$ +^VERIFICATION FAILED$ +^EXIT=10$ +^SIGNAL=0$ +-- +^Skipping duplicate definition of class Test not marked with OverlayClassImplementation$ +^Adding symbol from overlay class +exists in an overlay class without being marked as an overlay and also exists in the underlying class diff --git a/regression/cbmc-java/overlay-class/unmarked-overlay/Test.class b/regression/cbmc-java/overlay-class/unmarked-overlay/Test.class new file mode 100644 index 00000000000..9153608f2ba Binary files /dev/null and b/regression/cbmc-java/overlay-class/unmarked-overlay/Test.class differ diff --git a/regression/cbmc-java/overlay-class/unmarked-overlay/Test.java b/regression/cbmc-java/overlay-class/unmarked-overlay/Test.java new file mode 100644 index 00000000000..5fa843f407d --- /dev/null +++ b/regression/cbmc-java/overlay-class/unmarked-overlay/Test.java @@ -0,0 +1,11 @@ +import com.diffblue.OverlayClassImplementation; +import com.diffblue.OverlayMethodImplementation; + +public class Test +{ + @OverlayMethodImplementation + public static void main(String[] args) + { + assert(false); + } +} diff --git a/regression/cbmc-java/overlay-class/unmarked-test.desc b/regression/cbmc-java/overlay-class/unmarked-test.desc new file mode 100644 index 00000000000..915f857fb87 --- /dev/null +++ b/regression/cbmc-java/overlay-class/unmarked-test.desc @@ -0,0 +1,16 @@ +CORE +Test.class +--classpath `./format_classpath.sh . annotations unmarked-overlay` --verbosity 10 +^Getting class `Test' from file \.[\\/]Test\.class$ +^Getting class `Test' from file unmarked-overlay[\\/]Test\.class$ +^Skipping duplicate definition of class Test not marked with OverlayClassImplementation$ +^Adding symbol: field 'x'$ +^Adding symbol: method 'java::Test\.main:\(\[Ljava/lang/String;\)V'$ +^Adding symbol: method 'java::Test\.notOverlain:\(\)V'$ +^VERIFICATION FAILED$ +^EXIT=10$ +^SIGNAL=0$ +-- +^Skipping class Test marked with OverlayClassImplementation but found before original definition$ +^Adding symbol from overlay class +exists in an overlay class without being marked as an overlay and also exists in the underlying class diff --git a/regression/cbmc-java/provide_object_implementation/java/lang/Object.class b/regression/cbmc-java/provide_object_implementation/java/lang/Object.class new file mode 100644 index 00000000000..868d3dbea58 Binary files /dev/null and b/regression/cbmc-java/provide_object_implementation/java/lang/Object.class differ diff --git a/regression/cbmc-java/provide_object_implementation/java/lang/Object.java b/regression/cbmc-java/provide_object_implementation/java/lang/Object.java new file mode 100644 index 00000000000..0ba3304a00c --- /dev/null +++ b/regression/cbmc-java/provide_object_implementation/java/lang/Object.java @@ -0,0 +1,33 @@ +package java.lang; + +public class Object { + public final Class getClass() { + return null; + } + + public int hashCode() { return 0; } + + public boolean equals(Object obj) { return (this == obj); } + + protected Object clone() throws CloneNotSupportedException { + throw new CloneNotSupportedException(); + } + + public String toString() { return "object"; } + + public final void notify() {} + + public final void notifyAll() {} + + public final void wait(long timeout) throws InterruptedException { + throw new InterruptedException(); + } + + public final void wait(long timeout, int nanos) throws InterruptedException { + wait(timeout); + } + + public final void wait() throws InterruptedException { wait(0); } + + protected void finalize() throws Throwable { } +} diff --git a/regression/cbmc-java/provide_object_implementation/test.desc b/regression/cbmc-java/provide_object_implementation/test.desc new file mode 100644 index 00000000000..381ae109d1e --- /dev/null +++ b/regression/cbmc-java/provide_object_implementation/test.desc @@ -0,0 +1,10 @@ +CORE symex-driven-lazy-loading-expected-failure +java/lang/Object.class + +^EXIT=6$ +^SIGNAL=0$ +^the program has no entry point$ +-- +^failed to add class symbol java::java\.lang\.Object$ +-- +symex-driven lazy loading should emit "the program has no entry point" but currently doesn't diff --git a/src/java_bytecode/ci_lazy_methods.cpp b/src/java_bytecode/ci_lazy_methods.cpp index 32344f24b2a..30f5deacf10 100644 --- a/src/java_bytecode/ci_lazy_methods.cpp +++ b/src/java_bytecode/ci_lazy_methods.cpp @@ -85,15 +85,15 @@ bool ci_lazy_methodst::operator()( reachable_classes.push_back(main_class); else reachable_classes = main_jar_classes; - for(const auto &classname : reachable_classes) + for(const irep_idt &class_name : reachable_classes) { - const auto &methods= - java_class_loader.class_map.at(classname).parsed_class.methods; + const auto &methods = + java_class_loader.get_original_class(class_name).parsed_class.methods; for(const auto &method : methods) { - const irep_idt methodid="java::"+id2string(classname)+"."+ - id2string(method.name)+":"+ - id2string(method.descriptor); + const irep_idt methodid = + "java::" + id2string(class_name) + "." + id2string(method.name) + + ":" + id2string(method.descriptor); method_worklist2.push_back(methodid); } } diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 711bba8949e..33912bd85b7 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -48,23 +48,53 @@ class java_bytecode_convert_classt:public messaget { } - void operator()(const java_bytecode_parse_treet &parse_tree) + /// Converts all the class parse trees into a class symbol and adds it to the + /// symbol table. + /// \param parse_trees: The parse trees found for the class to be converted. + /// \remarks + /// Allows multiple definitions of the same class to appear on the + /// classpath, so long as all but the first definition are marked with the + /// attribute `\@java::com.diffblue.OverlayClassImplementation`. + /// Overlay class definitions can contain methods with the same signature + /// as methods in the original class, so long as these are marked with the + /// attribute `\@java::com.diffblue.OverlayMethodImplementation`; such + /// overlay methods are replaced in the original file with the version + /// found in the last overlay on the classpath. Later definitions can + /// also contain new supporting methods and fields that are merged in. + /// This will allow insertion of Java methods into library classes to + /// handle, for example, modelling dependency injection. + void operator()( + const java_class_loadert::parse_tree_with_overlayst &parse_trees) { - // add array types to the symbol table + PRECONDITION(!parse_trees.empty()); + const java_bytecode_parse_treet &parse_tree = parse_trees.front(); + + // Add array types to the symbol table add_array_types(symbol_table); const bool loading_success = parse_tree.loading_successful && !no_load_classes.count(id2string(parse_tree.parsed_class.name)); if(loading_success) - convert(parse_tree.parsed_class); + { + overlay_classest overlay_classes; + for(auto overlay_class_it = std::next(parse_trees.begin()); + overlay_class_it != parse_trees.end(); + ++overlay_class_it) + { + overlay_classes.push_front(std::cref(overlay_class_it->parsed_class)); + } + convert(parse_tree.parsed_class, overlay_classes); + } - if(string_preprocess.is_known_string_type(parse_tree.parsed_class.name)) - string_preprocess.add_string_type( - parse_tree.parsed_class.name, symbol_table); + // Add as string type if relevant + const irep_idt &class_name = parse_tree.parsed_class.name; + if(string_preprocess.is_known_string_type(class_name)) + string_preprocess.add_string_type(class_name, symbol_table); else if(!loading_success) + // Generate stub if couldn't load from bytecode and wasn't string type generate_class_stub( - parse_tree.parsed_class.name, + class_name, symbol_table, get_message_handler(), struct_union_typet::componentst{}); @@ -72,19 +102,29 @@ class java_bytecode_convert_classt:public messaget typedef java_bytecode_parse_treet::classt classt; typedef java_bytecode_parse_treet::fieldt fieldt; + typedef java_bytecode_parse_treet::methodt methodt; -protected: +private: symbol_tablet &symbol_table; const size_t max_array_length; method_bytecodet &method_bytecode; java_string_library_preprocesst &string_preprocess; // conversion - void convert(const classt &c); + typedef std::list> overlay_classest; + void convert(const classt &c, const overlay_classest &overlay_classes); void convert(symbolt &class_symbol, const fieldt &f); // see definition below for more info static void add_array_types(symbol_tablet &symbol_table); + + static bool is_overlay_method(const methodt &method); + + bool check_field_exists( + const fieldt &field, + const irep_idt &qualified_fieldname, + const struct_union_typet::componentst &fields) const; + std::unordered_set no_load_classes; }; @@ -176,7 +216,12 @@ static optionalt extract_generic_interface_reference( return {}; } -void java_bytecode_convert_classt::convert(const classt &c) +/// Convert a class, adding symbols to the symbol table for its members +/// \param c: Bytecode of the class to convert +/// \param overlay_classes: Bytecode of any overlays for the class to convert +void java_bytecode_convert_classt::convert( + const classt &c, + const overlay_classest &overlay_classes) { std::string qualified_classname="java::"+id2string(c.name); if(symbol_table.has_symbol(qualified_classname)) @@ -347,20 +392,115 @@ void java_bytecode_convert_classt::convert(const classt &c) throw 0; } - // now do fields + // Now do fields + const class_typet::componentst &fields = + to_class_type(class_symbol->type).components(); + // Include fields from overlay classes as they will be required by the + // methods defined there + for(auto overlay_class : overlay_classes) + { + for(const auto &field : overlay_class.get().fields) + { + std::string field_id = qualified_classname + "." + id2string(field.name); + if(check_field_exists(field, field_id, fields)) + { + std::string err = + "Duplicate field definition for " + field_id + " in overlay class"; + // TODO: This could just be a warning if the types match + error() << err << eom; + throw err.c_str(); + } + debug() + << "Adding symbol from overlay class: field '" << field.name << "'" + << eom; + convert(*class_symbol, field); + POSTCONDITION(check_field_exists(field, field_id, fields)); + } + } for(const auto &field : c.fields) { + std::string field_id = qualified_classname + "." + id2string(field.name); + if(check_field_exists(field, field_id, fields)) + { + // TODO: This could be a warning if the types match + error() + << "Field definition for " << field_id + << " already loaded from overlay class" << eom; + continue; + } debug() << "Adding symbol: field '" << field.name << "'" << eom; convert(*class_symbol, field); + POSTCONDITION(check_field_exists(field, field_id, fields)); } - // now do methods - for(const auto &method : c.methods) + // Now do methods + std::set overlay_methods; + for(auto overlay_class : overlay_classes) + { + for(const methodt &method : overlay_class.get().methods) + { + const irep_idt method_identifier = + qualified_classname + "." + id2string(method.name) + + ":" + method.descriptor; + if(method_bytecode.contains_method(method_identifier)) + { + // This method has already been discovered and added to method_bytecode + // while parsing an overlay class that appears later in the classpath + // (we're working backwards) + // Warn the user if the definition already found was not an overlay, + // otherwise simply don't load this earlier definition + if(overlay_methods.count(method_identifier) == 0) + { + // This method was defined in a previous class definition without + // being marked as an overlay method + warning() + << "Method " << method_identifier + << " exists in an overlay class without being marked as an " + "overlay and also exists in another overlay class that appears " + "earlier in the classpath" + << eom; + } + continue; + } + // Always run the lazy pre-stage, as it symbol-table + // registers the function. + debug() + << "Adding symbol from overlay class: method '" << method_identifier + << "'" << eom; + java_bytecode_convert_method_lazy( + *class_symbol, + method_identifier, + method, + symbol_table, + get_message_handler()); + method_bytecode.add(qualified_classname, method_identifier, method); + if(is_overlay_method(method)) + overlay_methods.insert(method_identifier); + } + } + for(const methodt &method : c.methods) { const irep_idt method_identifier= - id2string(qualified_classname)+ - "."+id2string(method.name)+ - ":"+method.descriptor; + qualified_classname + "." + id2string(method.name) + + ":" + method.descriptor; + if(method_bytecode.contains_method(method_identifier)) + { + // This method has already been discovered while parsing an overlay + // class + // If that definition is an overlay then we simply don't load this + // original definition and we remove it from the list of overlays + if(overlay_methods.erase(method_identifier) == 0) + { + // This method was defined in a previous class definition without + // being marked as an overlay method + warning() + << "Method " << method_identifier + << " exists in an overlay class without being marked as an overlay " + "and also exists in the underlying class" + << eom; + } + continue; + } // Always run the lazy pre-stage, as it symbol-table // registers the function. debug() << "Adding symbol: method '" << method_identifier << "'" << eom; @@ -371,6 +511,21 @@ void java_bytecode_convert_classt::convert(const classt &c) symbol_table, get_message_handler()); method_bytecode.add(qualified_classname, method_identifier, method); + if(is_overlay_method(method)) + { + warning() + << "Method " << method_identifier + << " marked as an overlay where defined in the underlying class" << eom; + } + } + if(!overlay_methods.empty()) + { + error() + << "Overlay methods defined in overlay classes did not exist in the " + "underlying class:\n"; + for(const irep_idt &method_id : overlay_methods) + error() << " " << method_id << "\n"; + error() << eom; } // is this a root class? @@ -378,6 +533,27 @@ void java_bytecode_convert_classt::convert(const classt &c) java_root_class(*class_symbol); } +bool java_bytecode_convert_classt::check_field_exists( + const java_bytecode_parse_treet::fieldt &field, + const irep_idt &qualified_fieldname, + const struct_union_typet::componentst &fields) const +{ + if(field.is_static) + return symbol_table.has_symbol(qualified_fieldname); + + auto existing_field = std::find_if( + fields.begin(), + fields.end(), + [&field](const struct_union_typet::componentt &f) + { + return f.get_name() == field.name; + }); + return existing_field != fields.end(); +} + +/// Convert a field, adding a symbol to teh symbol table for it +/// \param class_symbol: The already added symbol for the containing class +/// \param f: The bytecode for the field to convert void java_bytecode_convert_classt::convert( symbolt &class_symbol, const fieldt &f) @@ -666,8 +842,14 @@ void java_bytecode_convert_classt::add_array_types(symbol_tablet &symbol_table) } } +bool java_bytecode_convert_classt::is_overlay_method(const methodt &method) +{ + return java_bytecode_parse_treet::does_annotation_exist( + method.annotations, ID_overlay_method); +} + bool java_bytecode_convert_class( - const java_bytecode_parse_treet &parse_tree, + const java_class_loadert::parse_tree_with_overlayst &parse_trees, symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, @@ -685,7 +867,7 @@ bool java_bytecode_convert_class( try { - java_bytecode_convert_class(parse_tree); + java_bytecode_convert_class(parse_trees); return false; } diff --git a/src/java_bytecode/java_bytecode_convert_class.h b/src/java_bytecode/java_bytecode_convert_class.h index 33edc2b1e09..5cb021f946a 100644 --- a/src/java_bytecode/java_bytecode_convert_class.h +++ b/src/java_bytecode/java_bytecode_convert_class.h @@ -21,7 +21,7 @@ Author: Daniel Kroening, kroening@kroening.com /// See class \ref java_bytecode_convert_classt bool java_bytecode_convert_class( - const java_bytecode_parse_treet &parse_tree, + const java_class_loadert::parse_tree_with_overlayst &parse_trees, symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 4f62931d7d8..0c26b23e2ab 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -43,7 +43,7 @@ Author: Daniel Kroening, kroening@kroening.com void java_bytecode_languaget::get_language_options(const cmdlinet &cmd) { assume_inputs_non_null=cmd.isset("java-assume-inputs-non-null"); - java_class_loader.use_core_models=!cmd.isset("no-core-models"); + java_class_loader.set_use_core_models(!cmd.isset("no-core-models")); string_refinement_enabled=cmd.isset("refine-strings"); throw_runtime_exceptions=cmd.isset("java-throw-runtime-exceptions"); if(cmd.isset("java-max-input-array-length")) @@ -194,7 +194,7 @@ bool java_bytecode_languaget::parse( { status() << "JAR file without entry point: loading class files" << eom; java_class_loader.load_entire_jar(class_loader_limit, path); - for(const auto &kv : java_class_loader.jar_map.at(path).entries) + for(const auto &kv : java_class_loader.get_jar_index(path)) main_jar_classes.push_back(kv.first); } else @@ -234,13 +234,13 @@ static void infer_opaque_type_fields( instruction.statement == "putfield") { const exprt &fieldref = instruction.args[0]; - const symbolt *class_symbol = - symbol_table.lookup(fieldref.get(ID_class)); + irep_idt class_symbol_id = fieldref.get(ID_class); + const symbolt *class_symbol = symbol_table.lookup(class_symbol_id); INVARIANT( - class_symbol != nullptr, "all field types should have been loaded"); + class_symbol != nullptr, + "all types containing fields should have been loaded"); const class_typet *class_type = &to_class_type(class_symbol->type); - irep_idt class_symbol_id = fieldref.get(ID_class); const irep_idt &component_name = fieldref.get(ID_component_name); while(!class_type->has_component(component_name)) { @@ -249,22 +249,24 @@ static void infer_opaque_type_fields( // Accessing a field of an incomplete (opaque) type. symbolt &writable_class_symbol = symbol_table.get_writeable_ref(class_symbol_id); - auto &add_to_components = + auto &components = to_struct_type(writable_class_symbol.type).components(); - add_to_components.push_back( - struct_typet::componentt(component_name, fieldref.type())); - add_to_components.back().set_base_name(component_name); - add_to_components.back().set_pretty_name(component_name); + components.emplace_back(component_name, fieldref.type()); + components.back().set_base_name(component_name); + components.back().set_pretty_name(component_name); break; } else { // Not present here: check the superclass. INVARIANT( - class_type->bases().size() != 0, - "class missing an expected field should have a superclass"); + !class_type->bases().empty(), + "class '" + id2string(class_symbol->name) + + "' (which was missing a field '" + id2string(component_name) + + "' referenced from method '" + id2string(method.name) + + "') should have an opaque superclass"); const symbol_typet &superclass_type = - to_symbol_type(class_type->bases()[0].type()); + to_symbol_type(class_type->bases().front().type()); class_symbol_id = superclass_type.get_identifier(); class_type = &to_class_type(ns.follow(superclass_type)); } @@ -577,9 +579,9 @@ bool java_bytecode_languaget::typecheck( // Must load java.lang.Object first to avoid stubbing // This ordering could alternatively be enforced by // moving the code below to the class loader. - java_class_loadert::class_mapt::const_iterator it= - java_class_loader.class_map.find("java.lang.Object"); - if(it!=java_class_loader.class_map.end()) + java_class_loadert::parse_tree_with_overridest_mapt::const_iterator it = + java_class_loader.get_class_with_overlays_map().find("java.lang.Object"); + if(it != java_class_loader.get_class_with_overlays_map().end()) { if( java_bytecode_convert_class( @@ -597,17 +599,14 @@ bool java_bytecode_languaget::typecheck( // first generate a new struct symbol for each class and a new function symbol // for every method - for(java_class_loadert::class_mapt::const_iterator - c_it=java_class_loader.class_map.begin(); - c_it!=java_class_loader.class_map.end(); - c_it++) + for(const auto &class_trees : java_class_loader.get_class_with_overlays_map()) { - if(c_it->second.parsed_class.name.empty()) + if(class_trees.second.front().parsed_class.name.empty()) continue; if( java_bytecode_convert_class( - c_it->second, + class_trees.second, symbol_table, get_message_handler(), max_user_array_length, @@ -625,14 +624,14 @@ bool java_bytecode_languaget::typecheck( // find and mark all implicitly generic class types // this can only be done once all the class symbols have been created - for(const auto &c : java_class_loader.class_map) + for(const auto &c : java_class_loader.get_class_with_overlays_map()) { - if(c.second.parsed_class.name.empty()) + if(c.second.front().parsed_class.name.empty()) continue; try { mark_java_implicitly_generic_class_type( - c.second.parsed_class.name, symbol_table); + c.second.front().parsed_class.name, symbol_table); } catch(missing_outer_class_symbol_exceptiont &) { @@ -646,9 +645,10 @@ bool java_bytecode_languaget::typecheck( // Infer fields on opaque types based on the method instructions just loaded. // For example, if we don't have bytecode for field x of class A, but we can // see an int-typed getfield instruction referring to it, add that field now. - for(const auto &c : java_class_loader.class_map) + for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map()) { - infer_opaque_type_fields(c.second, symbol_table); + for(const java_bytecode_parse_treet &parse_tree : class_to_trees.second) + infer_opaque_type_fields(parse_tree, symbol_table); } // Create global variables for constants (String and Class literals) up front. @@ -656,12 +656,13 @@ bool java_bytecode_languaget::typecheck( // literal globals' existence when __CPROVER_initialize is generated in // `generate_support_functions`. const std::size_t before_constant_globals_size = symbol_table.symbols.size(); - for(auto &c : java_class_loader.class_map) + for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map()) { - generate_constant_global_variables( - c.second, - symbol_table, - string_refinement_enabled); + for(java_bytecode_parse_treet &parse_tree : class_to_trees.second) + { + generate_constant_global_variables( + parse_tree, symbol_table, string_refinement_enabled); + } } status() << "Java: added " << (symbol_table.symbols.size() - before_constant_globals_size) @@ -680,10 +681,13 @@ bool java_bytecode_languaget::typecheck( { journalling_symbol_tablet symbol_table_journal = journalling_symbol_tablet::wrap(symbol_table); - for(const auto &c : java_class_loader.class_map) + for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map()) { - create_stub_global_symbols( - c.second, symbol_table_journal, class_hierarchy, *this); + for(const java_bytecode_parse_treet &parse_tree : class_to_trees.second) + { + create_stub_global_symbols( + parse_tree, symbol_table_journal, class_hierarchy, *this); + } } stub_global_initializer_factory.create_stub_global_initializer_symbols( @@ -996,7 +1000,20 @@ bool java_bytecode_languaget::final(symbol_table_baset &symbol_table) void java_bytecode_languaget::show_parse(std::ostream &out) { - java_class_loader(main_class).output(out); + java_class_loadert::parse_tree_with_overlayst &parse_trees = + java_class_loader(main_class); + parse_trees.front().output(out); + if(parse_trees.size() > 1) + { + out << "\n\nClass has the following overlays:\n\n"; + for(auto parse_tree_it = std::next(parse_trees.begin()); + parse_tree_it != parse_trees.end(); + ++parse_tree_it) + { + parse_tree_it->output(out); + } + out << "End of class overlays.\n"; + } } std::unique_ptr new_java_bytecode_language() diff --git a/src/java_bytecode/java_bytecode_parse_tree.cpp b/src/java_bytecode/java_bytecode_parse_tree.cpp index 6f6f0b92d41..5b73a88e911 100644 --- a/src/java_bytecode/java_bytecode_parse_tree.cpp +++ b/src/java_bytecode/java_bytecode_parse_tree.cpp @@ -8,6 +8,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_bytecode_parse_tree.h" +#include #include #include @@ -119,6 +120,25 @@ void java_bytecode_parse_treet::annotationt::element_value_pairt::output( out << expr2java(value, ns); } +bool java_bytecode_parse_treet::does_annotation_exist( + const annotationst &annotations, + const irep_idt &annotation_type_name) +{ + return + std::find_if( + annotations.begin(), + annotations.end(), + [&annotation_type_name](const annotationt &annotation) + { + if(annotation.type.id() != ID_pointer) + return false; + typet type = annotation.type.subtype(); + return + type.id() == ID_symbol + && to_symbol_type(type).get_identifier() == annotation_type_name; + }) != annotations.end(); +} + void java_bytecode_parse_treet::methodt::output(std::ostream &out) const { symbol_tablet symbol_table; diff --git a/src/java_bytecode/java_bytecode_parse_tree.h b/src/java_bytecode/java_bytecode_parse_tree.h index f41108500d2..7e583af3673 100644 --- a/src/java_bytecode/java_bytecode_parse_tree.h +++ b/src/java_bytecode/java_bytecode_parse_tree.h @@ -44,6 +44,10 @@ class java_bytecode_parse_treet typedef std::vector annotationst; + static bool does_annotation_exist( + const annotationst &annotations, + const irep_idt &annotation_type_name); + class instructiont { public: diff --git a/src/java_bytecode/java_class_loader.cpp b/src/java_bytecode/java_class_loader.cpp index f4c2ffb5d88..6954e963af1 100644 --- a/src/java_bytecode/java_class_loader.cpp +++ b/src/java_bytecode/java_class_loader.cpp @@ -9,7 +9,6 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_class_loader.h" #include -#include #include #include @@ -17,7 +16,6 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "java_bytecode_parser.h" -#include "jar_file.h" #include "library/java_core_models.inc" @@ -28,11 +26,10 @@ Author: Daniel Kroening, kroening@kroening.com /// JAVA_CORE_MODELS_SIZE, another macro defined in java_core_models.inc. unsigned char java_core_models[] = { JAVA_CORE_MODELS_DATA }; -java_bytecode_parse_treet &java_class_loadert::operator()( +java_class_loadert::parse_tree_with_overlayst &java_class_loadert::operator()( const irep_idt &class_name) { std::stack queue; - // Always require java.lang.Object, as it is the base of // internal classes such as array types. queue.push("java.lang.Object"); @@ -57,214 +54,252 @@ java_bytecode_parse_treet &java_class_loadert::operator()( irep_idt c=queue.top(); queue.pop(); - // do we have the class already? - if(class_map.find(c)!=class_map.end()) - continue; // got it already + if(class_map.count(c) != 0) + continue; debug() << "Reading class " << c << eom; - java_bytecode_parse_treet &parse_tree= + parse_tree_with_overlayst &parse_trees = get_parse_tree(class_loader_limit, c); - // add any dependencies to queue - for(java_bytecode_parse_treet::class_refst::const_iterator - it=parse_tree.class_refs.begin(); - it!=parse_tree.class_refs.end(); - it++) - queue.push(*it); + // Add any dependencies to queue + for(const java_bytecode_parse_treet &parse_tree : parse_trees) + for(const irep_idt &class_ref : parse_tree.class_refs) + queue.push(class_ref); // Add any extra dependencies provided by our caller: if(get_extra_class_refs) { - std::vector extra_class_refs = - get_extra_class_refs(c); - - for(const irep_idt &id : extra_class_refs) + for(const irep_idt &id : get_extra_class_refs(c)) queue.push(id); } } - return class_map[class_name]; -} - -void java_class_loadert::set_java_cp_include_files( - std::string &_java_cp_include_files) -{ - java_cp_include_files=_java_cp_include_files; -} - -/// Sets a function that provides extra dependencies for a particular class. -/// Currently used by the string preprocessor to note that even if we don't -/// have a definition of core string types, it will nontheless give them -/// certain known superclasses and interfaces, such as Serializable. -void java_class_loadert::set_extra_class_refs_function( - java_class_loadert::get_extra_class_refs_functiont func) -{ - get_extra_class_refs = func; + return class_map.at(class_name); } -/// Retrieves a class file from a jar and loads it into the tree -bool java_class_loadert::get_class_file( - java_class_loader_limitt &class_loader_limit, +optionalt java_class_loadert::get_class_from_jar( const irep_idt &class_name, const std::string &jar_file, - java_bytecode_parse_treet &parse_tree) + const jar_indext &jar_index, + java_class_loader_limitt &class_loader_limit) { - const auto &jm=jar_map[jar_file]; - - auto jm_it=jm.entries.find(class_name); + auto jar_index_it = jar_index.find(class_name); + if(jar_index_it == jar_index.end()) + return {}; - if(jm_it!=jm.entries.end()) - { - debug() << "Getting class `" << class_name << "' from JAR " - << jar_file << eom; - - std::string data=jar_pool(class_loader_limit, jar_file) - .get_entry(jm_it->second.class_file_name); - - std::istringstream istream(data); + debug() + << "Getting class `" << class_name << "' from JAR " << jar_file << eom; - java_bytecode_parse( - istream, - parse_tree, - get_message_handler()); + std::string data = + jar_pool(class_loader_limit, jar_file).get_entry(jar_index_it->second); - return true; - } - return false; + java_bytecode_parse_treet parse_tree; + std::istringstream istream(data); + if(java_bytecode_parse(istream, parse_tree, get_message_handler())) + return {}; + return parse_tree; } -/// Retrieves a class file from the internal jar and loads it into the tree -bool java_class_loadert::get_internal_class_file( - java_class_loader_limitt &class_loader_limit, - const irep_idt &class_name, - java_bytecode_parse_treet &parse_tree) +static bool is_overlay_class(const java_bytecode_parse_treet::classt &c) { - // Add internal jar file. The name is used to load it once only and - // reference it later. - std::string core_models="core-models.jar"; - jar_pool(class_loader_limit, - core_models, - java_core_models, - JAVA_CORE_MODELS_SIZE); - - // This does not read from the jar file but from the jar_filet object - // as we've just created it - read_jar_file(class_loader_limit, core_models); - return - get_class_file(class_loader_limit, class_name, core_models, parse_tree); + return java_bytecode_parse_treet::does_annotation_exist( + c.annotations, ID_overlay_class); } -java_bytecode_parse_treet &java_class_loadert::get_parse_tree( +/// Check through all the places class parse trees can appear and returns the +/// first implementation it finds plus any overlay class implementations +/// \param class_loader_limit: Filter to decide whether to load classes +/// \param class_name: Name of class to load +/// \returns The list of valid implementations, including overlays +/// \remarks +/// Allows multiple definitions of the same class to appear on the +/// classpath, so long as all but the first definition are marked with the +/// attribute `\@java::com.diffblue.OverlayClassImplementation`. +java_class_loadert::parse_tree_with_overlayst & +java_class_loadert::get_parse_tree( java_class_loader_limitt &class_loader_limit, const irep_idt &class_name) { - java_bytecode_parse_treet &parse_tree=class_map[class_name]; + parse_tree_with_overlayst &parse_trees = class_map[class_name]; + PRECONDITION(parse_trees.empty()); - // First check given JAR files - for(const auto &jf : jar_files) + // First add all given JAR files + for(const auto &jar_file : jar_files) { - read_jar_file(class_loader_limit, jf); - if(get_class_file(class_loader_limit, class_name, jf, parse_tree)) - return parse_tree; + jar_index_optcreft index = read_jar_file(class_loader_limit, jar_file); + if(!index) + continue; + optionalt parse_tree = + get_class_from_jar(class_name, jar_file, *index, class_loader_limit); + if(parse_tree) + parse_trees.push_back(*parse_tree); } + // Then add core models if(use_core_models) { - if(get_internal_class_file(class_loader_limit, class_name, parse_tree)) - return parse_tree; + // Add internal jar file. The name is used to load it once only and + // reference it later. + std::string core_models = "core-models.jar"; + jar_pool( + class_loader_limit, core_models, java_core_models, JAVA_CORE_MODELS_SIZE); + + // This does not read from the jar file but from the jar_filet object we + // just created + jar_index_optcreft index = read_jar_file(class_loader_limit, core_models); + if(index) + { + optionalt parse_tree = + get_class_from_jar(class_name, core_models, *index, class_loader_limit); + if(parse_tree) + parse_trees.push_back(*parse_tree); + } } - // See if we can find it in the class path - for(const auto &cp : config.java.classpath) + // Then add everything on the class path + for(const auto &cp_entry : config.java.classpath) { - // in a JAR? - if(has_suffix(cp, ".jar")) + if(has_suffix(cp_entry, ".jar")) { - read_jar_file(class_loader_limit, cp); - if(get_class_file(class_loader_limit, class_name, cp, parse_tree)) - return parse_tree; + jar_index_optcreft index = read_jar_file(class_loader_limit, cp_entry); + if(!index) + continue; + optionalt parse_tree = + get_class_from_jar(class_name, cp_entry, *index, class_loader_limit); + if(parse_tree) + parse_trees.push_back(*parse_tree); } else { - // in a given directory? - std::string full_path= + // Look in the given directory + const std::string class_file = class_name_to_file(class_name); + const std::string full_path = #ifdef _WIN32 - cp+'\\'+class_name_to_file(class_name); + cp_entry + '\\' + class_file; #else - cp+'/'+class_name_to_file(class_name); + cp_entry + '/' + class_file; #endif - // full class path starts with './' - if(class_loader_limit.load_class_file(full_path.substr(2)) && - std::ifstream(full_path)) + if(!class_loader_limit.load_class_file(class_file)) + continue; + + if(std::ifstream(full_path)) { - if(!java_bytecode_parse( - full_path, - parse_tree, - get_message_handler())) - return parse_tree; + debug() + << "Getting class `" << class_name << "' from file " << full_path + << eom; + java_bytecode_parse_treet parse_tree; + if(!java_bytecode_parse(full_path, parse_tree, get_message_handler())) + parse_trees.push_back(std::move(parse_tree)); } } } - // not found + if(!parse_trees.empty()) + { + auto parse_tree_it = parse_trees.begin(); + // If the first class implementation is an overlay emit a warning and + // skip over it until we find a non-overlay class + while(parse_tree_it != parse_trees.end()) + { + // Skip parse trees that failed to load - though these shouldn't exist yet + if(parse_tree_it->loading_successful) + { + if(!is_overlay_class(parse_tree_it->parsed_class)) + { + // Keep the non-overlay class + ++parse_tree_it; + break; + } + warning() + << "Skipping class " << class_name + << " marked with OverlayClassImplementation but found before" + " original definition" + << eom; + } + auto unloaded_or_overlay_out_of_order_it = parse_tree_it; + ++parse_tree_it; + parse_trees.erase(unloaded_or_overlay_out_of_order_it); + } + // Collect overlay classes + while(parse_tree_it != parse_trees.end()) + { + // Remove non-initial classes that aren't overlays + if(!is_overlay_class(parse_tree_it->parsed_class)) + { + warning() + << "Skipping duplicate definition of class " << class_name + << " not marked with OverlayClassImplementation" << eom; + auto duplicate_non_overlay_it = parse_tree_it; + ++parse_tree_it; + parse_trees.erase(duplicate_non_overlay_it); + } + else + ++parse_tree_it; + } + return parse_trees; + } + + // Not found or failed to load warning() << "failed to load class `" << class_name << '\'' << eom; + java_bytecode_parse_treet parse_tree; parse_tree.parsed_class.name=class_name; - return parse_tree; + parse_trees.push_back(std::move(parse_tree)); + return parse_trees; } void java_class_loadert::load_entire_jar( java_class_loader_limitt &class_loader_limit, - const std::string &file) + const std::string &jar_path) { - read_jar_file(class_loader_limit, file); - - const auto &jm=jar_map[file]; + jar_index_optcreft jar_index = read_jar_file(class_loader_limit, jar_path); + if(!jar_index) + return; - jar_files.push_front(file); + jar_files.push_front(jar_path); - for(const auto &e : jm.entries) + for(const auto &e : jar_index->get()) operator()(e.first); jar_files.pop_front(); } -void java_class_loadert::read_jar_file( +java_class_loadert::jar_index_optcreft java_class_loadert::read_jar_file( java_class_loader_limitt &class_loader_limit, - const irep_idt &file) + const std::string &jar_path) { - // done already? - if(jar_map.find(file)!=jar_map.end()) - return; + auto existing_it = jars_by_path.find(jar_path); + if(existing_it != jars_by_path.end()) + return std::cref(existing_it->second); std::vector filenames; try { - filenames=this->jar_pool(class_loader_limit, id2string(file)).filenames(); + filenames = this->jar_pool(class_loader_limit, jar_path).filenames(); } catch(const std::runtime_error &) { - error() << "failed to open JAR file `" << file << "'" << eom; - return; + error() << "failed to open JAR file `" << jar_path << "'" << eom; + return jar_index_optcreft(); } - debug() << "adding JAR file `" << file << "'" << eom; + debug() << "Adding JAR file `" << jar_path << "'" << eom; - // create a new entry in the map and initialize using the list of file names + // Create a new entry in the map and initialize using the list of file names // that were retained in the jar_filet by the class_loader_limit filter - auto &jm=jar_map[file]; + jar_indext &jar_index = jars_by_path[jar_path]; for(auto &file_name : filenames) { - // does it end on .class? if(has_suffix(file_name, ".class")) { - debug() << "read class file " << file_name << " from " << file << eom; + debug() + << "Found class file " << file_name << " in JAR `" << jar_path << "'" + << eom; irep_idt class_name=file_to_class_name(file_name); - - // record - jm.entries[class_name].class_file_name=file_name; + jar_index[class_name] = file_name; } } + return std::cref(jar_index); } std::string java_class_loadert::file_to_class_name(const std::string &file) @@ -330,15 +365,6 @@ jar_filet &java_class_loadert::jar_pool( return it->second; } -/// Adds the list of classes to the load queue, forcing them to be loaded even -/// without explicit reference -/// \param classes: list of class identifiers -void java_class_loadert::add_load_classes(const std::vector &classes) -{ - for(const auto &id : classes) - java_load_classes.push_back(id); -} - jar_filet &java_class_loadert::jar_pool( java_class_loader_limitt &class_loader_limit, const std::string &buffer_name, diff --git a/src/java_bytecode/java_class_loader.h b/src/java_bytecode/java_class_loader.h index 58241e6536a..6f46072aaa2 100644 --- a/src/java_bytecode/java_class_loader.h +++ b/src/java_bytecode/java_class_loader.h @@ -15,6 +15,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include "java_bytecode_parse_tree.h" #include "java_class_loader_limit.h" @@ -23,49 +24,25 @@ Author: Daniel Kroening, kroening@kroening.com class java_class_loadert:public messaget { public: - // Default constructor does not use core models - // for backward compatibility of unit tests - java_class_loadert() : - use_core_models(true) - {} + /// A map associating logical class names with the name of the .class file + /// implementing it for all classes inside a single JAR file + typedef std::map jar_indext; - java_bytecode_parse_treet &operator()(const irep_idt &); - - void set_java_cp_include_files(std::string &); - void add_load_classes(const std::vector &); + typedef std::list parse_tree_with_overlayst; + typedef std::map + parse_tree_with_overridest_mapt; /// A function that yields a list of extra dependencies based on a class name. typedef std::function(const irep_idt &)> get_extra_class_refs_functiont; - void set_extra_class_refs_function(get_extra_class_refs_functiont func); - - static std::string file_to_class_name(const std::string &); - static std::string class_name_to_file(const irep_idt &); - - void add_jar_file(const std::string &f) + // Default constructor does not use core models + // for backward compatibility of unit tests + java_class_loadert() : use_core_models(true) { - jar_files.push_back(f); } - void load_entire_jar(java_class_loader_limitt &, const std::string &f); - - void read_jar_file(java_class_loader_limitt &, const irep_idt &); - - /// Attempts to load the class from the given jar. - /// Returns true if found and loaded - bool get_class_file( - java_class_loader_limitt &class_loader_limit, - const irep_idt &class_name, - const std::string &jar_file, - java_bytecode_parse_treet &parse_tree); - - /// Attempts to load the class from the internal core models. - /// Returns true if found and loaded - bool get_internal_class_file( - java_class_loader_limitt &class_loader_limit, - const irep_idt &class_name, - java_bytecode_parse_treet &parse_tree); + parse_tree_with_overlayst &operator()(const irep_idt &class_name); /// Given a \p class_name (e.g. "java.lang.Thread") try to load the /// corresponding .class file by first scanning all .jar files whose @@ -74,58 +51,80 @@ class java_class_loadert:public messaget /// \p limit to limit the class files that it might (directly or indirectly) /// load and returns a default-constructed parse tree when unable to find the /// .class file. - java_bytecode_parse_treet &get_parse_tree( - java_class_loader_limitt &limit, const irep_idt &class_name); + parse_tree_with_overlayst &get_parse_tree( + java_class_loader_limitt &class_loader_limit, + const irep_idt &class_name); + + void set_java_cp_include_files(std::string &java_cp_include_files) + { + this->java_cp_include_files = java_cp_include_files; + } + /// Sets a function that provides extra dependencies for a particular class. + /// Currently used by the string preprocessor to note that even if we don't + /// have a definition of core string types, it will nontheless give them + /// certain known superclasses and interfaces, such as Serializable. + void set_extra_class_refs_function(get_extra_class_refs_functiont func) + { + get_extra_class_refs = func; + } + /// Adds the list of classes to the load queue, forcing them to be loaded even + /// without explicit reference + /// \param classes: list of class identifiers + void add_load_classes(const std::vector &classes) + { + for(const auto &id : classes) + java_load_classes.push_back(id); + } + void add_jar_file(const std::string &f) + { + jar_files.push_back(f); + } + void set_use_core_models(bool use_core_models) + { + this->use_core_models = use_core_models; + } + + static std::string file_to_class_name(const std::string &); + static std::string class_name_to_file(const irep_idt &); - /// Maps class names to the bytecode parse trees. - typedef std::map class_mapt; - class_mapt class_map; + void load_entire_jar(java_class_loader_limitt &, const std::string &jar_path); + + const jar_indext &get_jar_index(const std::string &jar_path) + { + return jars_by_path.at(jar_path); + } + /// Map from class names to the bytecode parse trees + fixed_keys_map_wrappert + get_class_with_overlays_map() + { + return fixed_keys_map_wrappert(class_map); + } + const java_bytecode_parse_treet &get_original_class( + const irep_idt &class_name) + { + return class_map.at(class_name).front(); + } - /// Load jar archive(from cache if already loaded) + /// Load jar archive or retrieve from cache if already loaded /// \param limit /// \param filename name of the file - jar_filet &jar_pool(java_class_loader_limitt &limit, - const std::string &filename); + jar_filet &jar_pool( + java_class_loader_limitt &limit, const std::string &filename); - /// Load jar archive(from cache if already loaded) + /// Load jar archive or retrieve from cache if already loaded /// \param limit /// \param buffer_name name of the original file /// \param pmem memory pointer to the contents of the file /// \param size size of the memory buffer /// Note that this mocks the existence of a file which may /// or may not exist since the actual data bis retrieved from memory. - jar_filet &jar_pool(java_class_loader_limitt &limit, - const std::string &buffer_name, - const void *pmem, - size_t size); - - /// An object of this class represents the information of _a single JAR file_ - /// that is relevant for a class loader: a map associating logical class names - /// with with handles (struct entryt) that describe one .class file inside the - /// JAR. Currently the handle only contains the name of the .class file. - class jar_map_entryt - { - public: - struct entryt - { - std::string class_file_name; - }; - - /// Maps class names to jar file entries. - typedef std::map entriest; - entriest entries; - }; - - /// Maps .jar filesystem paths to .class file descriptors (see - /// jar_map_entryt). - typedef std::map jar_mapt; - jar_mapt jar_map; - - /// List of filesystem paths to .jar files that will be used, in the given - /// order, to find and load a class, provided its name (see \ref - /// get_parse_tree). - std::list jar_files; + jar_filet &jar_pool( + java_class_loader_limitt &limit, + const std::string &buffer_name, + const void *pmem, + size_t size); +private: /// Either a regular expression matching files that will be allowed to be /// loaded or a string of the form `@PATH` where PATH is the file path of a /// json file storing an explicit list of files allowed to be loaded. See @@ -133,13 +132,36 @@ class java_class_loadert:public messaget /// information. std::string java_cp_include_files; + /// List of filesystem paths to .jar files that will be used, in the given + /// order, to find and load a class, provided its name (see \ref + /// get_parse_tree). + std::list jar_files; + /// Indicates that the core models should be loaded bool use_core_models; -private: - std::map m_archives; + /// Classes to be explicitly loaded std::vector java_load_classes; get_extra_class_refs_functiont get_extra_class_refs; + + /// The jar_indext for each jar file we've read + std::map jars_by_path; + + /// Jar files that have been loaded + std::map m_archives; + /// Map from class names to the bytecode parse trees + parse_tree_with_overridest_mapt class_map; + + typedef optionalt> + jar_index_optcreft; + jar_index_optcreft read_jar_file( + java_class_loader_limitt &class_loader_limit, + const std::string &jar_path); + optionalt get_class_from_jar( + const irep_idt &class_name, + const std::string &jar_file, + const jar_indext &jar_index, + java_class_loader_limitt &class_loader_limit); }; #endif // CPROVER_JAVA_BYTECODE_JAVA_CLASS_LOADER_H diff --git a/src/java_bytecode/java_class_loader_limit.cpp b/src/java_bytecode/java_class_loader_limit.cpp index 4c98e23c671..c04e9caa4c2 100644 --- a/src/java_bytecode/java_class_loader_limit.cpp +++ b/src/java_bytecode/java_class_loader_limit.cpp @@ -22,8 +22,8 @@ void java_class_loader_limitt::setup_class_load_limit( throw "class regexp cannot be empty, `get_language_options` not called?"; // '@' signals file reading with list of class files to load - regex_match=java_cp_include_files[0]!='@'; - if(regex_match) + use_regex_match = java_cp_include_files[0] != '@'; + if(use_regex_match) regex_matcher=std::regex(java_cp_include_files); else { @@ -49,16 +49,14 @@ void java_class_loader_limitt::setup_class_load_limit( /// \par parameters: class file name /// \return true if file should be loaded, else false -bool java_class_loader_limitt::load_class_file(const irep_idt &file_name) +bool java_class_loader_limitt::load_class_file(const std::string &file_name) { - if(regex_match) + if(use_regex_match) { - return std::regex_match( - id2string(file_name), - string_matcher, - regex_matcher); + std::smatch string_matches; + return std::regex_match(file_name, string_matches, regex_matcher); } - // load .class file only if it is in the match set else - return set_matcher.find(id2string(file_name))!=set_matcher.end(); + // load .class file only if it is in the match set + return set_matcher.find(file_name) != set_matcher.end(); } diff --git a/src/java_bytecode/java_class_loader_limit.h b/src/java_bytecode/java_class_loader_limit.h index a9a9fb33579..93c663bcb21 100644 --- a/src/java_bytecode/java_class_loader_limit.h +++ b/src/java_bytecode/java_class_loader_limit.h @@ -20,24 +20,23 @@ Author: Daniel Kroening, kroening@kroening.com class java_class_loader_limitt:public messaget { + /// Whether to use regex_matcher instead of set_matcher + bool use_regex_match; std::regex regex_matcher; std::set set_matcher; - bool regex_match; - std::smatch string_matcher; void setup_class_load_limit(const std::string &); public: explicit java_class_loader_limitt( - message_handlert &_message_handler, - const std::string &java_cp_include_files): - messaget(_message_handler), - regex_match(false) + message_handlert &message_handler, + const std::string &java_cp_include_files) + : messaget(message_handler) { setup_class_load_limit(java_cp_include_files); } - bool load_class_file(const irep_idt &class_file_name); + bool load_class_file(const std::string &class_file_name); }; #endif diff --git a/src/util/fixed_keys_map_wrapper.h b/src/util/fixed_keys_map_wrapper.h new file mode 100644 index 00000000000..644a7b5727c --- /dev/null +++ b/src/util/fixed_keys_map_wrapper.h @@ -0,0 +1,117 @@ +// Copyright 2018 DiffBlue Limited. All Rights Reserved. + +/// \file +/// A wrapper for maps that gives read-write access to elements but without +/// allowing addition or removal of elements + +#ifndef CPROVER_UTIL_FIXED_KEYS_MAP_WRAPPER_H +#define CPROVER_UTIL_FIXED_KEYS_MAP_WRAPPER_H + +template +class fixed_keys_map_wrappert +{ +private: + mapt ↦ + +public: + // NOLINTNEXTLINE(readability/identifiers) - should match STL + typedef typename mapt::iterator iterator; + // NOLINTNEXTLINE(readability/identifiers) - should match STL + typedef typename mapt::const_iterator const_iterator; + // NOLINTNEXTLINE(readability/identifiers) - should match STL + typedef typename mapt::reverse_iterator reverse_iterator; + // NOLINTNEXTLINE(readability/identifiers) - should match STL + typedef typename mapt::const_reverse_iterator const_reverse_iterator; + // NOLINTNEXTLINE(readability/identifiers) - should match STL + typedef typename mapt::key_type key_type; + // NOLINTNEXTLINE(readability/identifiers) - should match STL + typedef typename mapt::mapped_type mapped_type; + // NOLINTNEXTLINE(readability/identifiers) - should match STL + typedef typename mapt::size_type size_type; + + explicit fixed_keys_map_wrappert(mapt &map) : map(map) + { + } + + iterator begin() + { + return map.begin(); + } + const_iterator begin() const + { + return map.begin(); + } + iterator end() + { + return map.end(); + } + const_iterator end() const + { + return map.end(); + } + reverse_iterator rbegin() + { + return map.rbegin(); + } + const_reverse_iterator rbegin() const + { + return map.rbegin(); + } + reverse_iterator rend() + { + return map.rend(); + } + const_reverse_iterator rend() const + { + return map.rend(); + } + const_iterator cbegin() const + { + return map.begin(); + } + const_iterator cend() const + { + return map.end(); + } + const_reverse_iterator crbegin() const + { + return map.rbegin(); + } + const_reverse_iterator crend() const + { + return map.rend(); + } + + bool empty() const + { + return map.empty(); + } + size_type size() const + { + return map.size(); + } + size_type count(const key_type &key) const + { + return map.count(key); + } + + const mapped_type &at(const key_type &key) const + { + return map.at(key); + } + mapped_type &at(const key_type &key) + { + return map.at(key); + } + + iterator find(const key_type &key) + { + return map.find(key); + } + const_iterator find(const key_type &key) const + { + return map.find(key); + } +}; + +#endif // CPROVER_UTIL_FIXED_KEYS_MAP_WRAPPER_H diff --git a/src/util/irep_ids.def b/src/util/irep_ids.def index 643377bf4fc..b95433a35ab 100644 --- a/src/util/irep_ids.def +++ b/src/util/irep_ids.def @@ -841,6 +841,8 @@ IREP_ID_TWO(java_lambda_method_handles, lambda_method_handles) IREP_ID_ONE(havoc_object) IREP_ID_TWO(overflow_shl, overflow-shl) IREP_ID_TWO(C_no_initialization_required, #no_initialization_required) +IREP_ID_TWO(overlay_class, java::com.diffblue.OverlayClassImplementation) +IREP_ID_TWO(overlay_method, java::com.diffblue.OverlayMethodImplementation) #undef IREP_ID_ONE #undef IREP_ID_TWO