From 7c0ea4de502f7145872907fd61f97ec9284191f7 Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Wed, 7 Mar 2018 11:55:40 +0000 Subject: [PATCH 1/3] Tidied up java_class_loader_limitt --- src/java_bytecode/java_class_loader_limit.cpp | 18 ++++++++---------- src/java_bytecode/java_class_loader_limit.h | 13 ++++++------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/java_bytecode/java_class_loader_limit.cpp b/src/java_bytecode/java_class_loader_limit.cpp index 4c98e23c671..c04e9caa4c2 100644 --- a/src/java_bytecode/java_class_loader_limit.cpp +++ b/src/java_bytecode/java_class_loader_limit.cpp @@ -22,8 +22,8 @@ void java_class_loader_limitt::setup_class_load_limit( throw "class regexp cannot be empty, `get_language_options` not called?"; // '@' signals file reading with list of class files to load - regex_match=java_cp_include_files[0]!='@'; - if(regex_match) + use_regex_match = java_cp_include_files[0] != '@'; + if(use_regex_match) regex_matcher=std::regex(java_cp_include_files); else { @@ -49,16 +49,14 @@ void java_class_loader_limitt::setup_class_load_limit( /// \par parameters: class file name /// \return true if file should be loaded, else false -bool java_class_loader_limitt::load_class_file(const irep_idt &file_name) +bool java_class_loader_limitt::load_class_file(const std::string &file_name) { - if(regex_match) + if(use_regex_match) { - return std::regex_match( - id2string(file_name), - string_matcher, - regex_matcher); + std::smatch string_matches; + return std::regex_match(file_name, string_matches, regex_matcher); } - // load .class file only if it is in the match set else - return set_matcher.find(id2string(file_name))!=set_matcher.end(); + // load .class file only if it is in the match set + return set_matcher.find(file_name) != set_matcher.end(); } diff --git a/src/java_bytecode/java_class_loader_limit.h b/src/java_bytecode/java_class_loader_limit.h index a9a9fb33579..93c663bcb21 100644 --- a/src/java_bytecode/java_class_loader_limit.h +++ b/src/java_bytecode/java_class_loader_limit.h @@ -20,24 +20,23 @@ Author: Daniel Kroening, kroening@kroening.com class java_class_loader_limitt:public messaget { + /// Whether to use regex_matcher instead of set_matcher + bool use_regex_match; std::regex regex_matcher; std::set set_matcher; - bool regex_match; - std::smatch string_matcher; void setup_class_load_limit(const std::string &); public: explicit java_class_loader_limitt( - message_handlert &_message_handler, - const std::string &java_cp_include_files): - messaget(_message_handler), - regex_match(false) + message_handlert &message_handler, + const std::string &java_cp_include_files) + : messaget(message_handler) { setup_class_load_limit(java_cp_include_files); } - bool load_class_file(const irep_idt &class_file_name); + bool load_class_file(const std::string &class_file_name); }; #endif From dbc2f715f90fd8fc1feccf83e2bfb52961b106ab Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Thu, 5 Apr 2018 18:09:27 +0100 Subject: [PATCH 2/3] Improve code and error message in infer_opaque_type_fields --- src/java_bytecode/java_bytecode_language.cpp | 26 +++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 4f62931d7d8..1ffdae375ff 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -234,13 +234,13 @@ static void infer_opaque_type_fields( instruction.statement == "putfield") { const exprt &fieldref = instruction.args[0]; - const symbolt *class_symbol = - symbol_table.lookup(fieldref.get(ID_class)); + irep_idt class_symbol_id = fieldref.get(ID_class); + const symbolt *class_symbol = symbol_table.lookup(class_symbol_id); INVARIANT( - class_symbol != nullptr, "all field types should have been loaded"); + class_symbol != nullptr, + "all types containing fields should have been loaded"); const class_typet *class_type = &to_class_type(class_symbol->type); - irep_idt class_symbol_id = fieldref.get(ID_class); const irep_idt &component_name = fieldref.get(ID_component_name); while(!class_type->has_component(component_name)) { @@ -249,22 +249,24 @@ static void infer_opaque_type_fields( // Accessing a field of an incomplete (opaque) type. symbolt &writable_class_symbol = symbol_table.get_writeable_ref(class_symbol_id); - auto &add_to_components = + auto &components = to_struct_type(writable_class_symbol.type).components(); - add_to_components.push_back( - struct_typet::componentt(component_name, fieldref.type())); - add_to_components.back().set_base_name(component_name); - add_to_components.back().set_pretty_name(component_name); + components.emplace_back(component_name, fieldref.type()); + components.back().set_base_name(component_name); + components.back().set_pretty_name(component_name); break; } else { // Not present here: check the superclass. INVARIANT( - class_type->bases().size() != 0, - "class missing an expected field should have a superclass"); + !class_type->bases().empty(), + "class '" + id2string(class_symbol->name) + + "' (which was missing a field '" + id2string(component_name) + + "' referenced from method '" + id2string(method.name) + + "') should have an opaque superclass"); const symbol_typet &superclass_type = - to_symbol_type(class_type->bases()[0].type()); + to_symbol_type(class_type->bases().front().type()); class_symbol_id = superclass_type.get_identifier(); class_type = &to_class_type(ns.follow(superclass_type)); } From 3d4183a733e5a096c06c0525d9dcdbcf6626b7f0 Mon Sep 17 00:00:00 2001 From: Nathan Phillips Date: Fri, 6 Apr 2018 19:52:56 +0100 Subject: [PATCH 3/3] Add ability to overlay classes with new definitions of existing methods Allows multiple definitions of the same class to appear on the classpath, so long as all but the first definition are marked with the attribute @java::com.diffblue.security.OverlayClassImplementation. Overlay class definitions can contain methods with the same signature as methods in the original class, so long as these are marked with the attribute @java::com.diffblue.security.OverlayMethodImplementation; such overlay methods are replaced in the original file with the version found in the last overlay on the classpath. Later definitions can also contain new supporting methods and fields that are merged in. This will allow insertion of Java methods into library classes to handle, for example, modelling dependency injection. Includes regression tests Improvements over earlier version: Allows loading of classes such as java.lang.Object from an explicit model Prevent attempting to load any class more than once Invalid values in the classpath are ignored Handles duplicate files appearing in multiple items on the classpath Filter loaded class files to remove duplicates or misordered overlays before following links to other classes so as not to add dependencies of duplicate classes --- .../cbmc-java/invalid_classpath/Test.class | Bin 0 -> 228 bytes .../cbmc-java/invalid_classpath/Test.java | 3 + .../cbmc-java/invalid_classpath/test-jar.desc | 10 + .../invalid_classpath/test-path.desc | 10 + regression/cbmc-java/overlay-class/Test.class | Bin 0 -> 583 bytes regression/cbmc-java/overlay-class/Test.java | 14 + .../diffblue/OverlayClassImplementation.class | Bin 0 -> 185 bytes .../diffblue/OverlayClassImplementation.java | 4 + .../OverlayMethodImplementation.class | Bin 0 -> 187 bytes .../diffblue/OverlayMethodImplementation.java | 4 + .../overlay-class/correct-overlay/Test.class | Bin 0 -> 412 bytes .../overlay-class/correct-overlay/Test.java | 14 + .../cbmc-java/overlay-class/correct-test.desc | 17 + .../overlay-class/duplicate-test.desc | 17 + .../overlay-class/format_classpath.sh | 12 + .../overlay-class/misordered-test.desc | 16 + .../overlay-class/unmarked-overlay/Test.class | Bin 0 -> 601 bytes .../overlay-class/unmarked-overlay/Test.java | 11 + .../overlay-class/unmarked-test.desc | 16 + .../java/lang/Object.class | Bin 0 -> 1108 bytes .../java/lang/Object.java | 33 ++ .../provide_object_implementation/test.desc | 10 + src/java_bytecode/ci_lazy_methods.cpp | 12 +- .../java_bytecode_convert_class.cpp | 218 +++++++++++-- .../java_bytecode_convert_class.h | 2 +- src/java_bytecode/java_bytecode_language.cpp | 65 ++-- .../java_bytecode_parse_tree.cpp | 20 ++ src/java_bytecode/java_bytecode_parse_tree.h | 4 + src/java_bytecode/java_class_loader.cpp | 300 ++++++++++-------- src/java_bytecode/java_class_loader.h | 174 +++++----- src/util/fixed_keys_map_wrapper.h | 117 +++++++ src/util/irep_ids.def | 2 + 32 files changed, 842 insertions(+), 263 deletions(-) create mode 100644 regression/cbmc-java/invalid_classpath/Test.class create mode 100644 regression/cbmc-java/invalid_classpath/Test.java create mode 100644 regression/cbmc-java/invalid_classpath/test-jar.desc create mode 100644 regression/cbmc-java/invalid_classpath/test-path.desc create mode 100644 regression/cbmc-java/overlay-class/Test.class create mode 100644 regression/cbmc-java/overlay-class/Test.java create mode 100644 regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayClassImplementation.class create mode 100644 regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayClassImplementation.java create mode 100644 regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayMethodImplementation.class create mode 100644 regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayMethodImplementation.java create mode 100644 regression/cbmc-java/overlay-class/correct-overlay/Test.class create mode 100644 regression/cbmc-java/overlay-class/correct-overlay/Test.java create mode 100644 regression/cbmc-java/overlay-class/correct-test.desc create mode 100644 regression/cbmc-java/overlay-class/duplicate-test.desc create mode 100755 regression/cbmc-java/overlay-class/format_classpath.sh create mode 100644 regression/cbmc-java/overlay-class/misordered-test.desc create mode 100644 regression/cbmc-java/overlay-class/unmarked-overlay/Test.class create mode 100644 regression/cbmc-java/overlay-class/unmarked-overlay/Test.java create mode 100644 regression/cbmc-java/overlay-class/unmarked-test.desc create mode 100644 regression/cbmc-java/provide_object_implementation/java/lang/Object.class create mode 100644 regression/cbmc-java/provide_object_implementation/java/lang/Object.java create mode 100644 regression/cbmc-java/provide_object_implementation/test.desc create mode 100644 src/util/fixed_keys_map_wrapper.h diff --git a/regression/cbmc-java/invalid_classpath/Test.class b/regression/cbmc-java/invalid_classpath/Test.class new file mode 100644 index 0000000000000000000000000000000000000000..aaa01c760f28cd1b395b9b0710afe3558ea2fa39 GIT binary patch literal 228 zcmZ9Fs}90I5Jm6wNlSTH1Of*LYLEm&f+8Sj@Y}K>+w_r?mj6PLAou`2iZEM&V8qPM zxhJ#l&+`RfiM|gLT^E6i9>H0uLe*=6Ih~yd)}}m5!eFNgxo`4VR*{G^CRQ#~LGX`d zQzde%H1i^Ptrw}di2xRCmNXy?H3~CPT*5<~%B02w27ju{0Bf+hJG_l~5>I>n0NwdP YaJf4z)KTZBj@p!se@$QXI5gmY0WqT?_W%F@ literal 0 HcmV?d00001 diff --git a/regression/cbmc-java/invalid_classpath/Test.java b/regression/cbmc-java/invalid_classpath/Test.java new file mode 100644 index 00000000000..e3957a5157d --- /dev/null +++ b/regression/cbmc-java/invalid_classpath/Test.java @@ -0,0 +1,3 @@ +public class Test { + public void main() {} +} diff --git a/regression/cbmc-java/invalid_classpath/test-jar.desc b/regression/cbmc-java/invalid_classpath/test-jar.desc new file mode 100644 index 00000000000..ad020054856 --- /dev/null +++ b/regression/cbmc-java/invalid_classpath/test-jar.desc @@ -0,0 +1,10 @@ +CORE symex-driven-lazy-loading-expected-failure +Test.class +--classpath ./NotHere.jar +^EXIT=6$ +^SIGNAL=0$ +^the program has no entry point$ +^failed to load class `Test'$ +-- +-- +symex-driven lazy loading should emit "the program has no entry point" but currently doesn't diff --git a/regression/cbmc-java/invalid_classpath/test-path.desc b/regression/cbmc-java/invalid_classpath/test-path.desc new file mode 100644 index 00000000000..282ae0209f8 --- /dev/null +++ b/regression/cbmc-java/invalid_classpath/test-path.desc @@ -0,0 +1,10 @@ +CORE symex-driven-lazy-loading-expected-failure +Test.class +--classpath ./NotHere +^EXIT=6$ +^SIGNAL=0$ +^the program has no entry point$ +^failed to load class `Test'$ +-- +-- +symex-driven lazy loading should emit "the program has no entry point" but currently doesn't diff --git a/regression/cbmc-java/overlay-class/Test.class b/regression/cbmc-java/overlay-class/Test.class new file mode 100644 index 0000000000000000000000000000000000000000..0e10ee8742c250b399d8a78e8203ca55d466bfbc GIT binary patch literal 583 zcmZWlO-sW-5Ph4E#-?elU$ymH@Sq;_(wibmRS>E@R0Q>uCQDh3iEPs1Z}AU!R>6XT z-u+SHY^cq_Kz3$l-kbMkzkl980o1W-!@_JHCaM-{HgwEdn6ojD1q+J|?1BChhN(3- zj=4-k6vl@lb~^$0sdL4U+ZCZm_89c_jSB{&5&4{9yeUF{*1PL)d9J|JxN}9wP+7lj z-ne~tJ8;A9b~}+G?CvOHv7NZy?Wub|#23OSY4y1bQrg(A7YudmcGQy|KNf0g{+!3j zmKx~5LD7MY0z)-ZIQX-DBxNMYh9X#4a*+9H`~})03Lnrf2Qa=ANlQsKMUZNV6elUu!Adk?Y9v;pqWb;`6^SuDg4I)TU}DmI dL+%ll@dQJEg_cSE`BO3`G-qFJF+&#(m0$3dWOV=l literal 0 HcmV?d00001 diff --git a/regression/cbmc-java/overlay-class/Test.java b/regression/cbmc-java/overlay-class/Test.java new file mode 100644 index 00000000000..acce06c7d4d --- /dev/null +++ b/regression/cbmc-java/overlay-class/Test.java @@ -0,0 +1,14 @@ +public class Test +{ + public int x; + + public static void main(String[] args) + { + assert(false); + } + + public static void notOverlain() + { + assert(true); + } +} diff --git a/regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayClassImplementation.class b/regression/cbmc-java/overlay-class/annotations/com/diffblue/OverlayClassImplementation.class new file mode 100644 index 0000000000000000000000000000000000000000..49c5688dd416ab1b0291bbb8050d86c74080395b GIT binary patch literal 185 zcmX^0Z`VEs1_l!bPId-1b_RBK1`b9BuHgLAqU2P!%$!t426_Lo)S{fkO6Q!!;$qL- zf}GUc)Vz|!lFa-(y{yEtL`DYnv lfW{+)67%x%AqML^!kKD}42%p+K+FtuCIgUW0g_A%tN1mu4SeQ zhEWh@sTa%TGE7S0%_@;6{P|kwhb;Ef$3&z;eUm2)?mR2=NLEL@97;}ixR1E3qgM4K{$GSX1oqUxmS0PWE$*h@onC|U#) f2nQ~:\(\)V'$ +^Field definition for java::Test\.x already loaded from overlay class$ +^Adding symbol from overlay class: method 'java::Test\.main:\(\[Ljava/lang/String;\)V'$ +^Method java::Test\.:\(\)V exists in an overlay class without being marked as an overlay and also exists in the underlying class +^Adding symbol: method 'java::Test\.notOverlain:\(\)V'$ +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +^Skipping class Test marked with OverlayClassImplementation but found before original definition$ +^Skipping duplicate definition of class Test not marked with OverlayClassImplementation$ diff --git a/regression/cbmc-java/overlay-class/duplicate-test.desc b/regression/cbmc-java/overlay-class/duplicate-test.desc new file mode 100644 index 00000000000..1ac968acb9c --- /dev/null +++ b/regression/cbmc-java/overlay-class/duplicate-test.desc @@ -0,0 +1,17 @@ +CORE +Test.class +--classpath `./format_classpath.sh . annotations . correct-overlay` --verbosity 10 +^Getting class `Test' from file \.[\\/]Test\.class$ +^Getting class `Test' from file correct-overlay[\\/]Test\.class$ +^Skipping duplicate definition of class Test not marked with OverlayClassImplementation$ +^Adding symbol from overlay class: field 'x'$ +^Adding symbol from overlay class: method 'java::Test\.:\(\)V'$ +^Field definition for java::Test\.x already loaded from overlay class$ +^Adding symbol from overlay class: method 'java::Test\.main:\(\[Ljava/lang/String;\)V'$ +^Method java::Test\.:\(\)V exists in an overlay class without being marked as an overlay and also exists in the underlying class +^Adding symbol: method 'java::Test\.notOverlain:\(\)V'$ +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +^Skipping class Test marked with OverlayClassImplementation but found before original definition$ diff --git a/regression/cbmc-java/overlay-class/format_classpath.sh b/regression/cbmc-java/overlay-class/format_classpath.sh new file mode 100755 index 00000000000..b905ab54537 --- /dev/null +++ b/regression/cbmc-java/overlay-class/format_classpath.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +unameOut="$(uname -s)" +case "${unameOut}" in + CYGWIN*) separator=";";; + MINGW*) separator=";";; + MSYS*) separator=";";; + Windows*) separator=";";; + *) separator=":" +esac + +echo -n `IFS=$separator; echo "$*"` diff --git a/regression/cbmc-java/overlay-class/misordered-test.desc b/regression/cbmc-java/overlay-class/misordered-test.desc new file mode 100644 index 00000000000..acd447a376f --- /dev/null +++ b/regression/cbmc-java/overlay-class/misordered-test.desc @@ -0,0 +1,16 @@ +CORE +Test.class +--classpath `./format_classpath.sh annotations correct-overlay .` --verbosity 10 +^Getting class `Test' from file correct-overlay[\\/]Test\.class$ +^Getting class `Test' from file \.[\\/]Test\.class$ +^Skipping class Test marked with OverlayClassImplementation but found before original definition$ +^Adding symbol: field 'x'$ +^Adding symbol: method 'java::Test\.main:\(\[Ljava/lang/String;\)V'$ +^Adding symbol: method 'java::Test\.notOverlain:\(\)V'$ +^VERIFICATION FAILED$ +^EXIT=10$ +^SIGNAL=0$ +-- +^Skipping duplicate definition of class Test not marked with OverlayClassImplementation$ +^Adding symbol from overlay class +exists in an overlay class without being marked as an overlay and also exists in the underlying class diff --git a/regression/cbmc-java/overlay-class/unmarked-overlay/Test.class b/regression/cbmc-java/overlay-class/unmarked-overlay/Test.class new file mode 100644 index 0000000000000000000000000000000000000000..9153608f2ba35c9af611c59b337289f2d43da02f GIT binary patch literal 601 zcmYLG!A{#i5PfSqabgS!2~E>dpp`gK4$UPu5GtZnLJCD92&r(|*c(`CuhDuPslUPx za0a9zwNiWUM^((aKrTBoyYIbuGyC`7&))$0cpJb+r;Z9<`sfC5(ed#rz-uh}c*D?M z<#{f&k(tUr$()Z85i_t8hU%tN(rhufYwwR4yqzo-42^+Q;!BZ^gg&xhdPuoc44t*p z!3Cf2Uc%K`Z)mhsXB(C{J2X7{w$Cpg@r#FrGBOo=Y9ez<65C2;hL;W))(26R_F_36 zkCH<44kkh;{QJHz=UKd$UM3OXjcp|%+%k0yg*R_I84mex}JNaW^&brT}MOo$rl z4Be@{?WdV{wazrnUIqu zJKDHUKfsz@+nq8O%$lJyq-u#GqgYwL2fMLA%+gscFFhj6p#i6a_9Rkhq9K}$AYP1+=v_C;R;=yp-irEz^u`Ol z(L@uy_eU8|Kio%+VN3gVPtSSId7iUhzrXzea38HaZkZ6+G;lkQJ0>)2n#kjB9u0xq zuH!j!Pe5%nPXx4m|I`+kJ9IqzD16zqgJY}fvSRdYx$jz|k-$Qud3bKUwzgf%>u<-! z9f3l}>3fz81A>?SMeMfr2r>rNXpl%yuT6RTRcN_vy4aZN`Lug(_vB9Vxj?xit=@|# z)-d&<_gvqjM0G|YB?y?0-t_FDbbOCJ4e57e;CTHkJgL%8&hw>n_LlCGhlj4q9b8zB z}V2b#Z!RfdX)iKBS^W-FL$9VN)25%5i!9t4LB2yuv z*t+)~dF=zVn4n-{#6*<<9ZKN=6Tl*tQcrcBktgFj#IvtZerA-)SY|~dCi-6EsEN1L z_&1 getClass() { + return null; + } + + public int hashCode() { return 0; } + + public boolean equals(Object obj) { return (this == obj); } + + protected Object clone() throws CloneNotSupportedException { + throw new CloneNotSupportedException(); + } + + public String toString() { return "object"; } + + public final void notify() {} + + public final void notifyAll() {} + + public final void wait(long timeout) throws InterruptedException { + throw new InterruptedException(); + } + + public final void wait(long timeout, int nanos) throws InterruptedException { + wait(timeout); + } + + public final void wait() throws InterruptedException { wait(0); } + + protected void finalize() throws Throwable { } +} diff --git a/regression/cbmc-java/provide_object_implementation/test.desc b/regression/cbmc-java/provide_object_implementation/test.desc new file mode 100644 index 00000000000..381ae109d1e --- /dev/null +++ b/regression/cbmc-java/provide_object_implementation/test.desc @@ -0,0 +1,10 @@ +CORE symex-driven-lazy-loading-expected-failure +java/lang/Object.class + +^EXIT=6$ +^SIGNAL=0$ +^the program has no entry point$ +-- +^failed to add class symbol java::java\.lang\.Object$ +-- +symex-driven lazy loading should emit "the program has no entry point" but currently doesn't diff --git a/src/java_bytecode/ci_lazy_methods.cpp b/src/java_bytecode/ci_lazy_methods.cpp index 32344f24b2a..30f5deacf10 100644 --- a/src/java_bytecode/ci_lazy_methods.cpp +++ b/src/java_bytecode/ci_lazy_methods.cpp @@ -85,15 +85,15 @@ bool ci_lazy_methodst::operator()( reachable_classes.push_back(main_class); else reachable_classes = main_jar_classes; - for(const auto &classname : reachable_classes) + for(const irep_idt &class_name : reachable_classes) { - const auto &methods= - java_class_loader.class_map.at(classname).parsed_class.methods; + const auto &methods = + java_class_loader.get_original_class(class_name).parsed_class.methods; for(const auto &method : methods) { - const irep_idt methodid="java::"+id2string(classname)+"."+ - id2string(method.name)+":"+ - id2string(method.descriptor); + const irep_idt methodid = + "java::" + id2string(class_name) + "." + id2string(method.name) + + ":" + id2string(method.descriptor); method_worklist2.push_back(methodid); } } diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index 711bba8949e..33912bd85b7 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -48,23 +48,53 @@ class java_bytecode_convert_classt:public messaget { } - void operator()(const java_bytecode_parse_treet &parse_tree) + /// Converts all the class parse trees into a class symbol and adds it to the + /// symbol table. + /// \param parse_trees: The parse trees found for the class to be converted. + /// \remarks + /// Allows multiple definitions of the same class to appear on the + /// classpath, so long as all but the first definition are marked with the + /// attribute `\@java::com.diffblue.OverlayClassImplementation`. + /// Overlay class definitions can contain methods with the same signature + /// as methods in the original class, so long as these are marked with the + /// attribute `\@java::com.diffblue.OverlayMethodImplementation`; such + /// overlay methods are replaced in the original file with the version + /// found in the last overlay on the classpath. Later definitions can + /// also contain new supporting methods and fields that are merged in. + /// This will allow insertion of Java methods into library classes to + /// handle, for example, modelling dependency injection. + void operator()( + const java_class_loadert::parse_tree_with_overlayst &parse_trees) { - // add array types to the symbol table + PRECONDITION(!parse_trees.empty()); + const java_bytecode_parse_treet &parse_tree = parse_trees.front(); + + // Add array types to the symbol table add_array_types(symbol_table); const bool loading_success = parse_tree.loading_successful && !no_load_classes.count(id2string(parse_tree.parsed_class.name)); if(loading_success) - convert(parse_tree.parsed_class); + { + overlay_classest overlay_classes; + for(auto overlay_class_it = std::next(parse_trees.begin()); + overlay_class_it != parse_trees.end(); + ++overlay_class_it) + { + overlay_classes.push_front(std::cref(overlay_class_it->parsed_class)); + } + convert(parse_tree.parsed_class, overlay_classes); + } - if(string_preprocess.is_known_string_type(parse_tree.parsed_class.name)) - string_preprocess.add_string_type( - parse_tree.parsed_class.name, symbol_table); + // Add as string type if relevant + const irep_idt &class_name = parse_tree.parsed_class.name; + if(string_preprocess.is_known_string_type(class_name)) + string_preprocess.add_string_type(class_name, symbol_table); else if(!loading_success) + // Generate stub if couldn't load from bytecode and wasn't string type generate_class_stub( - parse_tree.parsed_class.name, + class_name, symbol_table, get_message_handler(), struct_union_typet::componentst{}); @@ -72,19 +102,29 @@ class java_bytecode_convert_classt:public messaget typedef java_bytecode_parse_treet::classt classt; typedef java_bytecode_parse_treet::fieldt fieldt; + typedef java_bytecode_parse_treet::methodt methodt; -protected: +private: symbol_tablet &symbol_table; const size_t max_array_length; method_bytecodet &method_bytecode; java_string_library_preprocesst &string_preprocess; // conversion - void convert(const classt &c); + typedef std::list> overlay_classest; + void convert(const classt &c, const overlay_classest &overlay_classes); void convert(symbolt &class_symbol, const fieldt &f); // see definition below for more info static void add_array_types(symbol_tablet &symbol_table); + + static bool is_overlay_method(const methodt &method); + + bool check_field_exists( + const fieldt &field, + const irep_idt &qualified_fieldname, + const struct_union_typet::componentst &fields) const; + std::unordered_set no_load_classes; }; @@ -176,7 +216,12 @@ static optionalt extract_generic_interface_reference( return {}; } -void java_bytecode_convert_classt::convert(const classt &c) +/// Convert a class, adding symbols to the symbol table for its members +/// \param c: Bytecode of the class to convert +/// \param overlay_classes: Bytecode of any overlays for the class to convert +void java_bytecode_convert_classt::convert( + const classt &c, + const overlay_classest &overlay_classes) { std::string qualified_classname="java::"+id2string(c.name); if(symbol_table.has_symbol(qualified_classname)) @@ -347,20 +392,115 @@ void java_bytecode_convert_classt::convert(const classt &c) throw 0; } - // now do fields + // Now do fields + const class_typet::componentst &fields = + to_class_type(class_symbol->type).components(); + // Include fields from overlay classes as they will be required by the + // methods defined there + for(auto overlay_class : overlay_classes) + { + for(const auto &field : overlay_class.get().fields) + { + std::string field_id = qualified_classname + "." + id2string(field.name); + if(check_field_exists(field, field_id, fields)) + { + std::string err = + "Duplicate field definition for " + field_id + " in overlay class"; + // TODO: This could just be a warning if the types match + error() << err << eom; + throw err.c_str(); + } + debug() + << "Adding symbol from overlay class: field '" << field.name << "'" + << eom; + convert(*class_symbol, field); + POSTCONDITION(check_field_exists(field, field_id, fields)); + } + } for(const auto &field : c.fields) { + std::string field_id = qualified_classname + "." + id2string(field.name); + if(check_field_exists(field, field_id, fields)) + { + // TODO: This could be a warning if the types match + error() + << "Field definition for " << field_id + << " already loaded from overlay class" << eom; + continue; + } debug() << "Adding symbol: field '" << field.name << "'" << eom; convert(*class_symbol, field); + POSTCONDITION(check_field_exists(field, field_id, fields)); } - // now do methods - for(const auto &method : c.methods) + // Now do methods + std::set overlay_methods; + for(auto overlay_class : overlay_classes) + { + for(const methodt &method : overlay_class.get().methods) + { + const irep_idt method_identifier = + qualified_classname + "." + id2string(method.name) + + ":" + method.descriptor; + if(method_bytecode.contains_method(method_identifier)) + { + // This method has already been discovered and added to method_bytecode + // while parsing an overlay class that appears later in the classpath + // (we're working backwards) + // Warn the user if the definition already found was not an overlay, + // otherwise simply don't load this earlier definition + if(overlay_methods.count(method_identifier) == 0) + { + // This method was defined in a previous class definition without + // being marked as an overlay method + warning() + << "Method " << method_identifier + << " exists in an overlay class without being marked as an " + "overlay and also exists in another overlay class that appears " + "earlier in the classpath" + << eom; + } + continue; + } + // Always run the lazy pre-stage, as it symbol-table + // registers the function. + debug() + << "Adding symbol from overlay class: method '" << method_identifier + << "'" << eom; + java_bytecode_convert_method_lazy( + *class_symbol, + method_identifier, + method, + symbol_table, + get_message_handler()); + method_bytecode.add(qualified_classname, method_identifier, method); + if(is_overlay_method(method)) + overlay_methods.insert(method_identifier); + } + } + for(const methodt &method : c.methods) { const irep_idt method_identifier= - id2string(qualified_classname)+ - "."+id2string(method.name)+ - ":"+method.descriptor; + qualified_classname + "." + id2string(method.name) + + ":" + method.descriptor; + if(method_bytecode.contains_method(method_identifier)) + { + // This method has already been discovered while parsing an overlay + // class + // If that definition is an overlay then we simply don't load this + // original definition and we remove it from the list of overlays + if(overlay_methods.erase(method_identifier) == 0) + { + // This method was defined in a previous class definition without + // being marked as an overlay method + warning() + << "Method " << method_identifier + << " exists in an overlay class without being marked as an overlay " + "and also exists in the underlying class" + << eom; + } + continue; + } // Always run the lazy pre-stage, as it symbol-table // registers the function. debug() << "Adding symbol: method '" << method_identifier << "'" << eom; @@ -371,6 +511,21 @@ void java_bytecode_convert_classt::convert(const classt &c) symbol_table, get_message_handler()); method_bytecode.add(qualified_classname, method_identifier, method); + if(is_overlay_method(method)) + { + warning() + << "Method " << method_identifier + << " marked as an overlay where defined in the underlying class" << eom; + } + } + if(!overlay_methods.empty()) + { + error() + << "Overlay methods defined in overlay classes did not exist in the " + "underlying class:\n"; + for(const irep_idt &method_id : overlay_methods) + error() << " " << method_id << "\n"; + error() << eom; } // is this a root class? @@ -378,6 +533,27 @@ void java_bytecode_convert_classt::convert(const classt &c) java_root_class(*class_symbol); } +bool java_bytecode_convert_classt::check_field_exists( + const java_bytecode_parse_treet::fieldt &field, + const irep_idt &qualified_fieldname, + const struct_union_typet::componentst &fields) const +{ + if(field.is_static) + return symbol_table.has_symbol(qualified_fieldname); + + auto existing_field = std::find_if( + fields.begin(), + fields.end(), + [&field](const struct_union_typet::componentt &f) + { + return f.get_name() == field.name; + }); + return existing_field != fields.end(); +} + +/// Convert a field, adding a symbol to teh symbol table for it +/// \param class_symbol: The already added symbol for the containing class +/// \param f: The bytecode for the field to convert void java_bytecode_convert_classt::convert( symbolt &class_symbol, const fieldt &f) @@ -666,8 +842,14 @@ void java_bytecode_convert_classt::add_array_types(symbol_tablet &symbol_table) } } +bool java_bytecode_convert_classt::is_overlay_method(const methodt &method) +{ + return java_bytecode_parse_treet::does_annotation_exist( + method.annotations, ID_overlay_method); +} + bool java_bytecode_convert_class( - const java_bytecode_parse_treet &parse_tree, + const java_class_loadert::parse_tree_with_overlayst &parse_trees, symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, @@ -685,7 +867,7 @@ bool java_bytecode_convert_class( try { - java_bytecode_convert_class(parse_tree); + java_bytecode_convert_class(parse_trees); return false; } diff --git a/src/java_bytecode/java_bytecode_convert_class.h b/src/java_bytecode/java_bytecode_convert_class.h index 33edc2b1e09..5cb021f946a 100644 --- a/src/java_bytecode/java_bytecode_convert_class.h +++ b/src/java_bytecode/java_bytecode_convert_class.h @@ -21,7 +21,7 @@ Author: Daniel Kroening, kroening@kroening.com /// See class \ref java_bytecode_convert_classt bool java_bytecode_convert_class( - const java_bytecode_parse_treet &parse_tree, + const java_class_loadert::parse_tree_with_overlayst &parse_trees, symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 1ffdae375ff..0c26b23e2ab 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -43,7 +43,7 @@ Author: Daniel Kroening, kroening@kroening.com void java_bytecode_languaget::get_language_options(const cmdlinet &cmd) { assume_inputs_non_null=cmd.isset("java-assume-inputs-non-null"); - java_class_loader.use_core_models=!cmd.isset("no-core-models"); + java_class_loader.set_use_core_models(!cmd.isset("no-core-models")); string_refinement_enabled=cmd.isset("refine-strings"); throw_runtime_exceptions=cmd.isset("java-throw-runtime-exceptions"); if(cmd.isset("java-max-input-array-length")) @@ -194,7 +194,7 @@ bool java_bytecode_languaget::parse( { status() << "JAR file without entry point: loading class files" << eom; java_class_loader.load_entire_jar(class_loader_limit, path); - for(const auto &kv : java_class_loader.jar_map.at(path).entries) + for(const auto &kv : java_class_loader.get_jar_index(path)) main_jar_classes.push_back(kv.first); } else @@ -579,9 +579,9 @@ bool java_bytecode_languaget::typecheck( // Must load java.lang.Object first to avoid stubbing // This ordering could alternatively be enforced by // moving the code below to the class loader. - java_class_loadert::class_mapt::const_iterator it= - java_class_loader.class_map.find("java.lang.Object"); - if(it!=java_class_loader.class_map.end()) + java_class_loadert::parse_tree_with_overridest_mapt::const_iterator it = + java_class_loader.get_class_with_overlays_map().find("java.lang.Object"); + if(it != java_class_loader.get_class_with_overlays_map().end()) { if( java_bytecode_convert_class( @@ -599,17 +599,14 @@ bool java_bytecode_languaget::typecheck( // first generate a new struct symbol for each class and a new function symbol // for every method - for(java_class_loadert::class_mapt::const_iterator - c_it=java_class_loader.class_map.begin(); - c_it!=java_class_loader.class_map.end(); - c_it++) + for(const auto &class_trees : java_class_loader.get_class_with_overlays_map()) { - if(c_it->second.parsed_class.name.empty()) + if(class_trees.second.front().parsed_class.name.empty()) continue; if( java_bytecode_convert_class( - c_it->second, + class_trees.second, symbol_table, get_message_handler(), max_user_array_length, @@ -627,14 +624,14 @@ bool java_bytecode_languaget::typecheck( // find and mark all implicitly generic class types // this can only be done once all the class symbols have been created - for(const auto &c : java_class_loader.class_map) + for(const auto &c : java_class_loader.get_class_with_overlays_map()) { - if(c.second.parsed_class.name.empty()) + if(c.second.front().parsed_class.name.empty()) continue; try { mark_java_implicitly_generic_class_type( - c.second.parsed_class.name, symbol_table); + c.second.front().parsed_class.name, symbol_table); } catch(missing_outer_class_symbol_exceptiont &) { @@ -648,9 +645,10 @@ bool java_bytecode_languaget::typecheck( // Infer fields on opaque types based on the method instructions just loaded. // For example, if we don't have bytecode for field x of class A, but we can // see an int-typed getfield instruction referring to it, add that field now. - for(const auto &c : java_class_loader.class_map) + for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map()) { - infer_opaque_type_fields(c.second, symbol_table); + for(const java_bytecode_parse_treet &parse_tree : class_to_trees.second) + infer_opaque_type_fields(parse_tree, symbol_table); } // Create global variables for constants (String and Class literals) up front. @@ -658,12 +656,13 @@ bool java_bytecode_languaget::typecheck( // literal globals' existence when __CPROVER_initialize is generated in // `generate_support_functions`. const std::size_t before_constant_globals_size = symbol_table.symbols.size(); - for(auto &c : java_class_loader.class_map) + for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map()) { - generate_constant_global_variables( - c.second, - symbol_table, - string_refinement_enabled); + for(java_bytecode_parse_treet &parse_tree : class_to_trees.second) + { + generate_constant_global_variables( + parse_tree, symbol_table, string_refinement_enabled); + } } status() << "Java: added " << (symbol_table.symbols.size() - before_constant_globals_size) @@ -682,10 +681,13 @@ bool java_bytecode_languaget::typecheck( { journalling_symbol_tablet symbol_table_journal = journalling_symbol_tablet::wrap(symbol_table); - for(const auto &c : java_class_loader.class_map) + for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map()) { - create_stub_global_symbols( - c.second, symbol_table_journal, class_hierarchy, *this); + for(const java_bytecode_parse_treet &parse_tree : class_to_trees.second) + { + create_stub_global_symbols( + parse_tree, symbol_table_journal, class_hierarchy, *this); + } } stub_global_initializer_factory.create_stub_global_initializer_symbols( @@ -998,7 +1000,20 @@ bool java_bytecode_languaget::final(symbol_table_baset &symbol_table) void java_bytecode_languaget::show_parse(std::ostream &out) { - java_class_loader(main_class).output(out); + java_class_loadert::parse_tree_with_overlayst &parse_trees = + java_class_loader(main_class); + parse_trees.front().output(out); + if(parse_trees.size() > 1) + { + out << "\n\nClass has the following overlays:\n\n"; + for(auto parse_tree_it = std::next(parse_trees.begin()); + parse_tree_it != parse_trees.end(); + ++parse_tree_it) + { + parse_tree_it->output(out); + } + out << "End of class overlays.\n"; + } } std::unique_ptr new_java_bytecode_language() diff --git a/src/java_bytecode/java_bytecode_parse_tree.cpp b/src/java_bytecode/java_bytecode_parse_tree.cpp index 6f6f0b92d41..5b73a88e911 100644 --- a/src/java_bytecode/java_bytecode_parse_tree.cpp +++ b/src/java_bytecode/java_bytecode_parse_tree.cpp @@ -8,6 +8,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_bytecode_parse_tree.h" +#include #include #include @@ -119,6 +120,25 @@ void java_bytecode_parse_treet::annotationt::element_value_pairt::output( out << expr2java(value, ns); } +bool java_bytecode_parse_treet::does_annotation_exist( + const annotationst &annotations, + const irep_idt &annotation_type_name) +{ + return + std::find_if( + annotations.begin(), + annotations.end(), + [&annotation_type_name](const annotationt &annotation) + { + if(annotation.type.id() != ID_pointer) + return false; + typet type = annotation.type.subtype(); + return + type.id() == ID_symbol + && to_symbol_type(type).get_identifier() == annotation_type_name; + }) != annotations.end(); +} + void java_bytecode_parse_treet::methodt::output(std::ostream &out) const { symbol_tablet symbol_table; diff --git a/src/java_bytecode/java_bytecode_parse_tree.h b/src/java_bytecode/java_bytecode_parse_tree.h index f41108500d2..7e583af3673 100644 --- a/src/java_bytecode/java_bytecode_parse_tree.h +++ b/src/java_bytecode/java_bytecode_parse_tree.h @@ -44,6 +44,10 @@ class java_bytecode_parse_treet typedef std::vector annotationst; + static bool does_annotation_exist( + const annotationst &annotations, + const irep_idt &annotation_type_name); + class instructiont { public: diff --git a/src/java_bytecode/java_class_loader.cpp b/src/java_bytecode/java_class_loader.cpp index f4c2ffb5d88..6954e963af1 100644 --- a/src/java_bytecode/java_class_loader.cpp +++ b/src/java_bytecode/java_class_loader.cpp @@ -9,7 +9,6 @@ Author: Daniel Kroening, kroening@kroening.com #include "java_class_loader.h" #include -#include #include #include @@ -17,7 +16,6 @@ Author: Daniel Kroening, kroening@kroening.com #include #include "java_bytecode_parser.h" -#include "jar_file.h" #include "library/java_core_models.inc" @@ -28,11 +26,10 @@ Author: Daniel Kroening, kroening@kroening.com /// JAVA_CORE_MODELS_SIZE, another macro defined in java_core_models.inc. unsigned char java_core_models[] = { JAVA_CORE_MODELS_DATA }; -java_bytecode_parse_treet &java_class_loadert::operator()( +java_class_loadert::parse_tree_with_overlayst &java_class_loadert::operator()( const irep_idt &class_name) { std::stack queue; - // Always require java.lang.Object, as it is the base of // internal classes such as array types. queue.push("java.lang.Object"); @@ -57,214 +54,252 @@ java_bytecode_parse_treet &java_class_loadert::operator()( irep_idt c=queue.top(); queue.pop(); - // do we have the class already? - if(class_map.find(c)!=class_map.end()) - continue; // got it already + if(class_map.count(c) != 0) + continue; debug() << "Reading class " << c << eom; - java_bytecode_parse_treet &parse_tree= + parse_tree_with_overlayst &parse_trees = get_parse_tree(class_loader_limit, c); - // add any dependencies to queue - for(java_bytecode_parse_treet::class_refst::const_iterator - it=parse_tree.class_refs.begin(); - it!=parse_tree.class_refs.end(); - it++) - queue.push(*it); + // Add any dependencies to queue + for(const java_bytecode_parse_treet &parse_tree : parse_trees) + for(const irep_idt &class_ref : parse_tree.class_refs) + queue.push(class_ref); // Add any extra dependencies provided by our caller: if(get_extra_class_refs) { - std::vector extra_class_refs = - get_extra_class_refs(c); - - for(const irep_idt &id : extra_class_refs) + for(const irep_idt &id : get_extra_class_refs(c)) queue.push(id); } } - return class_map[class_name]; -} - -void java_class_loadert::set_java_cp_include_files( - std::string &_java_cp_include_files) -{ - java_cp_include_files=_java_cp_include_files; -} - -/// Sets a function that provides extra dependencies for a particular class. -/// Currently used by the string preprocessor to note that even if we don't -/// have a definition of core string types, it will nontheless give them -/// certain known superclasses and interfaces, such as Serializable. -void java_class_loadert::set_extra_class_refs_function( - java_class_loadert::get_extra_class_refs_functiont func) -{ - get_extra_class_refs = func; + return class_map.at(class_name); } -/// Retrieves a class file from a jar and loads it into the tree -bool java_class_loadert::get_class_file( - java_class_loader_limitt &class_loader_limit, +optionalt java_class_loadert::get_class_from_jar( const irep_idt &class_name, const std::string &jar_file, - java_bytecode_parse_treet &parse_tree) + const jar_indext &jar_index, + java_class_loader_limitt &class_loader_limit) { - const auto &jm=jar_map[jar_file]; - - auto jm_it=jm.entries.find(class_name); + auto jar_index_it = jar_index.find(class_name); + if(jar_index_it == jar_index.end()) + return {}; - if(jm_it!=jm.entries.end()) - { - debug() << "Getting class `" << class_name << "' from JAR " - << jar_file << eom; - - std::string data=jar_pool(class_loader_limit, jar_file) - .get_entry(jm_it->second.class_file_name); - - std::istringstream istream(data); + debug() + << "Getting class `" << class_name << "' from JAR " << jar_file << eom; - java_bytecode_parse( - istream, - parse_tree, - get_message_handler()); + std::string data = + jar_pool(class_loader_limit, jar_file).get_entry(jar_index_it->second); - return true; - } - return false; + java_bytecode_parse_treet parse_tree; + std::istringstream istream(data); + if(java_bytecode_parse(istream, parse_tree, get_message_handler())) + return {}; + return parse_tree; } -/// Retrieves a class file from the internal jar and loads it into the tree -bool java_class_loadert::get_internal_class_file( - java_class_loader_limitt &class_loader_limit, - const irep_idt &class_name, - java_bytecode_parse_treet &parse_tree) +static bool is_overlay_class(const java_bytecode_parse_treet::classt &c) { - // Add internal jar file. The name is used to load it once only and - // reference it later. - std::string core_models="core-models.jar"; - jar_pool(class_loader_limit, - core_models, - java_core_models, - JAVA_CORE_MODELS_SIZE); - - // This does not read from the jar file but from the jar_filet object - // as we've just created it - read_jar_file(class_loader_limit, core_models); - return - get_class_file(class_loader_limit, class_name, core_models, parse_tree); + return java_bytecode_parse_treet::does_annotation_exist( + c.annotations, ID_overlay_class); } -java_bytecode_parse_treet &java_class_loadert::get_parse_tree( +/// Check through all the places class parse trees can appear and returns the +/// first implementation it finds plus any overlay class implementations +/// \param class_loader_limit: Filter to decide whether to load classes +/// \param class_name: Name of class to load +/// \returns The list of valid implementations, including overlays +/// \remarks +/// Allows multiple definitions of the same class to appear on the +/// classpath, so long as all but the first definition are marked with the +/// attribute `\@java::com.diffblue.OverlayClassImplementation`. +java_class_loadert::parse_tree_with_overlayst & +java_class_loadert::get_parse_tree( java_class_loader_limitt &class_loader_limit, const irep_idt &class_name) { - java_bytecode_parse_treet &parse_tree=class_map[class_name]; + parse_tree_with_overlayst &parse_trees = class_map[class_name]; + PRECONDITION(parse_trees.empty()); - // First check given JAR files - for(const auto &jf : jar_files) + // First add all given JAR files + for(const auto &jar_file : jar_files) { - read_jar_file(class_loader_limit, jf); - if(get_class_file(class_loader_limit, class_name, jf, parse_tree)) - return parse_tree; + jar_index_optcreft index = read_jar_file(class_loader_limit, jar_file); + if(!index) + continue; + optionalt parse_tree = + get_class_from_jar(class_name, jar_file, *index, class_loader_limit); + if(parse_tree) + parse_trees.push_back(*parse_tree); } + // Then add core models if(use_core_models) { - if(get_internal_class_file(class_loader_limit, class_name, parse_tree)) - return parse_tree; + // Add internal jar file. The name is used to load it once only and + // reference it later. + std::string core_models = "core-models.jar"; + jar_pool( + class_loader_limit, core_models, java_core_models, JAVA_CORE_MODELS_SIZE); + + // This does not read from the jar file but from the jar_filet object we + // just created + jar_index_optcreft index = read_jar_file(class_loader_limit, core_models); + if(index) + { + optionalt parse_tree = + get_class_from_jar(class_name, core_models, *index, class_loader_limit); + if(parse_tree) + parse_trees.push_back(*parse_tree); + } } - // See if we can find it in the class path - for(const auto &cp : config.java.classpath) + // Then add everything on the class path + for(const auto &cp_entry : config.java.classpath) { - // in a JAR? - if(has_suffix(cp, ".jar")) + if(has_suffix(cp_entry, ".jar")) { - read_jar_file(class_loader_limit, cp); - if(get_class_file(class_loader_limit, class_name, cp, parse_tree)) - return parse_tree; + jar_index_optcreft index = read_jar_file(class_loader_limit, cp_entry); + if(!index) + continue; + optionalt parse_tree = + get_class_from_jar(class_name, cp_entry, *index, class_loader_limit); + if(parse_tree) + parse_trees.push_back(*parse_tree); } else { - // in a given directory? - std::string full_path= + // Look in the given directory + const std::string class_file = class_name_to_file(class_name); + const std::string full_path = #ifdef _WIN32 - cp+'\\'+class_name_to_file(class_name); + cp_entry + '\\' + class_file; #else - cp+'/'+class_name_to_file(class_name); + cp_entry + '/' + class_file; #endif - // full class path starts with './' - if(class_loader_limit.load_class_file(full_path.substr(2)) && - std::ifstream(full_path)) + if(!class_loader_limit.load_class_file(class_file)) + continue; + + if(std::ifstream(full_path)) { - if(!java_bytecode_parse( - full_path, - parse_tree, - get_message_handler())) - return parse_tree; + debug() + << "Getting class `" << class_name << "' from file " << full_path + << eom; + java_bytecode_parse_treet parse_tree; + if(!java_bytecode_parse(full_path, parse_tree, get_message_handler())) + parse_trees.push_back(std::move(parse_tree)); } } } - // not found + if(!parse_trees.empty()) + { + auto parse_tree_it = parse_trees.begin(); + // If the first class implementation is an overlay emit a warning and + // skip over it until we find a non-overlay class + while(parse_tree_it != parse_trees.end()) + { + // Skip parse trees that failed to load - though these shouldn't exist yet + if(parse_tree_it->loading_successful) + { + if(!is_overlay_class(parse_tree_it->parsed_class)) + { + // Keep the non-overlay class + ++parse_tree_it; + break; + } + warning() + << "Skipping class " << class_name + << " marked with OverlayClassImplementation but found before" + " original definition" + << eom; + } + auto unloaded_or_overlay_out_of_order_it = parse_tree_it; + ++parse_tree_it; + parse_trees.erase(unloaded_or_overlay_out_of_order_it); + } + // Collect overlay classes + while(parse_tree_it != parse_trees.end()) + { + // Remove non-initial classes that aren't overlays + if(!is_overlay_class(parse_tree_it->parsed_class)) + { + warning() + << "Skipping duplicate definition of class " << class_name + << " not marked with OverlayClassImplementation" << eom; + auto duplicate_non_overlay_it = parse_tree_it; + ++parse_tree_it; + parse_trees.erase(duplicate_non_overlay_it); + } + else + ++parse_tree_it; + } + return parse_trees; + } + + // Not found or failed to load warning() << "failed to load class `" << class_name << '\'' << eom; + java_bytecode_parse_treet parse_tree; parse_tree.parsed_class.name=class_name; - return parse_tree; + parse_trees.push_back(std::move(parse_tree)); + return parse_trees; } void java_class_loadert::load_entire_jar( java_class_loader_limitt &class_loader_limit, - const std::string &file) + const std::string &jar_path) { - read_jar_file(class_loader_limit, file); - - const auto &jm=jar_map[file]; + jar_index_optcreft jar_index = read_jar_file(class_loader_limit, jar_path); + if(!jar_index) + return; - jar_files.push_front(file); + jar_files.push_front(jar_path); - for(const auto &e : jm.entries) + for(const auto &e : jar_index->get()) operator()(e.first); jar_files.pop_front(); } -void java_class_loadert::read_jar_file( +java_class_loadert::jar_index_optcreft java_class_loadert::read_jar_file( java_class_loader_limitt &class_loader_limit, - const irep_idt &file) + const std::string &jar_path) { - // done already? - if(jar_map.find(file)!=jar_map.end()) - return; + auto existing_it = jars_by_path.find(jar_path); + if(existing_it != jars_by_path.end()) + return std::cref(existing_it->second); std::vector filenames; try { - filenames=this->jar_pool(class_loader_limit, id2string(file)).filenames(); + filenames = this->jar_pool(class_loader_limit, jar_path).filenames(); } catch(const std::runtime_error &) { - error() << "failed to open JAR file `" << file << "'" << eom; - return; + error() << "failed to open JAR file `" << jar_path << "'" << eom; + return jar_index_optcreft(); } - debug() << "adding JAR file `" << file << "'" << eom; + debug() << "Adding JAR file `" << jar_path << "'" << eom; - // create a new entry in the map and initialize using the list of file names + // Create a new entry in the map and initialize using the list of file names // that were retained in the jar_filet by the class_loader_limit filter - auto &jm=jar_map[file]; + jar_indext &jar_index = jars_by_path[jar_path]; for(auto &file_name : filenames) { - // does it end on .class? if(has_suffix(file_name, ".class")) { - debug() << "read class file " << file_name << " from " << file << eom; + debug() + << "Found class file " << file_name << " in JAR `" << jar_path << "'" + << eom; irep_idt class_name=file_to_class_name(file_name); - - // record - jm.entries[class_name].class_file_name=file_name; + jar_index[class_name] = file_name; } } + return std::cref(jar_index); } std::string java_class_loadert::file_to_class_name(const std::string &file) @@ -330,15 +365,6 @@ jar_filet &java_class_loadert::jar_pool( return it->second; } -/// Adds the list of classes to the load queue, forcing them to be loaded even -/// without explicit reference -/// \param classes: list of class identifiers -void java_class_loadert::add_load_classes(const std::vector &classes) -{ - for(const auto &id : classes) - java_load_classes.push_back(id); -} - jar_filet &java_class_loadert::jar_pool( java_class_loader_limitt &class_loader_limit, const std::string &buffer_name, diff --git a/src/java_bytecode/java_class_loader.h b/src/java_bytecode/java_class_loader.h index 58241e6536a..6f46072aaa2 100644 --- a/src/java_bytecode/java_class_loader.h +++ b/src/java_bytecode/java_class_loader.h @@ -15,6 +15,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include "java_bytecode_parse_tree.h" #include "java_class_loader_limit.h" @@ -23,49 +24,25 @@ Author: Daniel Kroening, kroening@kroening.com class java_class_loadert:public messaget { public: - // Default constructor does not use core models - // for backward compatibility of unit tests - java_class_loadert() : - use_core_models(true) - {} + /// A map associating logical class names with the name of the .class file + /// implementing it for all classes inside a single JAR file + typedef std::map jar_indext; - java_bytecode_parse_treet &operator()(const irep_idt &); - - void set_java_cp_include_files(std::string &); - void add_load_classes(const std::vector &); + typedef std::list parse_tree_with_overlayst; + typedef std::map + parse_tree_with_overridest_mapt; /// A function that yields a list of extra dependencies based on a class name. typedef std::function(const irep_idt &)> get_extra_class_refs_functiont; - void set_extra_class_refs_function(get_extra_class_refs_functiont func); - - static std::string file_to_class_name(const std::string &); - static std::string class_name_to_file(const irep_idt &); - - void add_jar_file(const std::string &f) + // Default constructor does not use core models + // for backward compatibility of unit tests + java_class_loadert() : use_core_models(true) { - jar_files.push_back(f); } - void load_entire_jar(java_class_loader_limitt &, const std::string &f); - - void read_jar_file(java_class_loader_limitt &, const irep_idt &); - - /// Attempts to load the class from the given jar. - /// Returns true if found and loaded - bool get_class_file( - java_class_loader_limitt &class_loader_limit, - const irep_idt &class_name, - const std::string &jar_file, - java_bytecode_parse_treet &parse_tree); - - /// Attempts to load the class from the internal core models. - /// Returns true if found and loaded - bool get_internal_class_file( - java_class_loader_limitt &class_loader_limit, - const irep_idt &class_name, - java_bytecode_parse_treet &parse_tree); + parse_tree_with_overlayst &operator()(const irep_idt &class_name); /// Given a \p class_name (e.g. "java.lang.Thread") try to load the /// corresponding .class file by first scanning all .jar files whose @@ -74,58 +51,80 @@ class java_class_loadert:public messaget /// \p limit to limit the class files that it might (directly or indirectly) /// load and returns a default-constructed parse tree when unable to find the /// .class file. - java_bytecode_parse_treet &get_parse_tree( - java_class_loader_limitt &limit, const irep_idt &class_name); + parse_tree_with_overlayst &get_parse_tree( + java_class_loader_limitt &class_loader_limit, + const irep_idt &class_name); + + void set_java_cp_include_files(std::string &java_cp_include_files) + { + this->java_cp_include_files = java_cp_include_files; + } + /// Sets a function that provides extra dependencies for a particular class. + /// Currently used by the string preprocessor to note that even if we don't + /// have a definition of core string types, it will nontheless give them + /// certain known superclasses and interfaces, such as Serializable. + void set_extra_class_refs_function(get_extra_class_refs_functiont func) + { + get_extra_class_refs = func; + } + /// Adds the list of classes to the load queue, forcing them to be loaded even + /// without explicit reference + /// \param classes: list of class identifiers + void add_load_classes(const std::vector &classes) + { + for(const auto &id : classes) + java_load_classes.push_back(id); + } + void add_jar_file(const std::string &f) + { + jar_files.push_back(f); + } + void set_use_core_models(bool use_core_models) + { + this->use_core_models = use_core_models; + } + + static std::string file_to_class_name(const std::string &); + static std::string class_name_to_file(const irep_idt &); - /// Maps class names to the bytecode parse trees. - typedef std::map class_mapt; - class_mapt class_map; + void load_entire_jar(java_class_loader_limitt &, const std::string &jar_path); + + const jar_indext &get_jar_index(const std::string &jar_path) + { + return jars_by_path.at(jar_path); + } + /// Map from class names to the bytecode parse trees + fixed_keys_map_wrappert + get_class_with_overlays_map() + { + return fixed_keys_map_wrappert(class_map); + } + const java_bytecode_parse_treet &get_original_class( + const irep_idt &class_name) + { + return class_map.at(class_name).front(); + } - /// Load jar archive(from cache if already loaded) + /// Load jar archive or retrieve from cache if already loaded /// \param limit /// \param filename name of the file - jar_filet &jar_pool(java_class_loader_limitt &limit, - const std::string &filename); + jar_filet &jar_pool( + java_class_loader_limitt &limit, const std::string &filename); - /// Load jar archive(from cache if already loaded) + /// Load jar archive or retrieve from cache if already loaded /// \param limit /// \param buffer_name name of the original file /// \param pmem memory pointer to the contents of the file /// \param size size of the memory buffer /// Note that this mocks the existence of a file which may /// or may not exist since the actual data bis retrieved from memory. - jar_filet &jar_pool(java_class_loader_limitt &limit, - const std::string &buffer_name, - const void *pmem, - size_t size); - - /// An object of this class represents the information of _a single JAR file_ - /// that is relevant for a class loader: a map associating logical class names - /// with with handles (struct entryt) that describe one .class file inside the - /// JAR. Currently the handle only contains the name of the .class file. - class jar_map_entryt - { - public: - struct entryt - { - std::string class_file_name; - }; - - /// Maps class names to jar file entries. - typedef std::map entriest; - entriest entries; - }; - - /// Maps .jar filesystem paths to .class file descriptors (see - /// jar_map_entryt). - typedef std::map jar_mapt; - jar_mapt jar_map; - - /// List of filesystem paths to .jar files that will be used, in the given - /// order, to find and load a class, provided its name (see \ref - /// get_parse_tree). - std::list jar_files; + jar_filet &jar_pool( + java_class_loader_limitt &limit, + const std::string &buffer_name, + const void *pmem, + size_t size); +private: /// Either a regular expression matching files that will be allowed to be /// loaded or a string of the form `@PATH` where PATH is the file path of a /// json file storing an explicit list of files allowed to be loaded. See @@ -133,13 +132,36 @@ class java_class_loadert:public messaget /// information. std::string java_cp_include_files; + /// List of filesystem paths to .jar files that will be used, in the given + /// order, to find and load a class, provided its name (see \ref + /// get_parse_tree). + std::list jar_files; + /// Indicates that the core models should be loaded bool use_core_models; -private: - std::map m_archives; + /// Classes to be explicitly loaded std::vector java_load_classes; get_extra_class_refs_functiont get_extra_class_refs; + + /// The jar_indext for each jar file we've read + std::map jars_by_path; + + /// Jar files that have been loaded + std::map m_archives; + /// Map from class names to the bytecode parse trees + parse_tree_with_overridest_mapt class_map; + + typedef optionalt> + jar_index_optcreft; + jar_index_optcreft read_jar_file( + java_class_loader_limitt &class_loader_limit, + const std::string &jar_path); + optionalt get_class_from_jar( + const irep_idt &class_name, + const std::string &jar_file, + const jar_indext &jar_index, + java_class_loader_limitt &class_loader_limit); }; #endif // CPROVER_JAVA_BYTECODE_JAVA_CLASS_LOADER_H diff --git a/src/util/fixed_keys_map_wrapper.h b/src/util/fixed_keys_map_wrapper.h new file mode 100644 index 00000000000..644a7b5727c --- /dev/null +++ b/src/util/fixed_keys_map_wrapper.h @@ -0,0 +1,117 @@ +// Copyright 2018 DiffBlue Limited. All Rights Reserved. + +/// \file +/// A wrapper for maps that gives read-write access to elements but without +/// allowing addition or removal of elements + +#ifndef CPROVER_UTIL_FIXED_KEYS_MAP_WRAPPER_H +#define CPROVER_UTIL_FIXED_KEYS_MAP_WRAPPER_H + +template +class fixed_keys_map_wrappert +{ +private: + mapt ↦ + +public: + // NOLINTNEXTLINE(readability/identifiers) - should match STL + typedef typename mapt::iterator iterator; + // NOLINTNEXTLINE(readability/identifiers) - should match STL + typedef typename mapt::const_iterator const_iterator; + // NOLINTNEXTLINE(readability/identifiers) - should match STL + typedef typename mapt::reverse_iterator reverse_iterator; + // NOLINTNEXTLINE(readability/identifiers) - should match STL + typedef typename mapt::const_reverse_iterator const_reverse_iterator; + // NOLINTNEXTLINE(readability/identifiers) - should match STL + typedef typename mapt::key_type key_type; + // NOLINTNEXTLINE(readability/identifiers) - should match STL + typedef typename mapt::mapped_type mapped_type; + // NOLINTNEXTLINE(readability/identifiers) - should match STL + typedef typename mapt::size_type size_type; + + explicit fixed_keys_map_wrappert(mapt &map) : map(map) + { + } + + iterator begin() + { + return map.begin(); + } + const_iterator begin() const + { + return map.begin(); + } + iterator end() + { + return map.end(); + } + const_iterator end() const + { + return map.end(); + } + reverse_iterator rbegin() + { + return map.rbegin(); + } + const_reverse_iterator rbegin() const + { + return map.rbegin(); + } + reverse_iterator rend() + { + return map.rend(); + } + const_reverse_iterator rend() const + { + return map.rend(); + } + const_iterator cbegin() const + { + return map.begin(); + } + const_iterator cend() const + { + return map.end(); + } + const_reverse_iterator crbegin() const + { + return map.rbegin(); + } + const_reverse_iterator crend() const + { + return map.rend(); + } + + bool empty() const + { + return map.empty(); + } + size_type size() const + { + return map.size(); + } + size_type count(const key_type &key) const + { + return map.count(key); + } + + const mapped_type &at(const key_type &key) const + { + return map.at(key); + } + mapped_type &at(const key_type &key) + { + return map.at(key); + } + + iterator find(const key_type &key) + { + return map.find(key); + } + const_iterator find(const key_type &key) const + { + return map.find(key); + } +}; + +#endif // CPROVER_UTIL_FIXED_KEYS_MAP_WRAPPER_H diff --git a/src/util/irep_ids.def b/src/util/irep_ids.def index 643377bf4fc..b95433a35ab 100644 --- a/src/util/irep_ids.def +++ b/src/util/irep_ids.def @@ -841,6 +841,8 @@ IREP_ID_TWO(java_lambda_method_handles, lambda_method_handles) IREP_ID_ONE(havoc_object) IREP_ID_TWO(overflow_shl, overflow-shl) IREP_ID_TWO(C_no_initialization_required, #no_initialization_required) +IREP_ID_TWO(overlay_class, java::com.diffblue.OverlayClassImplementation) +IREP_ID_TWO(overlay_method, java::com.diffblue.OverlayMethodImplementation) #undef IREP_ID_ONE #undef IREP_ID_TWO