diff --git a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp index dacc8ce9c261c..eec979efbc0d1 100644 --- a/src/hotspot/share/opto/c2_globals.hpp +++ b/src/hotspot/share/opto/c2_globals.hpp @@ -800,6 +800,9 @@ product(bool, IncrementalInlineForceCleanup, false, DIAGNOSTIC, \ "do cleanup after every iteration of incremental inlining") \ \ + product(bool, IncrementalInlineVector, true, DIAGNOSTIC, \ + "Inline fallback implementation of failed vector intrinsics") \ + \ product(intx, LiveNodeCountInliningCutoff, 40000, \ "max number of live nodes in a method") \ range(0, max_juint / 8) \ diff --git a/src/hotspot/share/opto/callGenerator.cpp b/src/hotspot/share/opto/callGenerator.cpp index 49897ca3c1763..d67af873460f2 100644 --- a/src/hotspot/share/opto/callGenerator.cpp +++ b/src/hotspot/share/opto/callGenerator.cpp @@ -437,6 +437,29 @@ CallGenerator* CallGenerator::for_mh_late_inline(ciMethod* caller, ciMethod* cal return cg; } +class LateInlineVectorCallGenerator : public LateInlineCallGenerator { /* Late-inline call generator for vector intrinsics. It identifies itself through is_vector_late_inline() and, in generate(), can additionally queue a fallback generator that inlines the regular implementation of the same method. */ + public: + LateInlineVectorCallGenerator(ciMethod* method, CallGenerator* intrinsic_cg) : + LateInlineCallGenerator(method, intrinsic_cg) {} + + virtual bool is_vector_late_inline() const { return true; } + + virtual JVMState* generate(JVMState* jvms) { /* Runs the base-class generate() first. When IncrementalInlineVector is enabled, it then wraps for_inline(method()) in a late-inline generator, binds that generator to the call node of this generator, and registers it with Compile::add_vector_late_inline(). The JVMState returned is the one produced by the base class. */ + JVMState* new_jvms = LateInlineCallGenerator::generate(jvms); + if (IncrementalInlineVector) { + CallGenerator* inline_cg = CallGenerator::for_inline(method()); + CallGenerator* fallback = CallGenerator::for_late_inline(method(), inline_cg)->with_call_node(call_node()); + Compile::current()->add_vector_late_inline(fallback); + } + return new_jvms; + } +}; + +CallGenerator* CallGenerator::for_vector_late_inline(ciMethod* m, CallGenerator* intrinsic_cg) { /* Factory: returns a new LateInlineVectorCallGenerator for method m that wraps intrinsic_cg. */ + return new LateInlineVectorCallGenerator(m, intrinsic_cg); +} + + // Allow inlining decisions to be delayed class LateInlineVirtualCallGenerator : public VirtualCallGenerator { private: diff --git a/src/hotspot/share/opto/callGenerator.hpp
b/src/hotspot/share/opto/callGenerator.hpp index 75ba6f709c07c..b786f392b8a19 100644 --- a/src/hotspot/share/opto/callGenerator.hpp +++ b/src/hotspot/share/opto/callGenerator.hpp @@ -75,6 +75,7 @@ class CallGenerator : public ArenaObj { // same but for method handle calls virtual bool is_mh_late_inline() const { return false; } virtual bool is_string_late_inline() const { return false; } + virtual bool is_vector_late_inline() const { return false; } /* base default is false; LateInlineVectorCallGenerator in callGenerator.cpp overrides it to return true */ virtual bool is_boxing_late_inline() const { return false; } virtual bool is_vector_reboxing_late_inline() const { return false; } virtual bool is_virtual_late_inline() const { return false; } @@ -142,6 +143,7 @@ class CallGenerator : public ArenaObj { static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg); static CallGenerator* for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const); static CallGenerator* for_string_late_inline(ciMethod* m, CallGenerator* inline_cg); + static CallGenerator* for_vector_late_inline(ciMethod* m, CallGenerator* intrinsic_cg); /* returns a new LateInlineVectorCallGenerator for m that wraps intrinsic_cg */ static CallGenerator* for_boxing_late_inline(ciMethod* m, CallGenerator* inline_cg); static CallGenerator* for_vector_reboxing_late_inline(ciMethod* m, CallGenerator* inline_cg); static CallGenerator* for_late_inline_virtual(ciMethod* m, int vtable_index, float expected_uses); diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp index 382c8f89a5fcb..dc461f20f3bd9 100644 --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -416,6 +416,7 @@ void Compile::remove_useless_node(Node* dead) { remove_useless_late_inlines( &_late_inlines, dead); remove_useless_late_inlines( &_string_late_inlines, dead); remove_useless_late_inlines( &_boxing_late_inlines, dead); + remove_useless_late_inlines( &_vector_late_inlines, dead); remove_useless_late_inlines(&_vector_reboxing_late_inlines, dead); if (dead->is_CallStaticJava()) { @@ -480,6 +481,7 @@ void
Compile::disconnect_useless_nodes(Unique_Node_List& useful, Unique_Node_Lis remove_useless_late_inlines( &_late_inlines, useful); remove_useless_late_inlines( &_string_late_inlines, useful); remove_useless_late_inlines( &_boxing_late_inlines, useful); + remove_useless_late_inlines( &_vector_late_inlines, useful); remove_useless_late_inlines(&_vector_reboxing_late_inlines, useful); DEBUG_ONLY(verify_graph_edges(true /*check for no_dead_code*/, root_and_safepoints);) } @@ -693,6 +695,7 @@ Compile::Compile(ciEnv* ci_env, ciMethod* target, int osr_bci, _string_late_inlines(comp_arena(), 2, 0, nullptr), _boxing_late_inlines(comp_arena(), 2, 0, nullptr), _vector_reboxing_late_inlines(comp_arena(), 2, 0, nullptr), + _vector_late_inlines(comp_arena(), 2, 0, nullptr), _late_inlines_pos(0), _has_mh_late_inlines(false), _oom(false), @@ -2158,6 +2161,32 @@ void Compile::shuffle_late_inlines() { shuffle_array(*C, _late_inlines); } +void Compile::process_vector_late_inlines() { /* Walks _vector_late_inlines in order. Entries whose callee has compiled code with inline_instructions_size() above InlineSmallCode are skipped; every other entry has the generator cached on its call node cleared and is then passed to add_late_inline(). The list is emptied at the end, so skipped entries are dropped as well. */ + for (int i = 0; i < _vector_late_inlines.length(); i++) { + CallGenerator* cg = _vector_late_inlines.at(i); + ciMethod* callee = cg->method(); + + // Skip fallback inlining for callees already compiled into large nmethods. + if (callee->has_compiled_code() && + callee->inline_instructions_size() > InlineSmallCode) { + continue; + } + + // When a vector intrinsic fails, set_generator(cg) caches the + // LateInlineVectorCallGenerator on the call node to allow retries + // if IGVN optimizes the call node's inputs. If the call node is not + // on the IGVN worklist when cleanup runs, CallStaticJavaNode::Ideal + // does not fire and the cached generator persists. Once _late_inlines + // drains and we commit to the fallback here, clear the stale generator + // to prevent a subsequent IGVN pass from re-registering the intrinsic + // attempt into _late_inlines alongside the fallback, which would create + // duplicate call_node entries.
+ cg->call_node()->as_CallJava()->set_generator(nullptr); + add_late_inline(cg); /* the fallback now sits in the regular late-inline queue */ + } + _vector_late_inlines.clear(); /* each entry was either skipped or handed over above */ +} + // Perform incremental inlining until bound on number of live nodes is reached void Compile::inline_incrementally(PhaseIterGVN& igvn) { TracePhase tp(_t_incrInline); @@ -2215,6 +2244,10 @@ void Compile::inline_incrementally(PhaseIterGVN& igvn) { print_method(PHASE_INCREMENTAL_INLINE_STEP, 3); if (failing()) return; + + if (_late_inlines.length() == 0) { /* only once the regular queue is empty are the pending vector fallbacks promoted into it */ + process_vector_late_inlines(); + } } igvn_worklist()->ensure_empty(); // should be done with igvn diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp index ff0085d79deac..a3ee4ed6f8ef7 100644 --- a/src/hotspot/share/opto/compile.hpp +++ b/src/hotspot/share/opto/compile.hpp @@ -480,6 +480,7 @@ class Compile : public Phase { GrowableArray _boxing_late_inlines; // same but for boxing operations GrowableArray _vector_reboxing_late_inlines; // same but for vector reboxing operations + /* NOTE(review): GrowableArray appears without an element type in this rendering of the patch; confirm the declaration against the real header */ GrowableArray _vector_late_inlines; // inline fallback implementation for failed intrinsics int _late_inlines_pos; // Where in the queue should the next late inlining candidate go (emulate depth first inlining) bool _has_mh_late_inlines; // Can there still be a method handle late inlining pending?
@@ -508,6 +509,12 @@ class Compile : public Phase { InlinePrinter _inline_printer; public: + + void add_vector_late_inline(CallGenerator* cg) { /* appends cg to _vector_late_inlines; the list is consumed by process_vector_late_inlines() */ + _vector_late_inlines.push(cg); + } + void process_vector_late_inlines(); /* defined in compile.cpp; moves queued vector fallbacks into the regular late-inline queue and clears the list */ + void* barrier_set_state() const { return _barrier_set_state; } InlinePrinter* inline_printer() { return &_inline_printer; } diff --git a/src/hotspot/share/opto/doCall.cpp b/src/hotspot/share/opto/doCall.cpp index d6e75f17f5012..af517a4a2aa8f 100644 --- a/src/hotspot/share/opto/doCall.cpp +++ b/src/hotspot/share/opto/doCall.cpp @@ -166,7 +166,7 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool cg_intrinsic = cg; cg = nullptr; } else if (IncrementalInline && should_delay_vector_inlining(callee, jvms)) { - return CallGenerator::for_late_inline(callee, cg); + return CallGenerator::for_vector_late_inline(callee, cg); /* delayed vector intrinsics now use the vector-specific late-inline generator instead of the generic one */ } else { return cg; } diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java b/test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java index c6329c70f6594..9cc88c32b6602 100644 --- a/test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java +++ b/test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java @@ -47,7 +47,7 @@ public static void main(String[] args) { public int call() { return 1; } @Test - @IR(failOn = {IRNode.CMP_I, IRNode.CMOVE_I}) + @IR(failOn = {IRNode.CMP_I, IRNode.CMOVE_I}, applyIf = {"IncrementalInlineVector", "false"}) @IR(counts = {IRNode.VECTOR_TEST, "1"}) public int branch(long maskLong) { var mask = VectorMask.fromLong(ByteVector.SPECIES_PREFERRED, maskLong); @@ -55,8 +55,8 @@ public int branch(long maskLong) { } @Test - @IR(failOn = {IRNode.CMP_I}) - @IR(counts = {IRNode.VECTOR_TEST, "1", IRNode.CMOVE_I, "1"}) + @IR(failOn = {IRNode.CMP_I}, applyIf = {"IncrementalInlineVector", "false"}) + @IR(counts = {IRNode.VECTOR_TEST, "1", IRNode.CMOVE_I, "1"}, applyIf = {"IncrementalInlineVector", "false"}) public int cmove(long maskLong) { var mask =
VectorMask.fromLong(ByteVector.SPECIES_PREFERRED, maskLong); return mask.allTrue() ? 1 : 0; diff --git a/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java b/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java index 09185f63c6942..c21a6d86d7c01 100644 --- a/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java +++ b/test/hotspot/jtreg/compiler/vectorapi/VectorMaskCompareNotTest.java @@ -1294,7 +1294,7 @@ public static void testCompareMaskNotDoubleNegative() { public static void main(String[] args) { TestFramework testFramework = new TestFramework(); testFramework.setDefaultWarmup(5000) - .addFlags("--add-modules=jdk.incubator.vector") + .addFlags("--add-modules=jdk.incubator.vector", "-XX:InlineSmallCode=100000") /* NOTE(review): presumably raised so that process_vector_late_inlines() does not skip a callee for exceeding InlineSmallCode; confirm */ .start(); } }