Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/hotspot/share/opto/c2_globals.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,9 @@
product(bool, IncrementalInlineForceCleanup, false, DIAGNOSTIC, \
"do cleanup after every iteration of incremental inlining") \
\
product(bool, IncrementalInlineVector, true, DIAGNOSTIC, \
"Inline fallback implementation of failed vector intrinsics") \
\
product(intx, LiveNodeCountInliningCutoff, 40000, \
"max number of live nodes in a method") \
range(0, max_juint / 8) \
Expand Down
23 changes: 23 additions & 0 deletions src/hotspot/share/opto/callGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,29 @@ CallGenerator* CallGenerator::for_mh_late_inline(ciMethod* caller, ciMethod* cal
return cg;
}

// Late-inline call generator used for vector intrinsic calls.
// generate() first performs the inherited LateInlineCallGenerator work and
// then, when the IncrementalInlineVector flag is enabled, queues an additional
// late-inline generator for the same method and call node on the current
// compilation's vector late-inline list.
class LateInlineVectorCallGenerator : public LateInlineCallGenerator {
 public:
  LateInlineVectorCallGenerator(ciMethod* method, CallGenerator* intrinsic_cg)
    : LateInlineCallGenerator(method, intrinsic_cg) {}

  // Identifies this generator as the vector flavour of late inlining.
  virtual bool is_vector_late_inline() const { return true; }

  // Returns the JVM state produced by LateInlineCallGenerator::generate().
  virtual JVMState* generate(JVMState* jvms) {
    JVMState* result = LateInlineCallGenerator::generate(jvms);
    if (!IncrementalInlineVector) {
      // Fallback inlining is switched off: nothing further to register.
      return result;
    }
    // Build a plain inline generator for the method, wrap it for late
    // inlining, and bind it to the call node owned by this generator.
    CallGenerator* body_cg = CallGenerator::for_inline(method());
    CallGenerator* late_cg = CallGenerator::for_late_inline(method(), body_cg);
    CallGenerator* fallback_cg = late_cg->with_call_node(call_node());
    Compile::current()->add_vector_late_inline(fallback_cg);
    return result;
  }
};

// Factory for the vector late-inline generator: wraps intrinsic_cg in a new
// LateInlineVectorCallGenerator for method m.
CallGenerator* CallGenerator::for_vector_late_inline(ciMethod* m, CallGenerator* intrinsic_cg) {
  CallGenerator* vector_cg = new LateInlineVectorCallGenerator(m, intrinsic_cg);
  return vector_cg;
}


// Allow inlining decisions to be delayed
class LateInlineVirtualCallGenerator : public VirtualCallGenerator {
private:
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/opto/callGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class CallGenerator : public ArenaObj {
// same but for method handle calls
virtual bool is_mh_late_inline() const { return false; }
virtual bool is_string_late_inline() const { return false; }
virtual bool is_vector_late_inline() const { return false; }
virtual bool is_boxing_late_inline() const { return false; }
virtual bool is_vector_reboxing_late_inline() const { return false; }
virtual bool is_virtual_late_inline() const { return false; }
Expand Down Expand Up @@ -142,6 +143,7 @@ class CallGenerator : public ArenaObj {
static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg);
static CallGenerator* for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const);
static CallGenerator* for_string_late_inline(ciMethod* m, CallGenerator* inline_cg);
static CallGenerator* for_vector_late_inline(ciMethod* m, CallGenerator* intrinsic_cg);
static CallGenerator* for_boxing_late_inline(ciMethod* m, CallGenerator* inline_cg);
static CallGenerator* for_vector_reboxing_late_inline(ciMethod* m, CallGenerator* inline_cg);
static CallGenerator* for_late_inline_virtual(ciMethod* m, int vtable_index, float expected_uses);
Expand Down
25 changes: 25 additions & 0 deletions src/hotspot/share/opto/compile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,7 @@ void Compile::remove_useless_node(Node* dead) {
remove_useless_late_inlines( &_late_inlines, dead);
remove_useless_late_inlines( &_string_late_inlines, dead);
remove_useless_late_inlines( &_boxing_late_inlines, dead);
remove_useless_late_inlines( &_vector_late_inlines, dead);
remove_useless_late_inlines(&_vector_reboxing_late_inlines, dead);

if (dead->is_CallStaticJava()) {
Expand Down Expand Up @@ -480,6 +481,7 @@ void Compile::disconnect_useless_nodes(Unique_Node_List& useful, Unique_Node_Lis
remove_useless_late_inlines( &_late_inlines, useful);
remove_useless_late_inlines( &_string_late_inlines, useful);
remove_useless_late_inlines( &_boxing_late_inlines, useful);
remove_useless_late_inlines( &_vector_late_inlines, useful);
remove_useless_late_inlines(&_vector_reboxing_late_inlines, useful);
DEBUG_ONLY(verify_graph_edges(true /*check for no_dead_code*/, root_and_safepoints);)
}
Expand Down Expand Up @@ -693,6 +695,7 @@ Compile::Compile(ciEnv* ci_env, ciMethod* target, int osr_bci,
_string_late_inlines(comp_arena(), 2, 0, nullptr),
_boxing_late_inlines(comp_arena(), 2, 0, nullptr),
_vector_reboxing_late_inlines(comp_arena(), 2, 0, nullptr),
_vector_late_inlines(comp_arena(), 2, 0, nullptr),
_late_inlines_pos(0),
_has_mh_late_inlines(false),
_oom(false),
Expand Down Expand Up @@ -2158,6 +2161,24 @@ void Compile::shuffle_late_inlines() {
shuffle_array(*C, _late_inlines);
}

void Compile::transfer_vector_late_inlines() {
for (int i = 0; i < _vector_late_inlines.length(); i++) {
CallGenerator* cg = _vector_late_inlines.at(i);
// When a vector intrinsic fails, set_generator(cg) caches the
// LateInlineVectorCallGenerator on the call node to allow retries
// if IGVN optimizes the call node's inputs. If the call node is not
// on the IGVN worklist when cleanup runs, CallStaticJavaNode::Ideal
// does not fire and the cached generator persists. Once _late_inlines
// drains and we commit to the fallback here, clear the stale generator
// to prevent a subsequent IGVN pass from re-registering the intrinsic
// attempt into _late_inlines alongside the fallback, which would create
// duplicate call_node entries.
cg->call_node()->as_CallJava()->set_generator(nullptr);
add_late_inline(cg);
}
_vector_late_inlines.clear();
}

// Perform incremental inlining until bound on number of live nodes is reached
void Compile::inline_incrementally(PhaseIterGVN& igvn) {
TracePhase tp(_t_incrInline);
Expand Down Expand Up @@ -2215,6 +2236,10 @@ void Compile::inline_incrementally(PhaseIterGVN& igvn) {
print_method(PHASE_INCREMENTAL_INLINE_STEP, 3);

if (failing()) return;

if (_late_inlines.length() == 0 && _vector_late_inlines.length() > 0) {
Copy link
Copy Markdown
Contributor

@iwanowww iwanowww May 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you extract it into a helper method? Otherwise, the patch looks good. I'll submit it for testing.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest renaming transfer_vector_late_inlines() to process_vector_late_inlines() and moving the _vector_late_inlines.length() > 0 guard there.

transfer_vector_late_inlines();
}
}

igvn_worklist()->ensure_empty(); // should be done with igvn
Expand Down
7 changes: 7 additions & 0 deletions src/hotspot/share/opto/compile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ class Compile : public Phase {
GrowableArray<CallGenerator*> _boxing_late_inlines; // same but for boxing operations

GrowableArray<CallGenerator*> _vector_reboxing_late_inlines; // same but for vector reboxing operations
GrowableArray<CallGenerator*> _vector_late_inlines; // inline fallback implementation for failed intrinsics

int _late_inlines_pos; // Where in the queue should the next late inlining candidate go (emulate depth first inlining)
bool _has_mh_late_inlines; // Can there still be a method handle late inlining pending?
Expand Down Expand Up @@ -508,6 +509,12 @@ class Compile : public Phase {
InlinePrinter _inline_printer;

public:

// Queue cg on the list of vector late-inline candidates.
void add_vector_late_inline(CallGenerator* cg) { _vector_late_inlines.push(cg); }
void transfer_vector_late_inlines();

void* barrier_set_state() const { return _barrier_set_state; }

InlinePrinter* inline_printer() { return &_inline_printer; }
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/opto/doCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
cg_intrinsic = cg;
cg = nullptr;
} else if (IncrementalInline && should_delay_vector_inlining(callee, jvms)) {
return CallGenerator::for_late_inline(callee, cg);
return CallGenerator::for_vector_late_inline(callee, cg);
} else {
return cg;
}
Expand Down