Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 24 additions & 15 deletions src/Simplify_Exprs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,10 @@ Expr Simplify::visit(const VectorReduce *op, ExprInfo *info) {
auto rewrite = IRMatcher::rewriter(IRMatcher::h_add(value, lanes), op->type);
if (rewrite(h_add(x * broadcast(y, arg_lanes), lanes), h_add(x, lanes) * broadcast(y, lanes)) ||
rewrite(h_add(broadcast(x, arg_lanes) * y, lanes), h_add(y, lanes) * broadcast(x, lanes)) ||
rewrite(h_add(broadcast(x, arg_lanes), lanes), broadcast(x * factor, lanes))) {
rewrite(h_add(broadcast(x, arg_lanes), lanes), broadcast(x * factor, lanes)) ||
rewrite(h_add(broadcast(x, c0), lanes), broadcast(h_add(x, lanes / c0), c0), lanes % c0 == 0) ||
rewrite(h_add(broadcast(x, c0), lanes), broadcast(h_add(x, 1) * (c0 / lanes), lanes), c0 % lanes == 0) ||
false) {
return mutate(rewrite.result, info);
}
break;
Expand All @@ -142,8 +145,10 @@ Expr Simplify::visit(const VectorReduce *op, ExprInfo *info) {
rewrite(h_min(max(x, broadcast(y, arg_lanes)), lanes), max(h_min(x, lanes), broadcast(y, lanes))) ||
rewrite(h_min(max(broadcast(x, arg_lanes), y), lanes), max(h_min(y, lanes), broadcast(x, lanes))) ||
rewrite(h_min(broadcast(x, arg_lanes), lanes), broadcast(x, lanes)) ||
rewrite(h_min(broadcast(x, c0), lanes), h_min(x, lanes), factor % c0 == 0) ||
rewrite(h_min(ramp(x, y, arg_lanes), lanes), x + min(y * (arg_lanes - 1), 0)) ||
rewrite(h_min(broadcast(x, c0), 1), h_min(x, 1)) ||
rewrite(h_min(broadcast(x, c0), lanes), broadcast(h_min(x, lanes / c0), c0), lanes % c0 == 0) ||
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the analog to the third h_add rule is missing:

rewrite(h_min(broadcast(x, c0), lanes), broadcast(h_min(x, 1), lanes), c0 % lanes == 0) ||

rewrite(h_min(ramp(x, y, arg_lanes), 1), x + min(y * (arg_lanes - 1), 0)) ||
rewrite(h_min(ramp(x, y, arg_lanes), lanes), ramp(x + min(y * (factor - 1), 0), y * factor, lanes)) ||
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these two rules need to be guarded with no_overflow_type, because ramps are only strictly decreasing or increasing if there's no overflow.

false) {
return mutate(rewrite.result, info);
}
Expand All @@ -156,8 +161,10 @@ Expr Simplify::visit(const VectorReduce *op, ExprInfo *info) {
rewrite(h_max(max(x, broadcast(y, arg_lanes)), lanes), max(h_max(x, lanes), broadcast(y, lanes))) ||
rewrite(h_max(max(broadcast(x, arg_lanes), y), lanes), max(h_max(y, lanes), broadcast(x, lanes))) ||
rewrite(h_max(broadcast(x, arg_lanes), lanes), broadcast(x, lanes)) ||
rewrite(h_max(broadcast(x, c0), lanes), h_max(x, lanes), factor % c0 == 0) ||
rewrite(h_max(ramp(x, y, arg_lanes), lanes), x + max(y * (arg_lanes - 1), 0)) ||
rewrite(h_max(broadcast(x, c0), 1), h_max(x, 1)) ||
rewrite(h_max(broadcast(x, c0), lanes), broadcast(h_max(x, lanes / c0), c0), lanes % c0 == 0) ||
Copy link
Copy Markdown
Member

@abadams abadams Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same missing rule as above

rewrite(h_max(ramp(x, y, arg_lanes), 1), x + max(y * (arg_lanes - 1), 0)) ||
rewrite(h_max(ramp(x, y, arg_lanes), lanes), ramp(x + max(y * (factor - 1), 0), y * factor, lanes)) ||
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needs to be guarded with no_overflow_type, as above

false) {
return mutate(rewrite.result, info);
}
Expand All @@ -170,15 +177,16 @@ Expr Simplify::visit(const VectorReduce *op, ExprInfo *info) {
rewrite(h_and(x && broadcast(y, arg_lanes), lanes), h_and(x, lanes) && broadcast(y, lanes)) ||
rewrite(h_and(broadcast(x, arg_lanes) && y, lanes), h_and(y, lanes) && broadcast(x, lanes)) ||
rewrite(h_and(broadcast(x, arg_lanes), lanes), broadcast(x, lanes)) ||
rewrite(h_and(broadcast(x, c0), lanes), h_and(x, lanes), factor % c0 == 0) ||
rewrite(h_and(ramp(x, y, arg_lanes) < broadcast(z, arg_lanes), lanes),
x + max(y * (arg_lanes - 1), 0) < z) ||
rewrite(h_and(ramp(x, y, arg_lanes) <= broadcast(z, arg_lanes), lanes),
x + max(y * (arg_lanes - 1), 0) <= z) ||
rewrite(h_and(broadcast(x, arg_lanes) < ramp(y, z, arg_lanes), lanes),
x < y + min(z * (arg_lanes - 1), 0)) ||
rewrite(h_and(broadcast(x, arg_lanes) < ramp(y, z, arg_lanes), lanes),
x <= y + min(z * (arg_lanes - 1), 0)) ||
rewrite(h_and(broadcast(x, c0), lanes), broadcast(h_and(x, lanes / c0), c0), lanes % c0 == 0) ||
rewrite(h_and(broadcast(x, c0), lanes), broadcast(h_and(x, 1), lanes), c0 >= lanes) ||
(lanes == 1 && rewrite(h_and(ramp(x, y, arg_lanes) < broadcast(z, arg_lanes), lanes),
x + max(y * (arg_lanes - 1), 0) < z)) ||
(lanes == 1 && rewrite(h_and(ramp(x, y, arg_lanes) <= broadcast(z, arg_lanes), lanes),
x + max(y * (arg_lanes - 1), 0) <= z)) ||
(lanes == 1 && rewrite(h_and(broadcast(x, arg_lanes) < ramp(y, z, arg_lanes), lanes),
x < y + min(z * (arg_lanes - 1), 0))) ||
(lanes == 1 && rewrite(h_and(broadcast(x, arg_lanes) < ramp(y, z, arg_lanes), lanes),
x <= y + min(z * (arg_lanes - 1), 0))) ||
false) {
return mutate(rewrite.result, info);
}
Expand All @@ -191,7 +199,8 @@ Expr Simplify::visit(const VectorReduce *op, ExprInfo *info) {
rewrite(h_or(x && broadcast(y, arg_lanes), lanes), h_or(x, lanes) && broadcast(y, lanes)) ||
rewrite(h_or(broadcast(x, arg_lanes) && y, lanes), h_or(y, lanes) && broadcast(x, lanes)) ||
rewrite(h_or(broadcast(x, arg_lanes), lanes), broadcast(x, lanes)) ||
rewrite(h_or(broadcast(x, c0), lanes), h_or(x, lanes), factor % c0 == 0) ||
rewrite(h_or(broadcast(x, c0), lanes), broadcast(h_or(x, lanes / c0), c0), lanes % c0 == 0) ||
rewrite(h_or(broadcast(x, c0), lanes), broadcast(h_or(x, 1), lanes), c0 >= lanes) ||
// type of arg_lanes is somewhat indeterminate
rewrite(h_or(ramp(x, y, arg_lanes) < broadcast(z, arg_lanes), lanes),
x + min(y * (arg_lanes - 1), 0) < z) ||
Expand Down
59 changes: 56 additions & 3 deletions test/correctness/simplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -805,11 +805,64 @@ void check_vectors() {
check(VectorReduce::make(VectorReduce::And, Broadcast::make(bool_vector, 4), 1),
VectorReduce::make(VectorReduce::And, bool_vector, 1));
check(VectorReduce::make(VectorReduce::Or, Broadcast::make(bool_vector, 4), 2),
VectorReduce::make(VectorReduce::Or, bool_vector, 2));
Broadcast::make(VectorReduce::make(VectorReduce::Or, bool_vector, 1), 2));
check(VectorReduce::make(VectorReduce::Min, Broadcast::make(int_vector, 4), 4),
int_vector);
Broadcast::make(VectorReduce::make(VectorReduce::Min, int_vector, 1), 4));
check(VectorReduce::make(VectorReduce::Max, Broadcast::make(int_vector, 4), 8),
VectorReduce::make(VectorReduce::Max, Broadcast::make(int_vector, 4), 8));
Broadcast::make(VectorReduce::make(VectorReduce::Max, int_vector, 2), 4));

{
Expr x = Variable::make(Int(32), "x");
Expr y = Variable::make(Int(32), "y");

// == Symbolic Strides ==

// 1. Min: Scalar Reduction (arg_lanes=4, lanes=1 -> factor=4)
check(VectorReduce::make(VectorReduce::Min, Ramp::make(x, y, 4), 1),
min(y, 0) * 3 + x);

// 2. Min: Vector Reduction (arg_lanes=6, lanes=2 -> factor=3)
check(VectorReduce::make(VectorReduce::Min, Ramp::make(x, y, 6), 2),
Ramp::make(min(y, 0) * 2 + x, y * 3, 2));

// 3. Max: Scalar Reduction (arg_lanes=4, lanes=1 -> factor=4)
check(VectorReduce::make(VectorReduce::Max, Ramp::make(x, y, 4), 1),
max(y, 0) * 3 + x);

// 4. Max: Vector Reduction (arg_lanes=6, lanes=2 -> factor=3)
check(VectorReduce::make(VectorReduce::Max, Ramp::make(x, y, 6), 2),
Ramp::make(max(y, 0) * 2 + x, y * 3, 2));

// == Constant Strides (Positive & Negative) ==

// 5. Min: Positive Stride (arg_lanes=8, lanes=2 -> factor=4, stride=2)
// Block 1: min(x, x+2, x+4, x+6) -> x
// Expected Base: x + min(2 * 3, 0) -> x + 0 -> x
// Expected Stride: 2 * 4 = 8
check(VectorReduce::make(VectorReduce::Min, Ramp::make(x, 2, 8), 2),
Ramp::make(x, 8, 2));

// 6. Max: Positive Stride (arg_lanes=8, lanes=2 -> factor=4, stride=2)
// Block 1: max(x, x+2, x+4, x+6) -> x+6
// Expected Base: x + max(2 * 3, 0) -> x + 6
// Expected Stride: 2 * 4 = 8
check(VectorReduce::make(VectorReduce::Max, Ramp::make(x, 2, 8), 2),
Ramp::make(x + 6, 8, 2));

// 7. Min: Negative Stride (arg_lanes=8, lanes=2 -> factor=4, stride=-2)
// Block 1: min(x, x-2, x-4, x-6) -> x-6
// Expected Base: x + min(-2 * 3, 0) -> x - 6
// Expected Stride: -2 * 4 = -8
check(VectorReduce::make(VectorReduce::Min, Ramp::make(x, -2, 8), 2),
Ramp::make(x + -6, -8, 2));

// 8. Max: Negative Stride (arg_lanes=8, lanes=2 -> factor=4, stride=-2)
// Block 1: max(x, x-2, x-4, x-6) -> x
// Expected Base: x + max(-2 * 3, 0) -> x + 0 -> x
// Expected Stride: -2 * 4 = -8
check(VectorReduce::make(VectorReduce::Max, Ramp::make(x, -2, 8), 2),
Ramp::make(x, -8, 2));
}

{
// h_add(broadcast(x, 8), 4) should simplify to broadcast(x * 2, 4)
Expand Down
Loading