diff --git a/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java b/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java index 9b24cf2ad201..4d4b67dad628 100644 --- a/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java +++ b/core/src/main/java/org/apache/calcite/plan/RelOptUtil.java @@ -3346,6 +3346,21 @@ public static List pushPastProject(List nodes, // function? Possibly. But it's invalid SQL, so don't go there. return null; } + // [CALCITE-7551] Refuse to merge if it would duplicate a + // non-deterministic expression (e.g. RAND()). + final List bottom = project.getProjects(); + final int[] refs = new int[bottom.size()]; + new RexVisitorImpl(true) { + @Override public Void visitInputRef(RexInputRef ref) { + refs[ref.getIndex()]++; + return null; + } + }.visitEach(nodes); + for (int i = 0; i < refs.length; i++) { + if (refs[i] > 1 && !RexUtil.isDeterministic(bottom.get(i))) { + return null; + } + } final List list = pushPastProject(nodes, project); final int bottomCount = RexUtil.nodeCount(project.getProjects()); final int topCount = RexUtil.nodeCount(nodes); diff --git a/core/src/main/java/org/apache/calcite/rel/rules/FilterProjectTransposeRule.java b/core/src/main/java/org/apache/calcite/rel/rules/FilterProjectTransposeRule.java index 3c365222cd66..18b2aa9a5ff3 100644 --- a/core/src/main/java/org/apache/calcite/rel/rules/FilterProjectTransposeRule.java +++ b/core/src/main/java/org/apache/calcite/rel/rules/FilterProjectTransposeRule.java @@ -165,6 +165,18 @@ protected FilterProjectTransposeRule( // it can be pushed down. For now we don't support this. return; } + // Refuse to transpose if the filter references a projected column whose + // expression is non-deterministic (e.g. RAND()). Pushing the filter + // below the project would inline that expression into the new filter + // condition while the original is still produced by the project above, + // splitting one evaluation into two. References to deterministic columns + // (even when other columns are non-deterministic) are safe to push. + final List projects = project.getProjects(); + for (int ref : RelOptUtil.InputFinder.bits(filter.getCondition())) { + if (!RexUtil.isDeterministic(projects.get(ref))) { + return; + } + } // convert the filter to one that references the child of the project RexNode newCondition = RelOptUtil.pushPastProjectUnlessBloat(filter.getCondition(), project, config.bloat()); diff --git a/core/src/main/java/org/apache/calcite/rel/rules/JoinProjectTransposeRule.java b/core/src/main/java/org/apache/calcite/rel/rules/JoinProjectTransposeRule.java index 6902a5ef8054..5a4fc4c32713 100644 --- a/core/src/main/java/org/apache/calcite/rel/rules/JoinProjectTransposeRule.java +++ b/core/src/main/java/org/apache/calcite/rel/rules/JoinProjectTransposeRule.java @@ -36,9 +36,11 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexProgram; import org.apache.calcite.rex.RexProgramBuilder; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilderFactory; +import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; import org.checkerframework.checker.nullness.qual.Nullable; @@ -112,6 +114,22 @@ public JoinProjectTransposeRule(RelOptRuleOperand operand, //~ Methods ---------------------------------------------------------------- + /** Returns whether {@code conditionRefs} (input references of the join + * condition, expressed against the join's combined output) references a + * non-deterministic expression of {@code project}, whose first output + * field is at {@code offset} in that combined output. */ + private static boolean referencesNonDeterministic(Project project, + ImmutableBitSet conditionRefs, int offset) { + final List exprs = project.getProjects(); + for (int i = 0; i < exprs.size(); i++) { + if (conditionRefs.get(offset + i) + && !RexUtil.isDeterministic(exprs.get(i))) { + return true; + } + } + return false; + } + @Override public void onMatch(RelOptRuleCall call) { final Join join = call.rel(0); final JoinRelType joinType = join.getJoinType(); @@ -151,6 +169,26 @@ public JoinProjectTransposeRule(RelOptRuleOperand operand, rightJoinChild = join.getRight(); } + // Skip a project when the join condition references one of its + // non-deterministic expressions (e.g. RAND()). The merge below inlines + // that expression into the new join condition via expandLocalRef while + // the project still re-emits it above, splitting one evaluation into + // two. Non-deterministic columns that the condition does not reference + // are safe to pull up. + final ImmutableBitSet conditionRefs = + RelOptUtil.InputFinder.bits(join.getCondition()); + final int nLeftFields = join.getLeft().getRowType().getFieldCount(); + if (leftProject != null + && referencesNonDeterministic(leftProject, conditionRefs, 0)) { + leftProject = null; + leftJoinChild = join.getLeft(); + } + if (rightProject != null + && referencesNonDeterministic(rightProject, conditionRefs, nLeftFields)) { + rightProject = null; + rightJoinChild = join.getRight(); + } + if ((leftProject == null) && (rightProject == null)) { return; } diff --git a/core/src/main/java/org/apache/calcite/rel/rules/SemiJoinProjectTransposeRule.java b/core/src/main/java/org/apache/calcite/rel/rules/SemiJoinProjectTransposeRule.java index 42deabf5e3d6..e5be603d9e40 100644 --- a/core/src/main/java/org/apache/calcite/rel/rules/SemiJoinProjectTransposeRule.java +++ b/core/src/main/java/org/apache/calcite/rel/rules/SemiJoinProjectTransposeRule.java @@ -17,6 +17,7 @@ package org.apache.calcite.rel.rules; import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Join; @@ -31,8 +32,10 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexProgram; import org.apache.calcite.rex.RexProgramBuilder; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; import com.google.common.collect.ImmutableList; @@ -72,6 +75,21 @@ protected SemiJoinProjectTransposeRule(Config config) { final Join semiJoin = call.rel(0); final Project project = call.rel(1); + // Skip when the semi-join condition references one of the project's + // non-deterministic expressions (e.g. RAND()). Pulling such a project + // above the semi-join inlines that expression into the join condition + // via expandLocalRef while the project still re-emits it above, + // splitting one evaluation into two. Non-deterministic columns that the + // condition does not reference are safe to pull up. See [CALCITE-7551]. + final ImmutableBitSet conditionRefs = + RelOptUtil.InputFinder.bits(semiJoin.getCondition()); + final List projects = project.getProjects(); + for (int i = 0; i < projects.size(); i++) { + if (conditionRefs.get(i) && !RexUtil.isDeterministic(projects.get(i))) { + return; + } + } + // Convert the LHS semi-join keys to reference the child projection // expression; all projection expressions must be RexInputRefs, // otherwise, we wouldn't have created this semi-join. diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java index 715d9f4f7b8e..980c976dda87 100644 --- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java +++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java @@ -1533,6 +1533,87 @@ private void checkSemiOrAntiJoinProjectTranspose(JoinRelType type) { .check(); } + /** Test case for + * [CALCITE-7551] + * Project/Filter/Join transpose and merge rules can duplicate + * non-deterministic expressions. JoinProjectTransposeRule must + * not pull a project containing a non-deterministic expression above + * the join, because it inlines the expression into the new join + * condition via {@code mergedProgram.expandLocalRef}. */ + @Test void testJoinProjectTransposeShouldIgnoreNonDeterministic() { + final Function relFn = b -> b + .scan("EMP") + .project(b.field("EMPNO"), + b.alias(b.call(SqlStdOperatorTable.RAND), "r")) + .scan("DEPT") + .join(JoinRelType.INNER, + b.and( + b.greaterThan(b.field(2, 0, "r"), b.literal(0.0)), + b.lessThan(b.field(2, 0, "r"), b.literal(1.0)))) + .build(); + relFn(relFn).withRule(CoreRules.JOIN_PROJECT_LEFT_TRANSPOSE).checkUnchanged(); + } + + /** Test case for + * [CALCITE-7551] + * Project/Filter/Join transpose and merge rules can duplicate + * non-deterministic expressions. The transpose is still allowed when + * the join condition only references deterministic projected columns, + * even if the project also computes a non-deterministic column (here + * {@code r} is RAND() but the join is on DEPTNO). */ + @Test void testJoinProjectTransposeWithUnrelatedNonDeterministic() { + final Function relFn = b -> b + .scan("EMP") + .project(b.alias(b.call(SqlStdOperatorTable.RAND), "r"), + b.field("DEPTNO")) + .scan("DEPT") + .join(JoinRelType.INNER, + b.equals(b.field(2, 0, "DEPTNO"), b.field(2, 1, "DEPTNO"))) + .build(); + relFn(relFn).withRule(CoreRules.JOIN_PROJECT_LEFT_TRANSPOSE).check(); + } + + /** Test case for + * [CALCITE-7551] + * Project/Filter/Join transpose and merge rules can duplicate + * non-deterministic expressions. SemiJoinProjectTransposeRule + * uses the same {@code mergePrograms} + {@code expandLocalRef} + * pattern as JoinProjectTransposeRule, and must not pull a project + * above the semi-join when the condition references one of its + * non-deterministic expressions. */ + @Test void testSemiJoinProjectTransposeShouldIgnoreNonDeterministic() { + final Function relFn = b -> b + .scan("EMP") + .project(b.field("EMPNO"), + b.alias(b.call(SqlStdOperatorTable.RAND), "r")) + .scan("DEPT") + .join(JoinRelType.SEMI, + b.and( + b.greaterThan(b.field(2, 0, "r"), b.literal(0.0)), + b.lessThan(b.field(2, 0, "r"), b.literal(1.0)))) + .build(); + relFn(relFn).withRule(CoreRules.SEMI_JOIN_PROJECT_TRANSPOSE).checkUnchanged(); + } + + /** Test case for + * [CALCITE-7551] + * Project/Filter/Join transpose and merge rules can duplicate + * non-deterministic expressions. The semi-join transpose is still + * allowed when the condition only references deterministic projected + * columns, even if the project also computes a non-deterministic column + * (here {@code r} is RAND() but the semi-join is on DEPTNO). */ + @Test void testSemiJoinProjectTransposeWithUnrelatedNonDeterministic() { + final Function relFn = b -> b + .scan("EMP") + .project(b.alias(b.call(SqlStdOperatorTable.RAND), "r"), + b.field("DEPTNO")) + .scan("DEPT") + .join(JoinRelType.SEMI, + b.equals(b.field(2, 0, "DEPTNO"), b.field(2, 1, "DEPTNO"))) + .build(); + relFn(relFn).withRule(CoreRules.SEMI_JOIN_PROJECT_TRANSPOSE).check(); + } + /** Test case for * [CALCITE-1338] * JoinProjectTransposeRule should not pull a literal above the @@ -3204,6 +3285,47 @@ private void checkProjectCorrelateTransposeRuleSemiOrAntiCorrelate(JoinRelType t .check(); } + /** Test case for + * [CALCITE-7551] + * Project/Filter/Join transpose and merge rules can duplicate + * non-deterministic expressions. FilterProjectTransposeRule must + * not pull a filter that references a non-deterministic projected + * column below the project. */ + @Test void testFilterProjectTransposeShouldIgnoreNonDeterministic() { + // Filter(a > 0 AND a < 1) over Project(a = RAND()). The filter + // references the non-deterministic column, so the transpose must be + // refused: otherwise the pushed-down filter and the re-emitted project + // would each evaluate RAND() independently. + final Function relFn = b -> b + .scan("EMP") + .project(b.alias(b.call(SqlStdOperatorTable.RAND), "a")) + .filter( + b.and( + b.greaterThan(b.field("a"), b.literal(0.0)), + b.lessThan(b.field("a"), b.literal(1.0)))) + .build(); + relFn(relFn).withRule(CoreRules.FILTER_PROJECT_TRANSPOSE).checkUnchanged(); + } + + /** Test case for + * [CALCITE-7551] + * Project/Filter/Join transpose and merge rules can duplicate + * non-deterministic expressions. The transpose is still allowed when + * the filter only references deterministic projected columns, even if + * other columns in the project are non-deterministic (here {@code r} is + * RAND() but the filter is on {@code b}). */ + @Test void testFilterProjectTransposeWithUnrelatedNonDeterministic() { + // Filter(b > 0) over Project(r = RAND(), b = DEPTNO). The filter only + // references the deterministic column b, so the transpose is safe. + final Function relFn = b -> b + .scan("EMP") + .project(b.alias(b.call(SqlStdOperatorTable.RAND), "r"), + b.alias(b.field("DEPTNO"), "b")) + .filter(b.greaterThan(b.field("b"), b.literal(0))) + .build(); + relFn(relFn).withRule(CoreRules.FILTER_PROJECT_TRANSPOSE).check(); + } + private static final String NOT_STRONG_EXPR = "case when e.sal < 11 then 11 else -1 * e.sal end"; @@ -6920,6 +7042,18 @@ private HepProgram getTransitiveProgram() { sql(sql).withRule(CoreRules.PROJECT_MERGE).checkUnchanged(); } + /** Test case for + * [CALCITE-7551] + * Project/Filter/Join transpose and merge rules can duplicate + * non-deterministic expressions. ProjectMergeRule must not merge + * adjacent projects when doing so would duplicate a non-deterministic + * expression. */ + @Test void testProjectMergeShouldIgnoreNonDeterministic() { + final String sql = "select a, a + 1 as b from (select rand() as a from emp)"; + sql(sql).withRule(CoreRules.PROJECT_MERGE).checkUnchanged(); + } + + @Test void testAggregateProjectPullUpConstants() { final String sql = "select job, empno, sal, sum(sal) as s\n" + "from emp where empno = 10\n" diff --git a/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java b/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java index ebad0e9450a5..6041ca68fc78 100644 --- a/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java +++ b/core/src/test/java/org/apache/calcite/test/SqlToRelConverterTest.java @@ -6133,4 +6133,16 @@ void checkUserDefinedOrderByOver(NullCollation nullCollation) { + "FROM emp JOIN dept using (deptno)"; sql(sql).withConformance(SqlConformanceEnum.PRESTO).ok(); } + + /** Test case of + * [CALCITE-7551] + * Non-deterministic expressions (e.g. {@code RAND()}) should not be + * duplicated when projections are merged. The two references to + * {@code a} in the outer query must resolve to the same {@code RAND()} + * evaluation produced by the inner sub-query, so the inner projection + * must not be flattened into the outer. */ + @Test void testRandNotDuplicatedInProjectionMerge() { + final String sql = "select a, a + 1 as b from (select rand() as a)"; + sql(sql).ok(); + } } diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml index c782bb913cc1..4e9e1e68cd34 100644 --- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml +++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml @@ -5903,6 +5903,31 @@ LogicalAggregate(group=[{}], EXPR$0=[COUNT()]) LogicalTableScan(table=[[CATALOG, SALES, EMP]]) }))]) LogicalTableScan(table=[[CATALOG, SALES, EMP]]) +]]> + + + + + + + + + + ($1, 0)]) + LogicalProject(r=[RAND()], b=[$7]) + LogicalTableScan(table=[[scott, EMP]]) +]]> + + + ($7, 0)]) + LogicalTableScan(table=[[scott, EMP]]) ]]> @@ -8445,6 +8470,16 @@ LogicalProject(DEPTNO=[$0], NAME=[$1], NAME0=[$2], EXPR$1=[$3]) LogicalJoin(condition=[=($1, $3)], joinType=[left]) LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) +]]> + + + + + @@ -8473,6 +8508,24 @@ LogicalProject(DEPTNO=[$0], NAME=[$1], R=[$3], EXPR$1=[$4]) LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) LogicalProject(R=[RANK() OVER (ORDER BY $1)], EXPR$1=[+(1, 1)]) LogicalTableScan(table=[[CATALOG, SALES, DEPT]]) +]]> + + + + + + + + @@ -11947,6 +12000,18 @@ LogicalProject(EXPR$0=[+($0, 1)]) })]) LogicalProject(X=[ARRAY(1, 2, 3)]) LogicalValues(tuples=[[{ 0 }]]) +]]> + + + + + + + + @@ -17971,6 +18036,34 @@ LogicalProject(DNAME=[$1]) LogicalAggregate(group=[{0}]) LogicalProject($f0=[*(2, $0)]) LogicalTableScan(table=[[scott, DEPT]]) +]]> + + + + + + + + + + + + + diff --git a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml index 56f169629ae6..5c8811ce40bf 100644 --- a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml +++ b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml @@ -7243,6 +7243,18 @@ LogicalProject(EXPR$0=[= SOME(1970-01-01 01:23:45, ARRAY(1970-01-01 01:23:45, 19 + + + + + + + +