Skip to content

Commit 4689acd

Browse files
committed
Merge remote-tracking branch 'upstream/master' into HBASE-29368-feature
2 parents c4510bd + 4b85a22 commit 4689acd

145 files changed

Lines changed: 2502 additions & 2653 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

hbase-archetypes/README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ specific language governing permissions and limitations
1717
under the License.
1818
-->
1919

20-
#hbase-archetypes
20+
# hbase-archetypes
2121

22-
##Overview
22+
## Overview
2323
The hbase-archetypes subproject of hbase provides an infrastructure for
2424
creation and maintenance of Maven archetypes<sup id="a1">[1](#f1)</sup>
2525
pertinent to HBase. Upon deployment to the archetype
@@ -30,9 +30,9 @@ end-user developers to autogenerate completely configured Maven projects
3030
`archetype:generate` goal of the
3131
maven-archetype-plugin<sup id="a4">[4](#f4)</sup>.
3232

33-
##Notes for contributors and committers to the HBase project
33+
## Notes for contributors and committers to the HBase project
3434

35-
####The structure of hbase-archetypes
35+
#### The structure of hbase-archetypes
3636
The hbase-archetypes project contains a separate subproject for each archetype.
3737
The top level components of such a subproject comprise a complete, standalone
3838
exemplar Maven project containing:
@@ -47,7 +47,7 @@ code `./src/main/.../HelloHBase.java` and `./src/test/.../TestHelloHBase.java`,
4747
(b) a `pom.xml` file establishing dependency upon hbase-client and test-scope
4848
dependency upon hbase-testing-util, and (c) a `log4j.properties` resource file.
4949

50-
####How archetypes are created during the hbase install process
50+
#### How archetypes are created during the hbase install process
5151
During the `mvn install` process, all standalone exemplar projects in the
5252
`hbase-archetypes` subdirectory are first packaged/tested/installed, and then
5353
the following steps are executed in `hbase-archetypes/hbase-archetype-builder`
@@ -74,7 +74,7 @@ repository. (Note that installation of an archetype automatically includes
7474
invocation of integration-testing prior to install, which performs a test
7575
generation of a project from the archetype.)
7676

77-
####How to add a new archetype to the hbase-archetypes collection
77+
#### How to add a new archetype to the hbase-archetypes collection
7878
1. Create a new subdirectory in `hbase-archetypes`, populated with a
7979
completely configured Maven project, which will serve as the exemplar project
8080
of the new archetype. (It may be most straightforward to simply copy the `src`
@@ -93,7 +93,7 @@ elements and `<transformationSet>` elements within the `<plugin>` elements
9393
`createArchetypes.sh` and `installArchetypes.sh` scripts in the
9494
`hbase-archetype-builder` subdirectory (using the existing entries as a guide).
9595

96-
####How to do additional testing/inspection of an archetype in this collection
96+
#### How to do additional testing/inspection of an archetype in this collection
9797
Although integration-testing (which is automatically performed for each
9898
archetype during the install process) already performs test generation of a
9999
project from an archetype, it may often be advisable to do further manual
@@ -106,7 +106,7 @@ and can be tested locally by executing the following:
106106
This displays a numbered list of all locally-installed archetypes for the user
107107
to choose from for generation of a new Maven project.
108108

109-
##Footnotes:
109+
## Footnotes:
110110
<b id="f1">1</b> -- [Maven Archetype
111111
](http://maven.apache.org/archetype/index.html) ("About" page).
112112
-- [↩](#a1)

hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,10 @@ public Scan setMaxResultSize(long maxResultSize) {
554554

555555
@Override
556556
public Scan setFilter(Filter filter) {
557+
if (filter != null && filter.hasFilterRow() && this.batch > 0) {
558+
throw new IncompatibleFilterException(
559+
"Cannot set a filter that returns true for filter.hasFilterRow on a scan with batch set");
560+
}
557561
super.setFilter(filter);
558562
return this;
559563
}

hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultipleColumnPrefixFilter.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,10 @@
3737
import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
3838

3939
/**
40-
* This filter is used for selecting only those keys with columns that matches a particular prefix.
41-
* For example, if prefix is 'an', it will pass keys will columns like 'and', 'anti' but not keys
42-
* with columns like 'ball', 'act'.
40+
* This filter is used for selecting only those keys with columns that match any of the given
41+
* prefixes. For example, if prefixes are 'an' and 'ba', it will pass keys with columns like 'and',
42+
* 'anti', 'ball' but not keys with columns like 'cat', 'act'. The prefixes are stored in a sorted
43+
* set and the filter uses seek hints to efficiently skip columns that do not match any prefix.
4344
*/
4445
@InterfaceAudience.Public
4546
public class MultipleColumnPrefixFilter extends FilterBase implements HintingFilter {

hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestOperation.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ public void testScanOperationToJSON() throws IOException {
371371
scan.setLimit(5);
372372
scan.setReadType(Scan.ReadType.PREAD);
373373
scan.setNeedCursorResult(true);
374-
scan.setFilter(SCV_FILTER);
374+
scan.setFilter(VALUE_FILTER);
375375
scan.setReplicaId(1);
376376
scan.setConsistency(Consistency.STRONG);
377377
scan.setLoadColumnFamiliesOnDemand(true);

hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestScan.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import static org.junit.jupiter.api.Assertions.assertEquals;
2121
import static org.junit.jupiter.api.Assertions.assertNull;
22+
import static org.junit.jupiter.api.Assertions.assertThrows;
2223
import static org.junit.jupiter.api.Assertions.assertTrue;
2324
import static org.junit.jupiter.api.Assertions.fail;
2425

@@ -29,6 +30,8 @@
2930
import org.apache.hadoop.hbase.HConstants;
3031
import org.apache.hadoop.hbase.client.Scan.ReadType;
3132
import org.apache.hadoop.hbase.filter.FilterList;
33+
import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
34+
import org.apache.hadoop.hbase.filter.PageFilter;
3235
import org.apache.hadoop.hbase.security.access.Permission;
3336
import org.apache.hadoop.hbase.security.visibility.Authorizations;
3437
import org.apache.hadoop.hbase.testclassification.ClientTests;
@@ -254,6 +257,36 @@ public void testScanCopyConstructor() throws Exception {
254257
"Make sure copy constructor adds all the fields in the copied object");
255258
}
256259

260+
@Test
261+
public void testSetFilterWithBatchThrows() {
262+
Scan scan = new Scan();
263+
scan.setBatch(5);
264+
assertThrows(IncompatibleFilterException.class, () -> scan.setFilter(new PageFilter(10)));
265+
}
266+
267+
@Test
268+
public void testSetFilterWithoutBatchDoesNotThrow() {
269+
Scan scan = new Scan();
270+
scan.setFilter(new PageFilter(10));
271+
// no exception expected
272+
}
273+
274+
@Test
275+
public void testSetFilterWithBatchAndNonFilterRowFilter() {
276+
Scan scan = new Scan();
277+
scan.setBatch(5);
278+
scan.setFilter(new FilterList());
279+
// FilterList.hasFilterRow() returns false, so no exception expected
280+
}
281+
282+
@Test
283+
public void testSetFilterWithBatchAndNullFilter() {
284+
Scan scan = new Scan();
285+
scan.setBatch(5);
286+
scan.setFilter(null);
287+
// null filter should not throw
288+
}
289+
257290
@Test
258291
public void testScanReadType() throws Exception {
259292
Scan scan = new Scan();

hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFileCompactionChore.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -102,15 +102,15 @@ protected void chore() {
102102
hcd.getNameAsString());
103103
if (regionBatchSize == MobConstants.DEFAULT_MOB_MAJOR_COMPACTION_REGION_BATCH_SIZE) {
104104
LOG.debug(
105-
"Table={} cf ={}: batch MOB compaction is disabled, {}=0 -"
105+
"Table={} cf={}: batch MOB compaction is disabled, {}=0 -"
106106
+ " all regions will be compacted in parallel",
107-
htd.getTableName(), hcd.getNameAsString(), "hbase.mob.compaction.batch.size");
107+
htd.getTableName(), hcd.getNameAsString(),
108+
MobConstants.MOB_MAJOR_COMPACTION_REGION_BATCH_SIZE);
108109
admin.majorCompact(htd.getTableName(), hcd.getName());
109110
} else {
110-
LOG.info(
111-
"Table={} cf={}: performing MOB major compaction in batches "
112-
+ "'hbase.mob.compaction.batch.size'={}",
113-
htd.getTableName(), hcd.getNameAsString(), regionBatchSize);
111+
LOG.info("Table={} cf={}: performing MOB major compaction in batches {}={}",
112+
htd.getTableName(), hcd.getNameAsString(),
113+
MobConstants.MOB_MAJOR_COMPACTION_REGION_BATCH_SIZE, regionBatchSize);
114114
performMajorCompactionInBatches(admin, htd, hcd);
115115
}
116116
} else {

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/DeleteTracker.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,20 @@ enum DeleteResult {
8383
// deleted in strong semantics of versions(See MvccTracker)
8484
}
8585

86+
/**
87+
* Check if the given delete marker is redundant, i.e., it is already covered by a previously
88+
* tracked delete of equal or broader scope. A DeleteFamily is redundant if a DeleteFamily with a
89+
* equal or higher timestamp was already seen. A DeleteColumn is redundant if a DeleteColumn for the same
90+
* qualifier with an equal or higher timestamp, or a DeleteFamily with an equal or higher timestamp, was already seen.
91+
* <p>
92+
* This is a read-only check with no side effects on tracker state.
93+
* @param cell the delete marker cell to check
94+
* @return true if the delete marker is redundant and can be skipped
95+
*/
96+
default boolean isRedundantDelete(ExtendedCell cell) {
97+
return false;
98+
}
99+
86100
/**
87101
* Return the comparator passed to this delete tracker
88102
* @return the cell comparator

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/MinorCompactionScanQueryMatcher.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import java.io.IOException;
2121
import org.apache.hadoop.hbase.ExtendedCell;
22+
import org.apache.hadoop.hbase.KeyValue;
2223
import org.apache.hadoop.hbase.PrivateCellUtil;
2324
import org.apache.hadoop.hbase.regionserver.ScanInfo;
2425
import org.apache.yetus.audience.InterfaceAudience;
@@ -47,6 +48,19 @@ public MatchCode match(ExtendedCell cell) throws IOException {
4748
// we should not use this delete marker to mask any cell yet.
4849
return MatchCode.INCLUDE;
4950
}
51+
// Check before tracking: an older DeleteColumn or DeleteFamily is redundant if a newer
52+
// one of equal or broader scope was already seen. Must check before trackDelete() since
53+
// that overwrites tracker state. Seek past remaining cells for this column/row since
54+
// they are all covered by the previously tracked delete.
55+
if (deletes.isRedundantDelete(cell)) {
56+
// Do not seek on deletes with an empty qualifier, so as not to skip a subsequent
57+
// DeleteFamily marker that covers other qualifiers. DeleteFamily itself can seek
58+
// safely because all remaining empty-qualifier cells are redundant under it.
59+
if (cell.getQualifierLength() == 0 && typeByte != KeyValue.Type.DeleteFamily.getCode()) {
60+
return MatchCode.SKIP;
61+
}
62+
return columns.getNextRowOrNextColumn(cell);
63+
}
5064
trackDelete(cell);
5165
return MatchCode.INCLUDE;
5266
}

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/NormalUserScanQueryMatcher.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@
1818
package org.apache.hadoop.hbase.regionserver.querymatcher;
1919

2020
import java.io.IOException;
21+
import org.apache.hadoop.hbase.CellUtil;
2122
import org.apache.hadoop.hbase.ExtendedCell;
2223
import org.apache.hadoop.hbase.KeepDeletedCells;
24+
import org.apache.hadoop.hbase.KeyValue;
25+
import org.apache.hadoop.hbase.KeyValueUtil;
2326
import org.apache.hadoop.hbase.PrivateCellUtil;
2427
import org.apache.hadoop.hbase.client.Scan;
2528
import org.apache.hadoop.hbase.regionserver.ScanInfo;
@@ -31,6 +34,14 @@
3134
@InterfaceAudience.Private
3235
public abstract class NormalUserScanQueryMatcher extends UserScanQueryMatcher {
3336

37+
/**
38+
* Number of consecutive range delete markers (DeleteColumn/DeleteFamily) to skip before switching
39+
* to seek. Seeking is more expensive than skipping for a single marker, but much faster when
40+
* markers accumulate. This threshold avoids the seek overhead for the common case (one delete per
41+
* row/column) while still kicking in when markers pile up.
42+
*/
43+
static final int SEEK_ON_DELETE_MARKER_THRESHOLD = 10;
44+
3445
/** Keeps track of deletes */
3546
private final DeleteTracker deletes;
3647

@@ -40,18 +51,32 @@ public abstract class NormalUserScanQueryMatcher extends UserScanQueryMatcher {
4051
/** whether time range queries can see rows "behind" a delete */
4152
protected final boolean seePastDeleteMarkers;
4253

54+
/** Whether seek optimization for range delete markers is applicable */
55+
private final boolean canSeekOnDeleteMarker;
56+
57+
/** Count of consecutive range delete markers seen for the same column */
58+
private int rangeDeleteCount;
59+
60+
/** Last range delete cell, for qualifier comparison across consecutive markers */
61+
private ExtendedCell lastDelete;
62+
4363
protected NormalUserScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker columns,
4464
boolean hasNullColumn, DeleteTracker deletes, long oldestUnexpiredTS, long now) {
4565
super(scan, scanInfo, columns, hasNullColumn, oldestUnexpiredTS, now);
4666
this.deletes = deletes;
4767
this.get = scan.isGetScan();
4868
this.seePastDeleteMarkers = scanInfo.getKeepDeletedCells() != KeepDeletedCells.FALSE;
69+
this.canSeekOnDeleteMarker =
70+
!seePastDeleteMarkers && deletes.getClass() == ScanDeleteTracker.class;
4971
}
5072

5173
@Override
5274
public void beforeShipped() throws IOException {
5375
super.beforeShipped();
5476
deletes.beforeShipped();
77+
if (lastDelete != null) {
78+
lastDelete = KeyValueUtil.toNewKeyCell(lastDelete);
79+
}
5580
}
5681

5782
@Override
@@ -71,8 +96,31 @@ public MatchCode match(ExtendedCell cell) throws IOException {
7196
if (includeDeleteMarker) {
7297
this.deletes.add(cell);
7398
}
99+
100+
// A DeleteColumn or DeleteFamily masks all remaining cells for this column/family.
101+
// Seek past them instead of skipping one cell at a time, but only after seeing
102+
// enough consecutive markers for the same column to justify the seek overhead.
103+
// Only safe with plain ScanDeleteTracker. Not safe with newVersionBehavior (sequence
104+
// IDs determine visibility), visibility labels (delete/put label mismatch), or
105+
// seePastDeleteMarkers (KEEP_DELETED_CELLS).
106+
if (
107+
canSeekOnDeleteMarker && (typeByte == KeyValue.Type.DeleteFamily.getCode()
108+
|| (typeByte == KeyValue.Type.DeleteColumn.getCode() && cell.getQualifierLength() > 0))
109+
) {
110+
if (lastDelete != null && !CellUtil.matchingQualifier(cell, lastDelete)) {
111+
rangeDeleteCount = 0;
112+
}
113+
lastDelete = cell;
114+
if (++rangeDeleteCount >= SEEK_ON_DELETE_MARKER_THRESHOLD) {
115+
rangeDeleteCount = 0;
116+
return columns.getNextRowOrNextColumn(cell);
117+
}
118+
} else {
119+
rangeDeleteCount = 0;
120+
}
74121
return MatchCode.SKIP;
75122
}
123+
rangeDeleteCount = 0;
76124
returnCode = checkDeleted(deletes, cell);
77125
if (returnCode != null) {
78126
return returnCode;
@@ -83,6 +131,8 @@ public MatchCode match(ExtendedCell cell) throws IOException {
83131
@Override
84132
protected void reset() {
85133
deletes.reset();
134+
rangeDeleteCount = 0;
135+
lastDelete = null;
86136
}
87137

88138
@Override

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanDeleteTracker.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,28 @@ public DeleteResult isDeleted(ExtendedCell cell) {
142142
return DeleteResult.NOT_DELETED;
143143
}
144144

145+
@Override
146+
public boolean isRedundantDelete(ExtendedCell cell) {
147+
byte type = cell.getTypeByte();
148+
boolean coveredByFamily = hasFamilyStamp && cell.getTimestamp() <= familyStamp;
149+
150+
if (
151+
type == KeyValue.Type.DeleteFamily.getCode()
152+
|| type == KeyValue.Type.DeleteFamilyVersion.getCode()
153+
) {
154+
return coveredByFamily;
155+
}
156+
157+
boolean coveredByColumn =
158+
deleteCell != null && deleteType == KeyValue.Type.DeleteColumn.getCode()
159+
&& CellUtil.matchingQualifier(cell, deleteCell) && cell.getTimestamp() <= deleteTimestamp;
160+
161+
if (type == KeyValue.Type.DeleteColumn.getCode() || type == KeyValue.Type.Delete.getCode()) {
162+
return coveredByFamily || coveredByColumn;
163+
}
164+
return false;
165+
}
166+
145167
@Override
146168
public boolean isEmpty() {
147169
return deleteCell == null && !hasFamilyStamp && familyVersionStamps.isEmpty();

0 commit comments

Comments
 (0)