diff --git a/pkg/schedule/checker/affinity_checker.go b/pkg/schedule/checker/affinity_checker.go index 02345f2e51d..5f05a852cbb 100644 --- a/pkg/schedule/checker/affinity_checker.go +++ b/pkg/schedule/checker/affinity_checker.go @@ -17,6 +17,7 @@ package checker import ( "bytes" "context" + "math/rand" "slices" "time" @@ -34,6 +35,7 @@ import ( "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/schedule/types" + "github.com/tikv/pd/pkg/statistics" "github.com/tikv/pd/pkg/utils/logutil" ) @@ -44,20 +46,24 @@ const recentMergeTTL = time.Minute type AffinityChecker struct { PauseController cluster sche.CheckerCluster + ruleManager *placement.RuleManager affinityManager *affinity.Manager conf config.CheckerConfigProvider recentMergeCache *cache.TTLUint64 startTime time.Time + r *rand.Rand } // NewAffinityChecker create an affinity checker. func NewAffinityChecker(ctx context.Context, cluster sche.CheckerCluster, conf config.CheckerConfigProvider) *AffinityChecker { return &AffinityChecker{ cluster: cluster, + ruleManager: cluster.GetRuleManager(), affinityManager: cluster.GetAffinityManager(), conf: conf, recentMergeCache: cache.NewIDTTL(ctx, gcInterval, recentMergeTTL), startTime: time.Now(), + r: rand.New(rand.NewSource(time.Now().UnixNano())), } } @@ -83,6 +89,10 @@ func (c *AffinityChecker) Check(region *core.RegionInfo) []*operator.Operator { affinityCheckerPausedCounter.Inc() return nil } + if !c.cluster.GetSharedConfig().IsPlacementRulesEnabled() { + affinityCheckerPlacementRulesDisabledCounter.Inc() + return nil + } // Check region state if region.GetLeader() == nil { @@ -93,15 +103,21 @@ func (c *AffinityChecker) Check(region *core.RegionInfo) []*operator.Operator { affinityCheckerUnhealthyRegionCounter.Inc() return nil } - if !filter.IsRegionReplicated(c.cluster, region) { - affinityCheckerAbnormalReplicaCounter.Inc() + + if !c.hasAffinityGroups() { return nil } - // Get the affinity group for this region + // Check the affinity group before the heavier best-location gate. Most regions + // are not affinity regions. group, isAffinity := c.affinityManager.GetAndCacheRegionAffinityGroupState(region) if group == nil { - // Region doesn't belong to any affinity group + // Region doesn't belong to any affinity group. + return nil + } + + if !c.isRegionPlacementRuleSatisfiedWithBestLocation(region, true /* isExistingRegion */) { + affinityCheckerAbnormalReplicaCounter.Inc() return nil } @@ -115,8 +131,9 @@ func (c *AffinityChecker) Check(region *core.RegionInfo) []*operator.Operator { // so expire the group first, then provide the available Region information and fetch the Group state again. if !isAffinity { targetRegion := cloneRegionWithReplacePeerStores(region, group.LeaderStoreID, group.VoterStoreIDs...) - if targetRegion == nil || !filter.IsRegionReplicated(c.cluster, targetRegion) { + if targetRegion == nil || !c.isRegionPlacementRuleSatisfiedWithBestLocation(targetRegion, false /* isExistingRegion */) { c.affinityManager.ExpireAffinityGroup(group.ID) + group = c.affinityManager.GetAffinityGroupState(group.ID) needRefetch = true } } @@ -383,7 +400,7 @@ func (c *AffinityChecker) checkAffinityMergeTarget(region, adjacent *core.Region return false } - if !filter.IsRegionReplicated(c.cluster, adjacent) { + if !c.isRegionPlacementRuleSatisfiedWithBestLocation(adjacent, true /* isExistingRegion */) { affinityMergeCheckerAdjAbnormalReplicaCounter.Inc() return false } @@ -493,3 +510,61 @@ func (c *AffinityChecker) RecordOpSuccess(op *operator.Operator) { c.recentMergeCache.PutWithTTL(op.RegionID(), nil, recentMergeTTL) c.recentMergeCache.PutWithTTL(relatedID, nil, recentMergeTTL) } + +// isRegionPlacementRuleSatisfiedWithBestLocation is an affinity-specific +// scheduling gate. Besides requiring the region to satisfy placement rules, it +// also requires that affinity scheduling cannot find a strictly better location +// under the current topology and that the matched rule's isolation level is +// satisfied. This is intentionally stricter than filter.IsRegionReplicated and +// should not be used as a general replicated-state check. +func (c *AffinityChecker) isRegionPlacementRuleSatisfiedWithBestLocation(region *core.RegionInfo, isExistingRegion bool) bool { + // Get the RegionFit for the given Region. If the Region is not an existing Region but a virtual target state, + // use FitRegionWithoutCache to bypass the cache. + var fit *placement.RegionFit + if isExistingRegion { + fit = c.ruleManager.FitRegion(c.cluster, region) + } else { + fit = c.ruleManager.FitRegionWithoutCache(c.cluster, region) + } + + // Check region is satisfied + if fit == nil || !fit.IsSatisfied() { + return false + } + + // Check whether all peers covered by the rules are at the best isolation level. + // This logic is based on `RuleChecker.fixBetterLocation`. + for _, rf := range fit.RuleFits { + if len(rf.Rule.LocationLabels) == 0 { + continue + } + isWitness := rf.Rule.IsWitness && isWitnessEnabled(c.cluster) + // If the peer to be moved is a witness, since no snapshot is needed, we also reuse the fast failover logic. + strategy := c.strategy(c.r, region, rf.Rule, isWitness) + _, newStoreID, filterByTempState := strategy.getBetterLocation(c.cluster, region, fit, rf) + // filterByTempState being true means a better placement exists but is temporarily unschedulable. + // This is also considered not satisfied. + if newStoreID != 0 || filterByTempState { + return false + } + // If the isolation level does not meet the requirement, it is also considered not to be at the best location. + if !statistics.IsRegionLabelIsolationSatisfied(rf.Stores, rf.Rule.LocationLabels, rf.Rule.IsolationLevel) { + return false + } + } + + return true +} + +func (c *AffinityChecker) strategy(r *rand.Rand, region *core.RegionInfo, rule *placement.Rule, fastFailover bool) *ReplicaStrategy { + return &ReplicaStrategy{ + checkerName: c.Name(), + cluster: c.cluster, + isolationLevel: rule.IsolationLevel, + locationLabels: rule.LocationLabels, + region: region, + extraFilters: []filter.Filter{filter.NewLabelConstraintFilter(c.Name(), rule.LabelConstraints)}, + fastFailover: fastFailover, + r: r, + } +} diff --git a/pkg/schedule/checker/affinity_checker_test.go b/pkg/schedule/checker/affinity_checker_test.go index 8bc3c2a6c15..2bc8ff410b4 100644 --- a/pkg/schedule/checker/affinity_checker_test.go +++ b/pkg/schedule/checker/affinity_checker_test.go @@ -32,6 +32,7 @@ import ( "github.com/tikv/pd/pkg/schedule/affinity" scheconfig "github.com/tikv/pd/pkg/schedule/config" sche "github.com/tikv/pd/pkg/schedule/core" + "github.com/tikv/pd/pkg/schedule/filter" "github.com/tikv/pd/pkg/schedule/labeler" "github.com/tikv/pd/pkg/schedule/operator" "github.com/tikv/pd/pkg/schedule/placement" @@ -2014,9 +2015,9 @@ func TestAffinityCheckerGroupScheduleDisallowed(t *testing.T) { re.Nil(ops, "Affinity scheduling should be blocked when group is not allowed") } -// TestAffinityCheckerExpireGroupWhenPlacementRuleMismatch verifies that when the affinity group peers -// don't satisfy placement rules, the group will be expired and scheduling is disabled. -func TestAffinityCheckerExpireGroupWhenPlacementRuleMismatch(t *testing.T) { +// TestAffinityCheckerNormalizeGroupWhenPlacementRuleMismatch verifies that when the affinity group peers +// don't satisfy placement rules, the checker keeps the group schedulable by normalizing it to the valid Region peers. +func TestAffinityCheckerNormalizeGroupWhenPlacementRuleMismatch(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -2037,8 +2038,8 @@ func TestAffinityCheckerExpireGroupWhenPlacementRuleMismatch(t *testing.T) { affinityManager := tc.GetAffinityManager() checker := newTestAffinityChecker(ctx, tc, opt) - // Affinity group expects 4 voters while placement rules (and the region) only allow 3, - // so isGroupReplicated should be false and the group should be expired/refreshed. + // Affinity group expects 4 voters while placement rules (and the region) only allow 3. + // The checker should normalize the group to the valid Region peers and keep it schedulable. group := &affinity.Group{ ID: "test_group", LeaderStoreID: 1, @@ -2052,9 +2053,9 @@ func TestAffinityCheckerExpireGroupWhenPlacementRuleMismatch(t *testing.T) { groupState := affinityManager.GetAffinityGroupState("test_group") re.NotNil(groupState) - re.False(groupState.AffinitySchedulingAllowed, "Group should be expired when peer count violates placement rules") - re.Equal(affinity.PhasePending, groupState.Phase) - re.Equal([]uint64{1, 2, 3, 4}, groupState.VoterStoreIDs, "Peers remain as configured until a valid available region is observed") + re.True(groupState.AffinitySchedulingAllowed) + re.Equal(affinity.PhaseStable, groupState.Phase) + re.Equal([]uint64{1, 2, 3}, groupState.VoterStoreIDs) } // TestAffinityCheckerTargetStoreEvictLeader tests that operator is not created when target store has evict-leader. @@ -2124,6 +2125,147 @@ func TestAffinityCheckerTargetStoreRejectLeader(t *testing.T) { re.Nil(ops, "Should not create operator when target store has reject-leader label") } +// TestAffinityCheckerRegionHasBetterLocation tests how the checker handles a Region where isRegionPlacementRuleSatisfiedWithBestLocation returns false. +func TestAffinityCheckerRegionHasBetterLocation(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + opt := newAffinityTestOptions() + opt.SetLocationLabels([]string{"region", "zone", "host"}) + tc := mockcluster.NewCluster(ctx, opt) + + tc.AddLabelsStore(1, 0, map[string]string{"region": "r1", "zone": "z1", "host": "h1"}) + tc.AddLabelsStore(2, 0, map[string]string{"region": "r1", "zone": "z1", "host": "h1"}) + tc.AddLabelsStore(3, 0, map[string]string{"region": "r1", "zone": "z1", "host": "h2"}) + tc.AddLabelsStore(4, 0, map[string]string{"region": "r1", "zone": "z2", "host": "h1"}) + tc.AddLabelsStore(5, 0, map[string]string{"region": "r1", "zone": "z3", "host": "h1"}) + tc.AddLabelsStore(6, 0, map[string]string{"region": "r1", "zone": "z3", "host": "h2"}) + + affinityManager := tc.GetAffinityManager() + checker := newTestAffinityChecker(ctx, tc, opt) + + // Create affinity group without best location + group := &affinity.Group{ + ID: "test_group", + LeaderStoreID: 2, + VoterStoreIDs: []uint64{1, 2, 3}, + } + err := createAffinityGroupForTest(affinityManager, group, []byte(""), []byte("")) + re.NoError(err) + + // No scheduling is generated because the source Region is not at the best location. + tc.AddLeaderRegion(100, 1, 3, 4) + region := tc.GetRegion(100) + re.False(checker.isRegionPlacementRuleSatisfiedWithBestLocation(region, true)) + ops := checker.Check(region) + re.Nil(ops) + groupState := affinityManager.GetAffinityGroupState("test_group") + re.NotNil(groupState) + re.Equal([]uint64{1, 2, 3}, groupState.VoterStoreIDs) + + // Because the Group’s currently specified Peers are not at the best location, + // they are cleared and replaced with the source Region’s Peers. + // In this case, no scheduling is generated, but the Peers are updated. + tc.AddLeaderRegion(200, 1, 4, 5) + region = tc.GetRegion(200) + re.True(checker.isRegionPlacementRuleSatisfiedWithBestLocation(region, false)) + ops = checker.Check(region) + re.Nil(ops) + groupState = affinityManager.GetAffinityGroupState("test_group") + re.NotNil(groupState) + re.Equal([]uint64{1, 4, 5}, groupState.VoterStoreIDs) + + // When both the source Region and the target Region are at the best location, scheduling is generated. + tc.AddLeaderRegion(300, 1, 4, 6) + region = tc.GetRegion(300) + re.True(checker.isRegionPlacementRuleSatisfiedWithBestLocation(region, true)) + ops = checker.Check(region) + re.NotNil(ops) + re.Len(ops, 1) + + // No scheduling is generated when Placement Rules are disabled. + tc.SetEnablePlacementRules(false) + ops = checker.Check(region) + re.Nil(ops) + + // If an IsolationLevel is configured, no scheduling is generated when the required isolation level cannot be met. + tc.SetEnablePlacementRules(true) + rule := tc.GetRuleManager().GetRule(placement.DefaultGroupID, placement.DefaultRuleID) + re.NotNil(rule) + rule.IsolationLevel = "region" + re.NoError(tc.GetRuleManager().SetRule(rule)) + ops = checker.Check(region) + re.Nil(ops) +} + +func TestAffinityBestLocationFilteredByTemporarySourceState(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + opt := newAffinityTestOptions() + opt.SetLocationLabels([]string{"region", "zone", "host"}) + tc := mockcluster.NewCluster(ctx, opt) + + tc.AddLabelsStore(1, 0, map[string]string{"region": "r1", "zone": "z1", "host": "h1"}) + tc.AddLabelsStore(2, 0, map[string]string{"region": "r1", "zone": "z1", "host": "h1"}) + tc.AddLabelsStore(3, 0, map[string]string{"region": "r1", "zone": "z1", "host": "h2"}) + tc.AddLabelsStore(4, 0, map[string]string{"region": "r1", "zone": "z2", "host": "h1"}) + tc.AddLabelsStore(5, 0, map[string]string{"region": "r1", "zone": "z3", "host": "h1"}) + tc.AddLabelsStore(6, 0, map[string]string{"region": "r1", "zone": "z3", "host": "h2"}) + + checker := newTestAffinityChecker(ctx, tc, opt) + tc.AddLeaderRegion(1, 1, 3, 4) + region := tc.GetRegion(1) + + fit := checker.ruleManager.FitRegion(tc, region) + re.NotNil(fit) + re.True(fit.IsSatisfied()) + re.NotEmpty(fit.RuleFits) + strategy := checker.strategy(checker.r, region, fit.RuleFits[0].Rule, false) + oldStoreID, newStoreID, filterByTempState := strategy.getBetterLocation(tc, region, fit, fit.RuleFits[0]) + re.NotZero(oldStoreID) + re.NotZero(newStoreID) + re.False(filterByTempState) + + tc.SetStoreBusy(1, true) + tc.SetStoreBusy(3, true) + oldStoreID, newStoreID, filterByTempState = strategy.getBetterLocation(tc, region, fit, fit.RuleFits[0]) + re.Zero(oldStoreID) + re.Zero(newStoreID) + re.True(filterByTempState) + re.False(checker.isRegionPlacementRuleSatisfiedWithBestLocation(region, true)) +} + +// TestAffinityStrictCheckDivergesFromIsRegionReplicated documents the intended +// semantic split: affinity scheduling may reject a region that is already +// placement-rule replicated when a strictly better location still exists. +func TestAffinityStrictCheckDivergesFromIsRegionReplicated(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + opt := mockconfig.NewTestOptions() + opt.SetLocationLabels([]string{"zone", "host"}) + tc := mockcluster.NewCluster(ctx, opt) + tc.SetEnablePlacementRules(true) + tc.SetMaxReplicas(3) + + tc.AddLabelsStore(1, 0, map[string]string{"zone": "z1", "host": "h1"}) + tc.AddLabelsStore(2, 0, map[string]string{"zone": "z1", "host": "h1"}) + tc.AddLabelsStore(3, 0, map[string]string{"zone": "z2", "host": "h2"}) + tc.AddLabelsStore(4, 0, map[string]string{"zone": "z3", "host": "h3"}) + + checker := newTestAffinityChecker(ctx, tc, opt) + + tc.AddLeaderRegion(1, 1, 2, 3) + region := tc.GetRegion(1) + + re.True(filter.IsRegionReplicated(tc, region)) + re.False(checker.isRegionPlacementRuleSatisfiedWithBestLocation(region, true)) +} + // TestAffinityMergeCheckPeerStoreMismatch tests that merge is rejected when peer stores don't match. func TestAffinityMergeCheckPeerStoreMismatch(t *testing.T) { re := require.New(t) diff --git a/pkg/schedule/checker/metrics.go b/pkg/schedule/checker/metrics.go index 79e74b408c1..c58c1c87ebf 100644 --- a/pkg/schedule/checker/metrics.go +++ b/pkg/schedule/checker/metrics.go @@ -205,6 +205,7 @@ var ( affinityCheckerCounter = checkerCounter.WithLabelValues(affinityChecker, "check") affinityCheckerPausedCounter = checkerCounter.WithLabelValues(affinityChecker, "paused") + affinityCheckerPlacementRulesDisabledCounter = checkerCounter.WithLabelValues(affinityChecker, "placement-rules-disabled") affinityCheckerRegionNoLeaderCounter = checkerCounter.WithLabelValues(affinityChecker, "region-no-leader") affinityCheckerGroupSchedulingDisabledCounter = checkerCounter.WithLabelValues(affinityChecker, "group-scheduling-disabled") affinityCheckerNewOpCounter = checkerCounter.WithLabelValues(affinityChecker, "new-operator") diff --git a/pkg/schedule/checker/replica_strategy.go b/pkg/schedule/checker/replica_strategy.go index ffb25f90ad5..73f1f8b4d4c 100644 --- a/pkg/schedule/checker/replica_strategy.go +++ b/pkg/schedule/checker/replica_strategy.go @@ -22,6 +22,8 @@ import ( "github.com/tikv/pd/pkg/core/constant" sche "github.com/tikv/pd/pkg/schedule/core" "github.com/tikv/pd/pkg/schedule/filter" + "github.com/tikv/pd/pkg/schedule/placement" + "github.com/tikv/pd/pkg/versioninfo" "go.uber.org/zap" ) @@ -156,3 +158,77 @@ func (s *ReplicaStrategy) SelectStoreToRemove(coLocationStores []*core.StoreInfo } return source.GetID() } + +func (s *ReplicaStrategy) selectStoreToRemoveWithTempState(coLocationStores []*core.StoreInfo) (uint64, bool) { + isolationComparer := filter.IsolationComparer(s.locationLabels, coLocationStores) + level := constant.High + if s.fastFailover { + level = constant.Urgent + } + sourceCandidate := filter.NewCandidates(s.r, coLocationStores). + FilterSource(s.cluster.GetCheckerConfig(), nil, nil, &filter.StoreStateFilter{ActionScope: s.checkerName, MoveRegion: true, AllowTemporaryStates: true, OperatorLevel: level}). + KeepTheTopStores(isolationComparer, true) + if sourceCandidate.Len() == 0 { + log.Debug("no removable store", zap.Uint64("region-id", s.region.GetID())) + return 0, false + } + source := filter.NewCandidates(s.r, sourceCandidate.Stores). + FilterSource(s.cluster.GetCheckerConfig(), nil, nil, &filter.StoreStateFilter{ActionScope: s.checkerName, MoveRegion: true, OperatorLevel: level}). + PickTheTopStore(filter.RegionScoreComparer(s.cluster.GetCheckerConfig()), false) + if source != nil { + return source.GetID(), false + } + source = sourceCandidate.PickTheTopStore(filter.RegionScoreComparer(s.cluster.GetCheckerConfig()), false) + return source.GetID(), true +} + +func (s *ReplicaStrategy) getBetterLocation(cluster sche.SharedCluster, region *core.RegionInfo, fit *placement.RegionFit, rf *placement.RuleFit) (oldStoreID, newStoreID uint64, filterByTempState bool) { + ruleStores := getRuleFitStores(cluster, rf) + oldStoreID, sourceFilterByTempState := s.selectStoreToRemoveWithTempState(ruleStores) + if oldStoreID == 0 { + return 0, 0, false + } + oldStore := cluster.GetStore(oldStoreID) + if oldStore == nil { + return 0, 0, false + } + var coLocationStores []*core.StoreInfo + regionStores := cluster.GetRegionStores(region) + for _, store := range regionStores { + if store.GetLabelValue(core.EngineKey) != oldStore.GetLabelValue(core.EngineKey) { + continue + } + for _, r := range fit.GetRules() { + if r.Role != rf.Rule.Role { + continue + } + if placement.MatchLabelConstraints(store, r.LabelConstraints) { + coLocationStores = append(coLocationStores, store) + break + } + } + } + newStoreID, filterByTempState = s.SelectStoreToImprove(coLocationStores, oldStoreID) + if sourceFilterByTempState { + if newStoreID != 0 || filterByTempState { + return 0, 0, true + } + return 0, 0, false + } + return +} + +func isWitnessEnabled(cluster sche.CheckerCluster) bool { + config := cluster.GetCheckerConfig() + return versioninfo.IsFeatureSupported(config.GetClusterVersion(), versioninfo.SwitchWitness) && config.IsWitnessAllowed() +} + +func getRuleFitStores(cluster sche.SharedCluster, rf *placement.RuleFit) []*core.StoreInfo { + stores := make([]*core.StoreInfo, 0, len(rf.Peers)) + for _, p := range rf.Peers { + if s := cluster.GetStore(p.GetStoreId()); s != nil { + stores = append(stores, s) + } + } + return stores +} diff --git a/pkg/schedule/checker/rule_checker.go b/pkg/schedule/checker/rule_checker.go index 2d06f84fdfe..0b5bf479176 100644 --- a/pkg/schedule/checker/rule_checker.go +++ b/pkg/schedule/checker/rule_checker.go @@ -34,7 +34,6 @@ import ( "github.com/tikv/pd/pkg/schedule/placement" "github.com/tikv/pd/pkg/schedule/types" "github.com/tikv/pd/pkg/utils/syncutil" - "github.com/tikv/pd/pkg/versioninfo" "go.uber.org/zap" ) @@ -157,11 +156,6 @@ func (c *RuleChecker) RecordRegionPromoteToNonWitness(regionID uint64) { c.switchWitnessCache.PutWithTTL(regionID, nil, c.cluster.GetCheckerConfig().GetSwitchWitnessInterval()) } -func (c *RuleChecker) isWitnessEnabled() bool { - return versioninfo.IsFeatureSupported(c.cluster.GetCheckerConfig().GetClusterVersion(), versioninfo.SwitchWitness) && - c.cluster.GetCheckerConfig().IsWitnessAllowed() -} - func (c *RuleChecker) fixRulePeer(region *core.RegionInfo, fit *placement.RegionFit, rf *placement.RuleFit) (*operator.Operator, error) { // make up peers. if len(rf.Peers) < rf.Rule.Count { @@ -175,7 +169,7 @@ func (c *RuleChecker) fixRulePeer(region *core.RegionInfo, fit *placement.Region return c.replaceUnexpectedRulePeer(region, rf, fit, peer, downStatus) } // When witness placement rule is enabled, promotes the witness to voter when region has down voter. - if c.isWitnessEnabled() && core.IsVoter(peer) { + if isWitnessEnabled(c.cluster) && core.IsVoter(peer) { if witness, ok := c.hasAvailableWitness(region, peer); ok { ruleCheckerPromoteWitnessCounter.Inc() return operator.CreateNonWitnessPeerOperator("promote-witness-for-down", c.cluster, region, witness) @@ -197,13 +191,13 @@ func (c *RuleChecker) fixRulePeer(region *core.RegionInfo, fit *placement.Region return op, nil } } - return c.fixBetterLocation(region, rf) + return c.fixBetterLocation(region, fit, rf) } func (c *RuleChecker) addRulePeer(region *core.RegionInfo, fit *placement.RegionFit, rf *placement.RuleFit) (*operator.Operator, error) { ruleCheckerAddRulePeerCounter.Inc() - ruleStores := c.getRuleFitStores(rf) - isWitness := rf.Rule.IsWitness && c.isWitnessEnabled() + ruleStores := getRuleFitStores(c.cluster, rf) + isWitness := rf.Rule.IsWitness && isWitnessEnabled(c.cluster) // If the peer to be added is a witness, since no snapshot is needed, we also reuse the fast failover logic. store, filterByTempState := c.strategy(c.r, region, rf.Rule, isWitness).SelectStoreToAdd(ruleStores) if store == 0 { @@ -242,8 +236,13 @@ func (c *RuleChecker) addRulePeer(region *core.RegionInfo, fit *placement.Region // The peer's store may in Offline or Down, need to be replace. func (c *RuleChecker) replaceUnexpectedRulePeer(region *core.RegionInfo, rf *placement.RuleFit, fit *placement.RegionFit, peer *metapb.Peer, status string) (*operator.Operator, error) { var fastFailover bool + store := c.cluster.GetStore(peer.StoreId) + if store == nil { + log.Warn("lost the store, maybe you are recovering the PD cluster", zap.Uint64("store-id", peer.StoreId)) + return nil, errNoStoreToReplace + } // If the store to which the original peer belongs is TiFlash, the new peer cannot be set to witness, nor can it perform fast failover - if c.isWitnessEnabled() && !c.cluster.GetStore(peer.StoreId).IsTiFlash() { + if isWitnessEnabled(c.cluster) && !store.IsTiFlash() { // No matter whether witness placement rule is enabled or disabled, when peer's downtime // exceeds the threshold(30min), quickly add a witness to speed up failover, then promoted // to non-witness gradually to improve availability. @@ -255,14 +254,14 @@ func (c *RuleChecker) replaceUnexpectedRulePeer(region *core.RegionInfo, rf *pla } else { fastFailover = false } - ruleStores := c.getRuleFitStores(rf) - store, filterByTempState := c.strategy(c.r, region, rf.Rule, fastFailover).SelectStoreToFix(ruleStores, peer.GetStoreId()) - if store == 0 { + ruleStores := getRuleFitStores(c.cluster, rf) + storeID, filterByTempState := c.strategy(c.r, region, rf.Rule, fastFailover).SelectStoreToFix(ruleStores, peer.GetStoreId()) + if storeID == 0 { ruleCheckerNoStoreReplaceCounter.Inc() c.handleFilterState(region, filterByTempState) return nil, errNoStoreToReplace } - newPeer := &metapb.Peer{StoreId: store, Role: rf.Rule.Role.MetaPeerRole(), IsWitness: fastFailover} + newPeer := &metapb.Peer{StoreId: storeID, Role: rf.Rule.Role.MetaPeerRole(), IsWitness: fastFailover} // pick the smallest leader store to avoid the Offline store be snapshot generator bottleneck. var newLeader *metapb.Peer if region.GetLeader().GetId() == peer.GetId() { @@ -342,7 +341,7 @@ func (c *RuleChecker) fixLooseMatchPeer(region *core.RegionInfo, fit *placement. if region.GetLeader().GetId() == peer.GetId() && rf.Rule.IsWitness { return nil, errPeerCannotBeWitness } - if !core.IsWitness(peer) && rf.Rule.IsWitness && c.isWitnessEnabled() { + if !core.IsWitness(peer) && rf.Rule.IsWitness && isWitnessEnabled(c.cluster) { c.switchWitnessCache.UpdateTTL(c.cluster.GetCheckerConfig().GetSwitchWitnessInterval()) if c.switchWitnessCache.Exists(region.GetID()) { ruleCheckerRecentlyPromoteToNonWitnessCounter.Inc() @@ -358,7 +357,7 @@ func (c *RuleChecker) fixLooseMatchPeer(region *core.RegionInfo, fit *placement. ruleCheckerSetVoterWitnessCounter.Inc() } return operator.CreateWitnessPeerOperator("fix-witness-peer", c.cluster, region, peer) - } else if core.IsWitness(peer) && (!rf.Rule.IsWitness || !c.isWitnessEnabled()) { + } else if core.IsWitness(peer) && (!rf.Rule.IsWitness || !isWitnessEnabled(c.cluster)) { if core.IsLearner(peer) { ruleCheckerSetLearnerNonWitnessCounter.Inc() } else { @@ -390,36 +389,23 @@ func (c *RuleChecker) allowLeader(fit *placement.RegionFit, peer *metapb.Peer) b return false } -func (c *RuleChecker) fixBetterLocation(region *core.RegionInfo, rf *placement.RuleFit) (*operator.Operator, error) { +func (c *RuleChecker) fixBetterLocation(region *core.RegionInfo, fit *placement.RegionFit, rf *placement.RuleFit) (*operator.Operator, error) { if len(rf.Rule.LocationLabels) == 0 { return nil, nil } - isWitness := rf.Rule.IsWitness && c.isWitnessEnabled() + isWitness := rf.Rule.IsWitness && isWitnessEnabled(c.cluster) // If the peer to be moved is a witness, since no snapshot is needed, we also reuse the fast failover logic. strategy := c.strategy(c.r, region, rf.Rule, isWitness) - ruleStores := c.getRuleFitStores(rf) - oldStore := strategy.SelectStoreToRemove(ruleStores) - if oldStore == 0 { - return nil, nil - } - var coLocationStores []*core.StoreInfo - regionStores := c.cluster.GetRegionStores(region) - for _, s := range regionStores { - if placement.MatchLabelConstraints(s, rf.Rule.LabelConstraints) { - coLocationStores = append(coLocationStores, s) - } - } - - newStore, filterByTempState := strategy.SelectStoreToImprove(coLocationStores, oldStore) - if newStore == 0 { + oldStoreID, newStoreID, filterByTempState := strategy.getBetterLocation(c.cluster, region, fit, rf) + if newStoreID == 0 { log.Debug("no replacement store", zap.Uint64("region-id", region.GetID())) c.handleFilterState(region, filterByTempState) return nil, nil } ruleCheckerMoveToBetterLocationCounter.Inc() - newPeer := &metapb.Peer{StoreId: newStore, Role: rf.Rule.Role.MetaPeerRole(), IsWitness: isWitness} - return operator.CreateMovePeerOperator("move-to-better-location", c.cluster, region, operator.OpReplica, oldStore, newPeer) + newPeer := &metapb.Peer{StoreId: newStoreID, Role: rf.Rule.Role.MetaPeerRole(), IsWitness: isWitness} + return operator.CreateMovePeerOperator("move-to-better-location", c.cluster, region, operator.OpReplica, oldStoreID, newPeer) } func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.RegionFit) (*operator.Operator, error) { @@ -635,16 +621,6 @@ func (c *RuleChecker) strategy(r *rand.Rand, region *core.RegionInfo, rule *plac } } -func (c *RuleChecker) getRuleFitStores(rf *placement.RuleFit) []*core.StoreInfo { - var stores []*core.StoreInfo - for _, p := range rf.Peers { - if s := c.cluster.GetStore(p.GetStoreId()); s != nil { - stores = append(stores, s) - } - } - return stores -} - func (c *RuleChecker) handleFilterState(region *core.RegionInfo, filterByTempState bool) { if filterByTempState { c.pendingProcessedRegions.Put(region.GetID(), nil) diff --git a/pkg/schedule/checker/rule_checker_test.go b/pkg/schedule/checker/rule_checker_test.go index 5ac67122de1..a16d6beeea1 100644 --- a/pkg/schedule/checker/rule_checker_test.go +++ b/pkg/schedule/checker/rule_checker_test.go @@ -742,6 +742,124 @@ func (suite *ruleCheckerTestSuite) TestBetterReplacement2() { re.Nil(op) } +func (suite *ruleCheckerTestSuite) TestBetterReplacement3() { + re := suite.Require() + cfg := suite.cluster.GetReplicationConfig().Clone() + cfg.LocationLabels = []string{"region", "zone", "host"} + suite.cluster.SetReplicationConfig(cfg) + suite.cluster.AddLabelsStore(1, 10, map[string]string{"region": "R1", "host": "host-1", "zone": "z1", "type": "ap"}) + suite.cluster.AddLabelsStore(2, 10, map[string]string{"region": "R1", "host": "host-2", "zone": "z1", "type": "ap"}) + suite.cluster.AddLabelsStore(3, 10, map[string]string{"region": "R1", "host": "host-3", "zone": "z2", "type": "ap"}) + suite.cluster.AddLabelsStore(4, 10, map[string]string{"region": "R1", "host": "host-4", "zone": "z2", "type": "ap"}) + suite.cluster.AddLabelsStore(5, 10, map[string]string{"region": "R1", "host": "host-5", "zone": "z3", "type": "tp"}) + suite.cluster.AddLabelsStore(6, 10, map[string]string{"region": "R1", "host": "host-6", "zone": "z3", "type": "tp"}) + suite.cluster.AddLeaderRegionWithRange(10, "a", "b", 1, 2, 6) + rule1 := &placement.Rule{ + GroupID: "TiDB_DDL_122", + ID: "table_rule_122_0", + Index: 40, + Role: placement.Leader, + Count: 1, + LocationLabels: []string{"zone"}, + LabelConstraints: []placement.LabelConstraint{ + {Key: "type", Op: "in", Values: []string{"ap"}}, + }, + } + rule2 := &placement.Rule{ + + GroupID: "TiDB_DDL_122", + ID: "table_rule_122_1", + Index: 40, + Role: placement.Voter, + Count: 1, + LocationLabels: []string{"zone"}, + LabelConstraints: []placement.LabelConstraint{ + {Key: "type", Op: "in", Values: []string{"ap"}}, + }, + } + rule3 := &placement.Rule{ + + GroupID: "TiDB_DDL_122", + ID: "table_rule_122_2", + Index: 40, + Role: placement.Voter, + Count: 1, + LocationLabels: []string{"zone"}, + LabelConstraints: []placement.LabelConstraint{ + {Key: "type", Op: "in", Values: []string{"tp"}}, + }, + } + suite.ruleManager.SetRule(rule1) + suite.ruleManager.SetRule(rule2) + suite.ruleManager.SetRule(rule3) + suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) + region := suite.cluster.GetRegion(10) + op := suite.rc.Check(region) + re.NotNil(op) + re.Equal("move-to-better-location", op.Desc()) + if op.Step(0).(operator.AddLearner).ToStore != 4 { + re.Equal(uint64(3), op.Step(0).(operator.AddLearner).ToStore) + } +} + +func (suite *ruleCheckerTestSuite) TestBetterReplacement4() { + re := suite.Require() + cfg := suite.cluster.GetReplicationConfig().Clone() + cfg.LocationLabels = []string{"region", "zone", "host"} + suite.cluster.SetReplicationConfig(cfg) + suite.cluster.AddLabelsStore(1, 10, map[string]string{"region": "R1", "host": "host-1", "zone": "z1", "type": "ap"}) + suite.cluster.AddLabelsStore(2, 10, map[string]string{"region": "R1", "host": "host-2", "zone": "z1", "type": "ap"}) + suite.cluster.AddLabelsStore(3, 10, map[string]string{"region": "R1", "host": "host-3", "zone": "z2", "type": "ap"}) + suite.cluster.AddLabelsStore(4, 10, map[string]string{"region": "R1", "host": "host-4", "zone": "z2", "type": "tp"}) + suite.cluster.AddLabelsStore(5, 10, map[string]string{"region": "R1", "host": "host-5", "zone": "z3", "type": "ap"}) + suite.cluster.AddLabelsStore(6, 10, map[string]string{"region": "R1", "host": "host-6", "zone": "z3", "type": "tp"}) + suite.cluster.AddLeaderRegionWithRange(10, "a", "b", 2, 3, 4) + rule1 := &placement.Rule{ + GroupID: "TiDB_DDL_122", + ID: "table_rule_122_0", + Index: 40, + Role: placement.Leader, + Count: 1, + LocationLabels: []string{"zone"}, + LabelConstraints: []placement.LabelConstraint{ + {Key: "type", Op: "in", Values: []string{"ap"}}, + }, + } + rule2 := &placement.Rule{ + + GroupID: "TiDB_DDL_122", + ID: "table_rule_122_1", + Index: 40, + Role: placement.Voter, + Count: 1, + LocationLabels: []string{"zone"}, + LabelConstraints: []placement.LabelConstraint{ + {Key: "type", Op: "in", Values: []string{"ap"}}, + }, + } + rule3 := &placement.Rule{ + + GroupID: "TiDB_DDL_122", + ID: "table_rule_122_2", + Index: 40, + Role: placement.Voter, + Count: 1, + LocationLabels: []string{"zone"}, + LabelConstraints: []placement.LabelConstraint{ + {Key: "type", Op: "in", Values: []string{"tp"}}, + }, + } + suite.ruleManager.SetRule(rule1) + suite.ruleManager.SetRule(rule2) + suite.ruleManager.SetRule(rule3) + suite.ruleManager.DeleteRule(placement.DefaultGroupID, placement.DefaultRuleID) + region := suite.cluster.GetRegion(10) + op := suite.rc.Check(region) + re.NotNil(op) + re.Equal("move-to-better-location", op.Desc()) + re.Equal(uint64(5), op.Step(0).(operator.AddLearner).ToStore) +} + func (suite *ruleCheckerTestSuite) TestNoBetterReplacement() { re := suite.Require() suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) @@ -2172,3 +2290,54 @@ func (suite *ruleCheckerTestSuite) TestIssue7808() { re.Equal("fast-replace-rule-down-peer", op.Desc()) re.Contains(op.Brief(), "mv peer: store [1] to [2]") } + +func (suite *ruleCheckerTestSuite) TestFixBetterLocationEngineConstraint() { + re := suite.Require() + + // Setup stores with different engine types + suite.cluster.AddLabelsStore(1, 10, map[string]string{"zone": "z1", "host": "host1"}) + suite.cluster.AddLabelsStore(2, 10, map[string]string{"zone": "z2", "host": "host2"}) + suite.cluster.AddLabelsStore(3, 10, map[string]string{"zone": "z2", "host": "host3"}) + suite.cluster.AddLabelsStore(4, 10, map[string]string{"zone": "z2", "host": "host4"}) + suite.cluster.AddLabelsStore(5, 10, map[string]string{"zone": "z3", "host": "host5", "engine": "tiflash"}) + suite.cluster.AddLabelsStore(6, 10, map[string]string{"zone": "z3", "host": "host6", "engine": "tiflash"}) + suite.cluster.AddLabelsStore(7, 10, map[string]string{"zone": "z3", "host": "host7", "engine": "tiflash"}) + + // Create a TiKV region on stores 1, 2, 3 + suite.cluster.AddLeaderRegionWithRange(1, "a", "b", 1, 2, 3) + + rule := &placement.Rule{ + GroupID: placement.DefaultGroupID, + ID: placement.DefaultRuleID, + Index: 0, + Role: placement.Voter, + Count: 3, + LocationLabels: []string{"zone", "host"}, + } + suite.ruleManager.SetRule(rule) + region1 := suite.cluster.GetRegion(1) + op := suite.rc.Check(region1) + re.Empty(op) + + ruleTiFlash := &placement.Rule{ + GroupID: "tiflash", + ID: "tiflash", + Index: 40, + Role: placement.Learner, + Count: 3, + LabelConstraints: []placement.LabelConstraint{ + { + Key: "engine", + Op: placement.In, + Values: []string{"tiflash"}, + }, + }, + LocationLabels: []string{"zone", "host"}, + } + + suite.ruleManager.SetRule(ruleTiFlash) + suite.cluster.AddRegionWithLearner(2, 1, []uint64{2, 3}, []uint64{5, 6, 7}) + region2 := suite.cluster.GetRegion(2) + op = suite.rc.Check(region2) + re.Empty(op) +} diff --git a/pkg/schedule/placement/fit.go b/pkg/schedule/placement/fit.go index 30530462664..110f629f8ae 100644 --- a/pkg/schedule/placement/fit.go +++ b/pkg/schedule/placement/fit.go @@ -63,7 +63,7 @@ func (f *RegionFit) Replace(srcStoreID uint64, dstStore *core.StoreInfo) bool { return false } - score := isolationStoreScore(srcStoreID, dstStore, fit.stores, fit.Rule.LocationLabels) + score := isolationStoreScore(srcStoreID, dstStore, fit.Stores, fit.Rule.LocationLabels) // restore the source store. return fit.IsolationScore <= score } @@ -119,6 +119,11 @@ func (f *RegionFit) GetRegionStores() []*core.StoreInfo { return f.regionStores } +// GetRules returns the rules that are used to fit the region. +func (f *RegionFit) GetRules() []*Rule { + return f.rules +} + // RuleFit is the result of fitting status of a Rule. type RuleFit struct { Rule *Rule `json:"rule"` @@ -132,8 +137,8 @@ type RuleFit struct { // isolated. A larger value is better. IsolationScore float64 `json:"isolation-score"` WitnessScore int `json:"witness-score"` - // stores is the stores that the peers are placed in. - stores []*core.StoreInfo + // Stores is the stores that the peers are placed in. + Stores []*core.StoreInfo `json:"-"` } // IsSatisfied returns if the rule is properly satisfied. @@ -365,7 +370,7 @@ func newRuleFit(rule *Rule, peers []*fitPeer, supportWitness bool) *RuleFit { rf := &RuleFit{Rule: rule, IsolationScore: isolationScore(peers, rule.LocationLabels), WitnessScore: witnessScore(peers, supportWitness && rule.IsWitness)} for _, p := range peers { rf.Peers = append(rf.Peers, p.Peer) - rf.stores = append(rf.stores, p.store) + rf.Stores = append(rf.Stores, p.store) if !p.matchRoleStrict(rule.Role) || (supportWitness && (p.IsWitness != rule.IsWitness)) || (!supportWitness && p.IsWitness) { diff --git a/pkg/schedule/placement/rule_manager.go b/pkg/schedule/placement/rule_manager.go index f44258d797c..50644d4e9b5 100644 --- a/pkg/schedule/placement/rule_manager.go +++ b/pkg/schedule/placement/rule_manager.go @@ -416,6 +416,17 @@ func (m *RuleManager) IsRegionFitCached(storeSet StoreSet, region *core.RegionIn return isCached } +// FitRegionWithoutCache fits a region to the rules it matches. This function does not use or save the cache, +// and the Region being fitted is usually not a real Region. +func (m *RuleManager) FitRegionWithoutCache(storeSet StoreSet, region *core.RegionInfo) (fit *RegionFit) { + regionStores := getStoresByRegion(storeSet, region) + rules := m.GetRulesForApplyRegion(region) + fit = fitRegion(regionStores, region, rules, m.conf.IsWitnessAllowed()) + fit.regionStores = regionStores + fit.rules = rules + return fit +} + // FitRegion fits a region to the rules it matches. func (m *RuleManager) FitRegion(storeSet StoreSet, region *core.RegionInfo) (fit *RegionFit) { regionStores := getStoresByRegion(storeSet, region) diff --git a/pkg/statistics/region_collection.go b/pkg/statistics/region_collection.go index 958ba3be5df..b54a4e17812 100644 --- a/pkg/statistics/region_collection.go +++ b/pkg/statistics/region_collection.go @@ -380,7 +380,7 @@ func NewLabelStatistics() *LabelStatistics { // Observe records the current label status. func (l *LabelStatistics) Observe(region *core.RegionInfo, stores []*core.StoreInfo, labels []string) { regionID := region.GetID() - regionIsolation := GetRegionLabelIsolation(stores, labels) + regionIsolation, _ := GetRegionLabelIsolation(stores, labels) l.Lock() defer l.Unlock() if label, ok := l.regionLabelStats[regionID]; ok { @@ -441,9 +441,9 @@ func (l *LabelStatistics) GetLabelCounter() map[string]int { } // GetRegionLabelIsolation returns the isolation level of the region. -func GetRegionLabelIsolation(stores []*core.StoreInfo, labels []string) string { +func GetRegionLabelIsolation(stores []*core.StoreInfo, labels []string) (string, int) { if len(stores) == 0 || len(labels) == 0 { - return nonIsolation + return nonIsolation, -1 } queueStores := [][]*core.StoreInfo{stores} for level, label := range labels { @@ -456,10 +456,27 @@ func GetRegionLabelIsolation(stores []*core.StoreInfo, labels []string) string { } queueStores = newQueueStores if len(queueStores) == 0 { - return labels[level] + return labels[level], level } } - return nonIsolation + return nonIsolation, -1 +} + +// IsRegionLabelIsolationSatisfied checks whether the isolation level of the region satisfied. +func IsRegionLabelIsolationSatisfied(stores []*core.StoreInfo, labels []string, isolationLevel string) bool { + if isolationLevel == "" || isolationLevel == nonIsolation { + return true + } + _, level := GetRegionLabelIsolation(stores, labels) + if level == -1 { + return false + } + for _, key := range labels[level:] { + if key == isolationLevel { + return true + } + } + return false } func notIsolatedStoresWithLabel(stores []*core.StoreInfo, label string) [][]*core.StoreInfo { diff --git a/pkg/statistics/region_collection_test.go b/pkg/statistics/region_collection_test.go index 3f9e21998ed..226eee43320 100644 --- a/pkg/statistics/region_collection_test.go +++ b/pkg/statistics/region_collection_test.go @@ -216,19 +216,29 @@ func TestRegionLabelIsolationLevel(t *testing.T) { {"zone": "z1", "host": "h3"}, }, } - res := []string{"rack", "host", "zone", "rack", "none", "rack", "host"} + isolationResSet := []string{"rack", "host", "zone", "rack", "none", "rack", "host"} + levelResSet := []int{1, 2, 0, 1, -1, 1, 2} + satisfiedResSet := []map[string]bool{ + {"zone": false, "rack": true, "host": true, "bar": false, "none": true, "": true}, + {"zone": false, "rack": false, "host": true, "bar": false, "none": true, "": true}, + {"zone": true, "rack": true, "host": true, "bar": false, "none": true, "": true}, + {"zone": false, "rack": true, "host": true, "bar": false, "none": true, "": true}, + {"zone": false, "rack": false, "host": false, "bar": false, "none": true, "": true}, + {"zone": false, "rack": true, "host": true, "bar": false, "none": true, "": true}, + {"zone": false, "rack": false, "host": true, "bar": false, "none": true, "": true}, + } counter := map[string]int{"none": 1, "host": 2, "rack": 3, "zone": 1} regionID := 1 - f := func(labels []map[string]string, res string, locationLabels []string) { + f := func(locationLabels []string, storeLabels []map[string]string, isolationRes string, levelRes int, satisfiedRes map[string]bool) { metaStores := []*metapb.Store{ {Id: 1, Address: "mock://tikv-1"}, {Id: 2, Address: "mock://tikv-2"}, {Id: 3, Address: "mock://tikv-3"}, } - stores := make([]*core.StoreInfo, 0, len(labels)) + stores := make([]*core.StoreInfo, 0, len(storeLabels)) for i, m := range metaStores { var newLabels []*metapb.StoreLabel - for k, v := range labels[i] { + for k, v := range storeLabels[i] { newLabels = append(newLabels, &metapb.StoreLabel{Key: k, Value: v}) } s := core.NewStoreInfo(m, core.SetStoreLabels(newLabels)) @@ -236,34 +246,51 @@ func TestRegionLabelIsolationLevel(t *testing.T) { stores = append(stores, s) } region := core.NewRegionInfo(&metapb.Region{Id: uint64(regionID)}, nil) - label := GetRegionLabelIsolation(stores, locationLabels) + label, level := GetRegionLabelIsolation(stores, locationLabels) labelLevelStats.Observe(region, stores, locationLabels) - re.Equal(res, label) + re.Equal(isolationRes, label) + re.Equal(levelRes, level) + for isolationLevel, res := range satisfiedRes { + re.Equal(res, IsRegionLabelIsolationSatisfied(stores, locationLabels, isolationLevel)) + } regionID++ } for i, labels := range labelsSet { - f(labels, res[i], locationLabels) + f(locationLabels, labels, isolationResSet[i], levelResSet[i], satisfiedResSet[i]) } for i, res := range counter { re.Equal(res, labelLevelStats.labelCounter[i]) } - label := GetRegionLabelIsolation(nil, locationLabels) + label, level := GetRegionLabelIsolation(nil, locationLabels) re.Equal(nonIsolation, label) - label = GetRegionLabelIsolation(nil, nil) + re.Equal(-1, level) + label, level = GetRegionLabelIsolation(nil, nil) re.Equal(nonIsolation, label) + re.Equal(-1, level) store := core.NewStoreInfo(&metapb.Store{Id: 1, Address: "mock://tikv-1"}, core.SetStoreLabels([]*metapb.StoreLabel{{Key: "foo", Value: "bar"}})) - label = GetRegionLabelIsolation([]*core.StoreInfo{store}, locationLabels) + label, level = GetRegionLabelIsolation([]*core.StoreInfo{store}, locationLabels) re.Equal("zone", label) + re.Equal(0, level) regionID = 1 - res = []string{"rack", "none", "zone", "rack", "none", "rack", "none"} - counter = map[string]int{"none": 3, "host": 0, "rack": 3, "zone": 1} locationLabels = []string{"zone", "rack"} + isolationResSet = []string{"rack", "none", "zone", "rack", "none", "rack", "none"} + levelResSet = []int{1, -1, 0, 1, -1, 1, -1} + satisfiedResSet = []map[string]bool{ + {"zone": false, "rack": true, "bar": false, "none": true, "": true}, + {"zone": false, "rack": false, "bar": false, "none": true, "": true}, + {"zone": true, "rack": true, "bar": false, "none": true, "": true}, + {"zone": false, "rack": true, "bar": false, "none": true, "": true}, + {"zone": false, "rack": false, "bar": false, "none": true, "": true}, + {"zone": false, "rack": true, "bar": false, "none": true, "": true}, + {"zone": false, "rack": false, "bar": false, "none": true, "": true}, + } + counter = map[string]int{"none": 3, "host": 0, "rack": 3, "zone": 1} for i, labels := range labelsSet { - f(labels, res[i], locationLabels) + f(locationLabels, labels, isolationResSet[i], levelResSet[i], satisfiedResSet[i]) } for i, res := range counter { re.Equal(res, labelLevelStats.labelCounter[i]) diff --git a/tools/pd-ctl/pdctl/command/label_command.go b/tools/pd-ctl/pdctl/command/label_command.go index 6d95465392f..b73c436bc0f 100644 --- a/tools/pd-ctl/pdctl/command/label_command.go +++ b/tools/pd-ctl/pdctl/command/label_command.go @@ -178,7 +178,7 @@ func checkIsolationLabel(cmd *cobra.Command, args []string) { stores = append(stores, s) } } - isolationLabel := statistics.GetRegionLabelIsolation(stores, locationLabels) + isolationLabel, _ := statistics.GetRegionLabelIsolation(stores, locationLabels) if len(checkLabel) == 0 || isolationLabel == checkLabel { regionMap[region.ID] = isolationLabel labelCount[isolationLabel]++