From fb2c065aeadb9c76e4216b3dcf6acb4d54f20cf0 Mon Sep 17 00:00:00 2001
From: Yuhao Zhang <yhzhang00@outlook.com>
Date: Tue, 21 Apr 2026 16:23:25 +0800
Subject: [PATCH 01/12] resource_control: add PredictedReadBytes hint for RC
 paging pre-charge

Introduce an optional predictedReadBytesProvider interface on RequestInfo.
When a caller (e.g. TiDB maintaining a per-logical-scan EMA across paging
RPCs) supplies a non-zero PredictedReadBytes, BeforeKVRequest pre-charges
ReadBytesCost * predictedReadBytes RRU so that concurrent workers are
throttled at BeforeKVRequest rather than all hitting AfterKVRequest
settlement at once. AfterKVRequest then subtracts the same basis so the
net (pre-charge + settlement) equals baseCost + actualCost.

Without a hint the request is not pre-charged and is billed in
AfterKVRequest by actual read bytes only - this keeps the RU-billing
pre-charge decoupled from the protocol-level paging byte cap.

The hint is added as an optional interface (not a method on RequestInfo)
so existing RequestInfo implementations compile unchanged; they simply
skip pre-charge.

Signed-off-by: Yuhao Zhang <yhzhang00@outlook.com>
---
 .../controller/group_controller_test.go       | 70 +++++++++++++++++++
 client/resource_group/controller/model.go     | 40 +++++++++++
 client/resource_group/controller/testutil.go  | 17 +++--
 3 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/client/resource_group/controller/group_controller_test.go b/client/resource_group/controller/group_controller_test.go
index f916f5e56c4..f92681319f7 100644
--- a/client/resource_group/controller/group_controller_test.go
+++ b/client/resource_group/controller/group_controller_test.go
@@ -208,6 +208,76 @@ func TestOnResponseWaitConsumption(t *testing.T) {
 	verify()
 }
 
+func TestPredictedReadBytesPreCharge(t *testing.T) {
+	re := require.New(t)
+	cfg := DefaultRUConfig()
+	kvCalc := newKVCalculator(cfg)
+
+	// BeforeKVRequest with a PredictedReadBytes hint should pre-charge
+	// baseCost + predictedReadBytes * ReadBytesCost.
+	predictedReadBytes := uint64(256 * 1024) // learned EMA estimate
+	req := &TestRequestInfo{
+		isWrite:            false,
+		predictedReadBytes: predictedReadBytes,
+	}
+	precharge := &rmpb.Consumption{}
+	kvCalc.BeforeKVRequest(precharge, req)
+
+	baseCost := float64(cfg.ReadBaseCost) + float64(cfg.ReadPerBatchBaseCost)*defaultAvgBatchProportion
+	hintCost := float64(cfg.ReadBytesCost) * float64(predictedReadBytes)
+	re.InDelta(baseCost+hintCost, precharge.RRU, 1e-6,
+		"BeforeKVRequest should pre-charge based on PredictedReadBytes")
+
+	// AfterKVRequest should subtract the same hint basis, preserving
+	// precharge + settle == baseCost + actualCost.
+	actualReadBytes := uint64(300 * 1024) // close to the prediction
+	resp := &TestResponseInfo{
+		readBytes: actualReadBytes,
+		kvCPU:     10 * time.Millisecond,
+		succeed:   true,
+	}
+	settle := &rmpb.Consumption{}
+	kvCalc.AfterKVRequest(settle, req, resp)
+
+	actualReadCost := float64(cfg.ReadBytesCost) * float64(actualReadBytes)
+	cpuCost := float64(cfg.CPUMsCost) * 10.0
+	re.InDelta(actualReadCost+cpuCost-hintCost, settle.RRU, 1e-6,
+		"AfterKVRequest should settle using the same hint basis as BeforeKVRequest")
+
+	totalRRU := precharge.RRU + settle.RRU
+	re.InDelta(baseCost+actualReadCost+cpuCost, totalRRU, 1e-6,
+		"Total RRU across precharge+settle should equal baseCost + actualCost")
+}
+
+func TestNoPreChargeWithoutPredictedReadBytes(t *testing.T) {
+	re := require.New(t)
+	cfg := DefaultRUConfig()
+	kvCalc := newKVCalculator(cfg)
+	baseCost := float64(cfg.ReadBaseCost) + float64(cfg.ReadPerBatchBaseCost)*defaultAvgBatchProportion
+
+	// Without a PredictedReadBytes hint, BeforeKVRequest must not
+	// pre-charge; AfterKVRequest bills actual read bytes only.
+	req := &TestRequestInfo{isWrite: false}
+	precharge := &rmpb.Consumption{}
+	kvCalc.BeforeKVRequest(precharge, req)
+	re.InDelta(baseCost, precharge.RRU, 1e-6,
+		"Without a hint, BeforeKVRequest should only charge baseCost")
+
+	actualReadBytes := uint64(2 * 1024 * 1024)
+	resp := &TestResponseInfo{
+		readBytes: actualReadBytes,
+		kvCPU:     10 * time.Millisecond,
+		succeed:   true,
+	}
+	settle := &rmpb.Consumption{}
+	kvCalc.AfterKVRequest(settle, req, resp)
+
+	actualReadCost := float64(cfg.ReadBytesCost) * float64(actualReadBytes)
+	cpuCost := float64(cfg.CPUMsCost) * 10.0
+	re.InDelta(actualReadCost+cpuCost, settle.RRU, 1e-6,
+		"AfterKVRequest should bill actual read cost only when nothing was pre-charged")
+}
+
 func TestResourceGroupThrottledError(t *testing.T) {
 	re := require.New(t)
 	gc := createTestGroupCostController(re)
diff --git a/client/resource_group/controller/model.go b/client/resource_group/controller/model.go
index 54721969811..f93a4d3fb1e 100644
--- a/client/resource_group/controller/model.go
+++ b/client/resource_group/controller/model.go
@@ -54,6 +54,32 @@ type RequestInfo interface {
 	AccessLocationType() AccessLocationType
 }
 
+// predictedReadBytesProvider is an optional interface that a RequestInfo
+// implementation may satisfy to supply a learned estimate (e.g., from a
+// per-logical-scan EMA maintained in TiDB) of how many bytes the request
+// will read. When present and > 0, it is used as the byte basis for RC
+// paging pre-charge; when absent or zero the request is not pre-charged
+// and will be billed in AfterKVRequest by actual read bytes only.
+//
+// Defined as an optional interface (not a method on RequestInfo) so older
+// RequestInfo implementations that have not been updated still compile;
+// they simply skip pre-charge.
+type predictedReadBytesProvider interface {
+	PredictedReadBytes() uint64
+}
+
+// estimatedReadBytes returns the byte basis used for RC paging pre-charge,
+// or 0 to skip pre-charge entirely. Only a learned PredictedReadBytes hint
+// is honored; the protocol-level paging byte cap is intentionally not
+// consulted here so the paging cap and the RU-billing pre-charge stay
+// decoupled.
+func estimatedReadBytes(req RequestInfo) uint64 {
+	if p, ok := req.(predictedReadBytesProvider); ok {
+		return p.PredictedReadBytes()
+	}
+	return 0
+}
+
 // ResponseInfo is the interface of the response information provider. A response should be
 // able to tell how many bytes it read and KV CPU cost in milliseconds.
 type ResponseInfo interface {
@@ -104,6 +130,14 @@ func (kc *KVCalculator) BeforeKVRequest(consumption *rmpb.Consumption, req Reque
 		// Read bytes could not be known before the request is executed,
 		// so we only add the base cost here.
 		consumption.RRU += float64(kc.ReadBaseCost) + float64(kc.ReadPerBatchBaseCost)*defaultAvgBatchProportion
+		// RC Paging pre-charge: pre-charge the learned read-bytes RU so that
+		// concurrent workers are throttled at BeforeKVRequest instead of all
+		// hitting AfterKVRequest settlement at once. Only applies when the
+		// caller supplies a PredictedReadBytes hint; without it the request
+		// is billed in AfterKVRequest by actual read bytes only.
+		if bytesForEst := estimatedReadBytes(req); bytesForEst > 0 {
+			consumption.RRU += float64(kc.ReadBytesCost) * float64(bytesForEst)
+		}
 	}
 	if req.AccessLocationType() == AccessCrossZone {
 		if req.IsWrite() {
@@ -138,6 +172,12 @@ func (kc *KVCalculator) AfterKVRequest(consumption *rmpb.Consumption, req Reques
 	if !req.IsWrite() {
 		// For now, we can only collect the KV CPU cost for a read request.
 		kc.calculateCPUCost(consumption, res)
+		// RC Paging settlement: subtract the pre-charged bytes RU added in
+		// BeforeKVRequest so the net total (pre-charge + settlement) equals
+		// baseCost + actualCost. Must mirror the basis used there.
+		if bytesForEst := estimatedReadBytes(req); bytesForEst > 0 {
+			consumption.RRU -= float64(kc.ReadBytesCost) * float64(bytesForEst)
+		}
 	} else if !res.Succeed() {
 		// If the write request is not successfully returned, we need to pay back the WRU cost.
 		kc.payBackWriteCost(consumption, req)
diff --git a/client/resource_group/controller/testutil.go b/client/resource_group/controller/testutil.go
index a055b32fa6f..368fb933b3a 100644
--- a/client/resource_group/controller/testutil.go
+++ b/client/resource_group/controller/testutil.go
@@ -22,11 +22,12 @@ import "time"
 
 // TestRequestInfo is used to test the request info interface.
 type TestRequestInfo struct {
-	isWrite     bool
-	writeBytes  uint64
-	numReplicas int64
-	storeID     uint64
-	accessType  AccessLocationType
+	isWrite            bool
+	writeBytes         uint64
+	numReplicas        int64
+	storeID            uint64
+	accessType         AccessLocationType
+	predictedReadBytes uint64
 }
 
 // NewTestRequestInfo creates a new TestRequestInfo.
@@ -70,6 +71,12 @@ func (tri *TestRequestInfo) AccessLocationType() AccessLocationType {
 	return tri.accessType
 }
 
+// PredictedReadBytes implements the optional predictedReadBytesProvider
+// interface so tests can exercise the RC paging pre-charge hint path.
+func (tri *TestRequestInfo) PredictedReadBytes() uint64 {
+	return tri.predictedReadBytes
+}
+
 // TestResponseInfo is used to test the response info interface.
 type TestResponseInfo struct {
 	readBytes uint64

From 775b9e288ba9bf03aeb19da3bd7047f98dd37fa0 Mon Sep 17 00:00:00 2001
From: Yuhao Zhang <yhzhang00@outlook.com>
Date: Tue, 21 Apr 2026 16:28:38 +0800
Subject: [PATCH 02/12] resource_control: refund excess pre-charged tokens on
 response settlement

When a request carries a PredictedReadBytes hint, BeforeKVRequest consumes
tokens up-front as a pre-charge. If the actual read bytes come back smaller
than the estimate, the delta represents tokens that were reserved but never
consumed. Previously AfterKVRequest computed a negative delta but called
RemoveTokens unconditionally, which further debited the limiter instead of
giving tokens back.

This commit adds Limiter.RefundTokens as the inverse of RemoveTokens and
wires the response-side paths (onResponseImpl, onResponseWaitImpl) to call
it whenever the settlement delta is negative, so over-estimated pre-charges
are released back to the group's token bucket.

Signed-off-by: Yuhao Zhang <yhzhang00@outlook.com>
---
 .../controller/group_controller.go            |  33 +++--
 .../controller/group_controller_test.go       | 119 ++++++++++++++++++
 client/resource_group/controller/limiter.go   |  20 +++
 .../resource_group/controller/limiter_test.go |  34 +++++
 4 files changed, 195 insertions(+), 11 deletions(-)

diff --git a/client/resource_group/controller/group_controller.go b/client/resource_group/controller/group_controller.go
index 4fca2091c00..327048169c7 100644
--- a/client/resource_group/controller/group_controller.go
+++ b/client/resource_group/controller/group_controller.go
@@ -605,6 +605,10 @@ func (gc *groupCostController) onResponseImpl(
 		counter := gc.run.requestUnitTokens
 		if v := getRUValueFromConsumption(delta); v > 0 {
 			counter.limiter.RemoveTokens(time.Now(), v)
+		} else if v < 0 {
+			// RC paging settlement: actual cost was below the pre-charge,
+			// refund the excess so the limiter reflects real consumption.
+			counter.limiter.RefundTokens(time.Now(), -v)
 		}
 	}
 
@@ -634,19 +638,26 @@ func (gc *groupCostController) onResponseWaitImpl(
 	}
 	var waitDuration time.Duration
 	if !gc.burstable.Load() {
-		allowDebt := delta.ReadBytes+delta.WriteBytes < bigRequestThreshold || !gc.isThrottled.Load()
-		d, err := gc.acquireTokens(ctx, delta, &waitDuration, allowDebt)
-		if err != nil {
-			if errs.ErrClientResourceGroupThrottled.Equal(err) {
-				gc.metrics.failedRequestCounterWithThrottled.Inc()
-				gc.metrics.failedLimitReserveDuration.Observe(d.Seconds())
-			} else {
-				gc.metrics.failedRequestCounterWithOthers.Inc()
+		v := getRUValueFromConsumption(delta)
+		if v > 0 {
+			allowDebt := delta.ReadBytes+delta.WriteBytes < bigRequestThreshold || !gc.isThrottled.Load()
+			d, err := gc.acquireTokens(ctx, delta, &waitDuration, allowDebt)
+			if err != nil {
+				if errs.ErrClientResourceGroupThrottled.Equal(err) {
+					gc.metrics.failedRequestCounterWithThrottled.Inc()
+					gc.metrics.failedLimitReserveDuration.Observe(d.Seconds())
+				} else {
+					gc.metrics.failedRequestCounterWithOthers.Inc()
+				}
+				return nil, waitDuration, err
 			}
-			return nil, waitDuration, err
+			gc.metrics.successfulRequestDuration.Observe(d.Seconds())
+			waitDuration += d
+		} else if v < 0 {
+			// RC paging settlement: actual cost was below the pre-charge,
+			// refund the excess so the limiter reflects real consumption.
+			gc.run.requestUnitTokens.limiter.RefundTokens(time.Now(), -v)
 		}
-		gc.metrics.successfulRequestDuration.Observe(d.Seconds())
-		waitDuration += d
 	}
 
 	gc.mu.Lock()
diff --git a/client/resource_group/controller/group_controller_test.go b/client/resource_group/controller/group_controller_test.go
index f92681319f7..66325f33d36 100644
--- a/client/resource_group/controller/group_controller_test.go
+++ b/client/resource_group/controller/group_controller_test.go
@@ -249,6 +249,125 @@ func TestPredictedReadBytesPreCharge(t *testing.T) {
 		"Total RRU across precharge+settle should equal baseCost + actualCost")
 }
 
+func TestPagingPreChargeTokenRefund(t *testing.T) {
+	re := require.New(t)
+	gc := createTestGroupCostController(re)
+
+	initialTokens := float64(100000)
+	gc.run.requestUnitTokens.limiter.Reconfigure(time.Now(), tokenBucketReconfigureArgs{
+		newTokens:   initialTokens,
+		newFillRate: 0,
+		newBurst:    0,
+	})
+
+	predictedReadBytes := uint64(4 * 1024 * 1024) // 4 MB pre-charge
+	actualReadBytes := uint64(1 * 1024 * 1024)    // 1 MB actual
+
+	req := &TestRequestInfo{
+		isWrite:            false,
+		predictedReadBytes: predictedReadBytes,
+	}
+	resp := &TestResponseInfo{
+		readBytes: actualReadBytes,
+		succeed:   true,
+	}
+
+	_, _, _, _, err := gc.onRequestWaitImpl(context.TODO(), req)
+	re.NoError(err)
+	tokensAfterPreCharge := gc.run.requestUnitTokens.limiter.AvailableTokens(time.Now())
+
+	_, _, err = gc.onResponseWaitImpl(context.TODO(), req, resp)
+	re.NoError(err)
+	tokensAfterSettlement := gc.run.requestUnitTokens.limiter.AvailableTokens(time.Now())
+
+	// Refund (pre-charge - actual) should exceed the actual read cost,
+	// so the limiter ends settlement with more tokens than after pre-charge.
+	cfg := DefaultRUConfig()
+	preChargeCost := float64(cfg.ReadBytesCost) * float64(predictedReadBytes)
+	actualCost := float64(cfg.ReadBytesCost) * float64(actualReadBytes)
+	expectedRefund := preChargeCost - actualCost
+	re.Positive(expectedRefund, "sanity: pre-charge should exceed actual cost")
+	re.InDelta(tokensAfterPreCharge+expectedRefund, tokensAfterSettlement, 1.0,
+		"limiter should be refunded the excess pre-charged tokens")
+
+	gc.mu.Lock()
+	netRRU := gc.mu.consumption.RRU
+	gc.mu.Unlock()
+	baseCost := float64(cfg.ReadBaseCost) + float64(cfg.ReadPerBatchBaseCost)*defaultAvgBatchProportion
+	re.InDelta(baseCost+actualCost, netRRU, 1e-6,
+		"net consumption should equal baseCost + actualReadCost")
+}
+
+func TestPagingPreChargeNoRefundWhenActualExceedsEstimate(t *testing.T) {
+	re := require.New(t)
+	gc := createTestGroupCostController(re)
+
+	initialTokens := float64(100000)
+	gc.run.requestUnitTokens.limiter.Reconfigure(time.Now(), tokenBucketReconfigureArgs{
+		newTokens:   initialTokens,
+		newFillRate: 0,
+		newBurst:    0,
+	})
+
+	predictedReadBytes := uint64(1 * 1024 * 1024) // 1 MB pre-charge
+	actualReadBytes := uint64(4 * 1024 * 1024)    // 4 MB actual (exceeds estimate)
+
+	req := &TestRequestInfo{
+		isWrite:            false,
+		predictedReadBytes: predictedReadBytes,
+	}
+	resp := &TestResponseInfo{
+		readBytes: actualReadBytes,
+		succeed:   true,
+	}
+
+	_, _, _, _, err := gc.onRequestWaitImpl(context.TODO(), req)
+	re.NoError(err)
+	tokensAfterPreCharge := gc.run.requestUnitTokens.limiter.AvailableTokens(time.Now())
+
+	_, _, err = gc.onResponseWaitImpl(context.TODO(), req, resp)
+	re.NoError(err)
+	tokensAfterSettlement := gc.run.requestUnitTokens.limiter.AvailableTokens(time.Now())
+
+	re.Less(tokensAfterSettlement, tokensAfterPreCharge,
+		"when actual exceeds pre-charge, settlement should consume tokens")
+}
+
+func TestOnResponseImplPagingRefund(t *testing.T) {
+	re := require.New(t)
+	gc := createTestGroupCostController(re)
+
+	initialTokens := float64(100000)
+	gc.run.requestUnitTokens.limiter.Reconfigure(time.Now(), tokenBucketReconfigureArgs{
+		newTokens:   initialTokens,
+		newFillRate: 0,
+		newBurst:    0,
+	})
+
+	predictedReadBytes := uint64(4 * 1024 * 1024) // 4 MB pre-charge
+	actualReadBytes := uint64(512 * 1024)         // 512 KB actual
+
+	req := &TestRequestInfo{
+		isWrite:            false,
+		predictedReadBytes: predictedReadBytes,
+	}
+	resp := &TestResponseInfo{
+		readBytes: actualReadBytes,
+		succeed:   true,
+	}
+
+	_, _, _, _, err := gc.onRequestWaitImpl(context.TODO(), req)
+	re.NoError(err)
+	tokensAfterPreCharge := gc.run.requestUnitTokens.limiter.AvailableTokens(time.Now())
+
+	_, err = gc.onResponseImpl(req, resp)
+	re.NoError(err)
+	tokensAfterSettlement := gc.run.requestUnitTokens.limiter.AvailableTokens(time.Now())
+
+	re.Greater(tokensAfterSettlement, tokensAfterPreCharge,
+		"onResponseImpl should refund excess pre-charged tokens")
+}
+
 func TestNoPreChargeWithoutPredictedReadBytes(t *testing.T) {
 	re := require.New(t)
 	cfg := DefaultRUConfig()
diff --git a/client/resource_group/controller/limiter.go b/client/resource_group/controller/limiter.go
index 86e1440946e..bf8f0d48734 100644
--- a/client/resource_group/controller/limiter.go
+++ b/client/resource_group/controller/limiter.go
@@ -325,6 +325,26 @@ func (lim *Limiter) RemoveTokens(now time.Time, amount float64) {
 	lim.maybeNotify()
 }
 
+// RefundTokens returns previously consumed tokens back to the limiter.
+// This is the inverse of RemoveTokens: it increases the available token count.
+// It is used when a pre-charged estimate (e.g. a PredictedReadBytes hint)
+// exceeds the actual cost observed after the request completes.
+//
+// No burst cap is applied here — consistent with Reconfigure. The lazy cap
+// in getTokens() normalizes the balance on the next limiter operation.
+// No low-token notification is needed because refunding moves the balance
+// away from the low-token threshold, never toward it.
+func (lim *Limiter) RefundTokens(now time.Time, amount float64) {
+	lim.mu.Lock()
+	defer lim.mu.Unlock()
+	if lim.burst < 0 || lim.fillRate == Inf {
+		return
+	}
+	_, tokens := lim.getTokens(now)
+	lim.last = now
+	lim.tokens = tokens + amount
+}
+
 type tokenBucketReconfigureArgs struct {
 	newTokens          float64
 	newFillRate        float64
diff --git a/client/resource_group/controller/limiter_test.go b/client/resource_group/controller/limiter_test.go
index f5b2c144757..8bd04eecdd4 100644
--- a/client/resource_group/controller/limiter_test.go
+++ b/client/resource_group/controller/limiter_test.go
@@ -139,6 +139,40 @@ func TestReconfig(t *testing.T) {
 	re.Equal(int64(-1), lim.GetBurst())
 }
 
+func TestRefundTokens(t *testing.T) {
+	re := require.New(t)
+	nc := make(chan notifyMsg, 1)
+	lim := NewLimiter(t0, 0, 0, 100, nc)
+
+	// Consume some tokens.
+	lim.RemoveTokens(t0, 30)
+	checkTokens(re, lim, t0, 70)
+
+	// Refund part of them.
+	lim.RefundTokens(t0, 20)
+	checkTokens(re, lim, t0, 90)
+
+	// Refund beyond the initial amount - allowed (no burst cap when burst==0).
+	lim.RefundTokens(t0, 50)
+	checkTokens(re, lim, t0, 140)
+
+	// With burst > 0, refund up to burst stays.
+	limBurst := NewLimiter(t0, 0, 100, 50, nc)
+	limBurst.RemoveTokens(t0, 30)
+	checkTokens(re, limBurst, t0, 20)
+	limBurst.RefundTokens(t0, 80) // tokens = 20 + 80 = 100, burst = 100
+	checkTokens(re, limBurst, t0, 100)
+
+	// Refund beyond burst - capped by getTokens.
+	limBurst.RefundTokens(t0, 50) // tokens = 100 + 50 = 150, capped to 100
+	checkTokens(re, limBurst, t0, 100)
+
+	// Burstable (burst < 0): RefundTokens is a no-op.
+	limUnlimited := NewLimiter(t0, 0, -1, 100, nc)
+	limUnlimited.RefundTokens(t0, 50)
+	checkTokens(re, limUnlimited, t0, 100)
+}
+
 func TestNotify(t *testing.T) {
 	nc := make(chan notifyMsg, 1)
 	lim := NewLimiter(t0, 1, 0, 0, nc)

From 6ebc843849da2b714fe21ef9fc8b6bdeca48a60e Mon Sep 17 00:00:00 2001
From: Yuhao Zhang <yhzhang00@outlook.com>
Date: Tue, 21 Apr 2026 16:31:37 +0800
Subject: [PATCH 03/12] client/resource_group: add paging pre-charge
 observability metrics

Add per-resource-group Prometheus metrics so operators can observe how the
paging pre-charge path behaves in production and judge EMA prediction
accuracy:

  - paging_precharge_total / paging_precharge_bytes_total: count and byte
    volume of RPCs that arrived with a PredictedReadBytes hint > 0 and
    were pre-charged at BeforeKVRequest.
  - paging_actual_bytes_total: actual read bytes reported by pre-charged
    RPCs, to compute an over/under-charge ratio against the pre-charge
    volume.
  - paging_prediction_residual_bytes: histogram of (actual - predicted)
    bytes for pre-charged RPCs; shows EMA prediction accuracy directly.
  - paging_nonprecharge_total / paging_nonprecharge_actual_bytes_total:
    count and byte volume of read RPCs that implemented the predicted
    hint interface but reported 0 (e.g. EMA cold-start) and therefore
    ran without pre-charge. Paired with paging_precharge_total this
    yields the cold/ready RPC split from the PD client side.

Labels are cached per resource group in groupMetricsCollection to keep
the hot path out of WithLabelValues. Only RequestInfo implementations
that opt into the PredictedReadBytes interface contribute to these
series; existing callers are unaffected.

Signed-off-by: Yuhao Zhang <yhzhang00@outlook.com>
---
 .../controller/group_controller.go            | 65 ++++++++++++++
 .../controller/metrics/metrics.go             | 88 +++++++++++++++++++
 2 files changed, 153 insertions(+)

diff --git a/client/resource_group/controller/group_controller.go b/client/resource_group/controller/group_controller.go
index 327048169c7..5258c1d9a93 100644
--- a/client/resource_group/controller/group_controller.go
+++ b/client/resource_group/controller/group_controller.go
@@ -106,6 +106,21 @@ type groupMetricsCollection struct {
 	tokenRequestCounter               prometheus.Counter
 	runningKVRequestCounter           prometheus.Gauge
 	consumeTokenHistogram             prometheus.Observer
+
+	// Paging pre-charge observability: cached per-RG so the hot path avoids
+	// WithLabelValues on every KV request. Only pre-charged requests (those
+	// with a PredictedReadBytes hint > 0) contribute to these metrics.
+	prechargeCounter        prometheus.Counter
+	prechargeBytesCounter   prometheus.Counter
+	actualBytesCounter      prometheus.Counter
+	predictionResidualBytes prometheus.Observer
+
+	// Nonprecharge bucket: RPCs that implemented the predicted-bytes
+	// interface but reported 0 (EMA cold-start or feature-disabled) and
+	// therefore skipped pre-charge entirely. Recorded at settle time
+	// (AfterKVRequest).
+	nonprechargeCounter     prometheus.Counter
+	nonprechargeActualBytes prometheus.Counter
 }
 
 func initMetrics(oldName, name string) *groupMetricsCollection {
@@ -122,9 +137,42 @@ func initMetrics(oldName, name string) *groupMetricsCollection {
 		tokenRequestCounter:               metrics.ResourceGroupTokenRequestCounter.WithLabelValues(oldName, name),
 		runningKVRequestCounter:           metrics.GroupRunningKVRequestCounter.WithLabelValues(name),
 		consumeTokenHistogram:             metrics.TokenConsumedHistogram.WithLabelValues(name),
+
+		prechargeCounter:        metrics.PagingPrechargeCounter.WithLabelValues(name),
+		prechargeBytesCounter:   metrics.PagingPrechargeBytesCounter.WithLabelValues(name),
+		actualBytesCounter:      metrics.PagingActualBytesCounter.WithLabelValues(name),
+		predictionResidualBytes: metrics.PagingPredictionResidualBytes.WithLabelValues(name),
+
+		nonprechargeCounter:     metrics.PagingNonprechargeCounter.WithLabelValues(name),
+		nonprechargeActualBytes: metrics.PagingNonprechargeActualBytes.WithLabelValues(name),
 	}
 }
 
+// observePagingPrecharge records one pre-charge event. Caller must guarantee
+// bytesForEst > 0; cold-start requests with no hint are not pre-charged and
+// should not be observed here.
+func (gmc *groupMetricsCollection) observePagingPrecharge(bytesForEst uint64) {
+	gmc.prechargeCounter.Inc()
+	gmc.prechargeBytesCounter.Add(float64(bytesForEst))
+}
+
+// observePagingActual records actual read bytes and the signed residual for a
+// pre-charged request. Caller must guarantee predicted > 0.
+func (gmc *groupMetricsCollection) observePagingActual(predicted, actual uint64) {
+	gmc.actualBytesCounter.Add(float64(actual))
+	gmc.predictionResidualBytes.Observe(float64(actual) - float64(predicted))
+}
+
+// observePagingNonprecharge records one RPC that implemented the predicted
+// read-bytes interface but reported 0 (EMA cold or feature disabled) and
+// therefore ran without pre-charge. `actual` is the response's read bytes,
+// settled in AfterKVRequest. Paired with observePagingPrecharge this gives
+// the cold/ready split and the byte volume that bypassed throttling.
+func (gmc *groupMetricsCollection) observePagingNonprecharge(actual uint64) {
+	gmc.nonprechargeCounter.Inc()
+	gmc.nonprechargeActualBytes.Add(float64(actual))
+}
+
 type tokenCounter struct {
 	fillRate uint64
 
@@ -551,6 +599,9 @@ func (gc *groupCostController) onRequestWaitImpl(
 	for _, calc := range gc.calculators {
 		calc.BeforeKVRequest(delta, info)
 	}
+	if bytesForEst := estimatedReadBytes(info); bytesForEst > 0 {
+		gc.metrics.observePagingPrecharge(bytesForEst)
+	}
 
 	gc.mu.Lock()
 	add(gc.mu.consumption, delta)
@@ -601,6 +652,13 @@ func (gc *groupCostController) onResponseImpl(
 	for _, calc := range gc.calculators {
 		calc.AfterKVRequest(delta, req, resp)
 	}
+	if bytesForEst := estimatedReadBytes(req); bytesForEst > 0 {
+		gc.metrics.observePagingActual(bytesForEst, resp.ReadBytes())
+	} else if !req.IsWrite() {
+		if _, ok := req.(predictedReadBytesProvider); ok {
+			gc.metrics.observePagingNonprecharge(resp.ReadBytes())
+		}
+	}
 	if !gc.burstable.Load() {
 		counter := gc.run.requestUnitTokens
 		if v := getRUValueFromConsumption(delta); v > 0 {
@@ -636,6 +694,13 @@ func (gc *groupCostController) onResponseWaitImpl(
 	for _, calc := range gc.calculators {
 		calc.AfterKVRequest(delta, req, resp)
 	}
+	if bytesForEst := estimatedReadBytes(req); bytesForEst > 0 {
+		gc.metrics.observePagingActual(bytesForEst, resp.ReadBytes())
+	} else if !req.IsWrite() {
+		if _, ok := req.(predictedReadBytesProvider); ok {
+			gc.metrics.observePagingNonprecharge(resp.ReadBytes())
+		}
+	}
 	var waitDuration time.Duration
 	if !gc.burstable.Load() {
 		v := getRUValueFromConsumption(delta)
diff --git a/client/resource_group/controller/metrics/metrics.go b/client/resource_group/controller/metrics/metrics.go
index 01231aceeaf..5bff7281f9e 100644
--- a/client/resource_group/controller/metrics/metrics.go
+++ b/client/resource_group/controller/metrics/metrics.go
@@ -53,6 +53,31 @@ var (
 	FailedTokenRequestDuration prometheus.Observer
 	// SuccessfulTokenRequestDuration comments placeholder, WithLabelValues is a heavy operation, define variable to avoid call it every time.
 	SuccessfulTokenRequestDuration prometheus.Observer
+
+	// PagingPrechargeCounter counts paging pre-charge events (predicted hint
+	// present and > 0), per resource group. Cold starts and unhinted requests
+	// are not pre-charged and therefore not counted here.
+	PagingPrechargeCounter *prometheus.CounterVec
+	// PagingPrechargeBytesCounter sums bytes used as the pre-charge basis.
+	PagingPrechargeBytesCounter *prometheus.CounterVec
+	// PagingActualBytesCounter sums actual read bytes reported after the KV
+	// RPC for pre-charged requests. Ratio against PagingPrechargeBytesCounter
+	// reveals over/under-charge factor.
+	PagingActualBytesCounter *prometheus.CounterVec
+	// PagingPredictionResidualBytes observes (actual - predicted) bytes for
+	// pre-charged requests. Shows EMA prediction accuracy.
+	PagingPredictionResidualBytes *prometheus.HistogramVec
+
+	// PagingNonprechargeCounter counts RPCs that implemented the predicted
+	// read-bytes interface but reported 0 (e.g. EMA cold-start) and therefore
+	// ran without pre-charge. Paired with PagingPrechargeCounter this yields
+	// the cold/ready RPC split from the PD side.
+	PagingNonprechargeCounter *prometheus.CounterVec
+	// PagingNonprechargeActualBytes sums the actual read bytes of RPCs that
+	// skipped pre-charge. Quantifies how much read volume bypassed pre-charge
+	// throttling — the "cold window" signal for token-bucket pressure
+	// analysis.
+	PagingNonprechargeActualBytes *prometheus.CounterVec
 )
 
 func init() {
@@ -156,6 +181,63 @@ func initMetrics(constLabels prometheus.Labels) {
 	// WithLabelValues is a heavy operation, define variable to avoid call it every time.
 	FailedTokenRequestDuration = TokenRequestDuration.WithLabelValues("fail")
 	SuccessfulTokenRequestDuration = TokenRequestDuration.WithLabelValues("success")
+
+	PagingPrechargeCounter = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace:   namespace,
+			Subsystem:   requestSubsystem,
+			Name:        "paging_precharge_total",
+			Help:        "Counter of RC paging pre-charge events (PredictedReadBytes hint present and > 0).",
+			ConstLabels: constLabels,
+		}, []string{newResourceGroupNameLabel})
+
+	PagingPrechargeBytesCounter = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace:   namespace,
+			Subsystem:   requestSubsystem,
+			Name:        "paging_precharge_bytes_total",
+			Help:        "Sum of bytes used as the RC paging pre-charge basis (predicted hint).",
+			ConstLabels: constLabels,
+		}, []string{newResourceGroupNameLabel})
+
+	PagingActualBytesCounter = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace:   namespace,
+			Subsystem:   requestSubsystem,
+			Name:        "paging_actual_bytes_total",
+			Help:        "Sum of actual bytes read by paging KV requests that were pre-charged.",
+			ConstLabels: constLabels,
+		}, []string{newResourceGroupNameLabel})
+
+	PagingPredictionResidualBytes = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Namespace: namespace,
+			Subsystem: requestSubsystem,
+			Name:      "paging_prediction_residual_bytes",
+			// Signed residual = actual - predicted. Buckets cover both directions
+			// up to the typical paging budget (a few MB).
+			Buckets:     []float64{-4194304, -1048576, -262144, -65536, -16384, -4096, 0, 4096, 16384, 65536, 262144, 1048576, 4194304},
+			Help:        "Histogram of (actual_read_bytes - predicted_read_bytes) for pre-charged requests. Shows EMA prediction accuracy.",
+			ConstLabels: constLabels,
+		}, []string{newResourceGroupNameLabel})
+
+	PagingNonprechargeCounter = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace:   namespace,
+			Subsystem:   requestSubsystem,
+			Name:        "paging_nonprecharge_total",
+			Help:        "Counter of RC paging RPCs that implemented the predicted hint but reported 0 (e.g. EMA cold-start) and ran without pre-charge.",
+			ConstLabels: constLabels,
+		}, []string{newResourceGroupNameLabel})
+
+	PagingNonprechargeActualBytes = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace:   namespace,
+			Subsystem:   requestSubsystem,
+			Name:        "paging_nonprecharge_actual_bytes_total",
+			Help:        "Sum of actual bytes read by paging RPCs that skipped pre-charge (hint=0). Quantifies cold-window read volume bypassing pre-charge throttling.",
+			ConstLabels: constLabels,
+		}, []string{newResourceGroupNameLabel})
 }
 
 // InitAndRegisterMetrics initializes and register metrics.
@@ -171,4 +253,10 @@ func InitAndRegisterMetrics(constLabels prometheus.Labels) {
 	prometheus.MustRegister(ResourceGroupTokenRequestCounter)
 	prometheus.MustRegister(LowTokenRequestNotifyCounter)
 	prometheus.MustRegister(TokenConsumedHistogram)
+	prometheus.MustRegister(PagingPrechargeCounter)
+	prometheus.MustRegister(PagingPrechargeBytesCounter)
+	prometheus.MustRegister(PagingActualBytesCounter)
+	prometheus.MustRegister(PagingPredictionResidualBytes)
+	prometheus.MustRegister(PagingNonprechargeCounter)
+	prometheus.MustRegister(PagingNonprechargeActualBytes)
 }

From 6e957208500be6d7e6c355dd1fc33004438975eb Mon Sep 17 00:00:00 2001
From: Yuhao Zhang <yhzhang00@outlook.com>
Date: Wed, 22 Apr 2026 13:50:17 +0800
Subject: [PATCH 04/12] client/resource_group: trim narrative comments

Signed-off-by: Yuhao Zhang <yhzhang00@outlook.com>
---
 .../controller/group_controller.go            | 28 ++++-------------
 client/resource_group/controller/limiter.go   | 10 ++----
 .../controller/metrics/metrics.go             | 28 ++++-------------
 client/resource_group/controller/model.go     | 31 ++++++-------------
 4 files changed, 23 insertions(+), 74 deletions(-)

diff --git a/client/resource_group/controller/group_controller.go b/client/resource_group/controller/group_controller.go
index 5258c1d9a93..ae041f60fdb 100644
--- a/client/resource_group/controller/group_controller.go
+++ b/client/resource_group/controller/group_controller.go
@@ -107,18 +107,12 @@ type groupMetricsCollection struct {
 	runningKVRequestCounter           prometheus.Gauge
 	consumeTokenHistogram             prometheus.Observer
 
-	// Paging pre-charge observability: cached per-RG so the hot path avoids
-	// WithLabelValues on every KV request. Only pre-charged requests (those
-	// with a PredictedReadBytes hint > 0) contribute to these metrics.
+	// Paging pre-charge observers, cached per-RG to avoid WithLabelValues
+	// on the hot path.
 	prechargeCounter        prometheus.Counter
 	prechargeBytesCounter   prometheus.Counter
 	actualBytesCounter      prometheus.Counter
 	predictionResidualBytes prometheus.Observer
-
-	// Nonprecharge bucket: RPCs that implemented the predicted-bytes
-	// interface but reported 0 (EMA cold-start or feature-disabled) and
-	// therefore skipped pre-charge entirely. Recorded at settle time
-	// (AfterKVRequest).
 	nonprechargeCounter     prometheus.Counter
 	nonprechargeActualBytes prometheus.Counter
 }
@@ -148,26 +142,18 @@ func initMetrics(oldName, name string) *groupMetricsCollection {
 	}
 }
 
-// observePagingPrecharge records one pre-charge event. Caller must guarantee
-// bytesForEst > 0; cold-start requests with no hint are not pre-charged and
-// should not be observed here.
+// observePagingPrecharge requires bytesForEst > 0.
 func (gmc *groupMetricsCollection) observePagingPrecharge(bytesForEst uint64) {
 	gmc.prechargeCounter.Inc()
 	gmc.prechargeBytesCounter.Add(float64(bytesForEst))
 }
 
-// observePagingActual records actual read bytes and the signed residual for a
-// pre-charged request. Caller must guarantee predicted > 0.
+// observePagingActual requires predicted > 0.
 func (gmc *groupMetricsCollection) observePagingActual(predicted, actual uint64) {
 	gmc.actualBytesCounter.Add(float64(actual))
 	gmc.predictionResidualBytes.Observe(float64(actual) - float64(predicted))
 }
 
-// observePagingNonprecharge records one RPC that implemented the predicted
-// read-bytes interface but reported 0 (EMA cold or feature disabled) and
-// therefore ran without pre-charge. `actual` is the response's read bytes,
-// settled in AfterKVRequest. Paired with observePagingPrecharge this gives
-// the cold/ready split and the byte volume that bypassed throttling.
 func (gmc *groupMetricsCollection) observePagingNonprecharge(actual uint64) {
 	gmc.nonprechargeCounter.Inc()
 	gmc.nonprechargeActualBytes.Add(float64(actual))
@@ -664,8 +650,7 @@ func (gc *groupCostController) onResponseImpl(
 		if v := getRUValueFromConsumption(delta); v > 0 {
 			counter.limiter.RemoveTokens(time.Now(), v)
 		} else if v < 0 {
-			// RC paging settlement: actual cost was below the pre-charge,
-			// refund the excess so the limiter reflects real consumption.
+			// Paging over-estimate: refund the excess pre-charge.
 			counter.limiter.RefundTokens(time.Now(), -v)
 		}
 	}
@@ -719,8 +704,7 @@ func (gc *groupCostController) onResponseWaitImpl(
 			gc.metrics.successfulRequestDuration.Observe(d.Seconds())
 			waitDuration += d
 		} else if v < 0 {
-			// RC paging settlement: actual cost was below the pre-charge,
-			// refund the excess so the limiter reflects real consumption.
+			// Paging over-estimate: refund the excess pre-charge.
 			gc.run.requestUnitTokens.limiter.RefundTokens(time.Now(), -v)
 		}
 	}
diff --git a/client/resource_group/controller/limiter.go b/client/resource_group/controller/limiter.go
index bf8f0d48734..b451ab244bb 100644
--- a/client/resource_group/controller/limiter.go
+++ b/client/resource_group/controller/limiter.go
@@ -325,15 +325,9 @@ func (lim *Limiter) RemoveTokens(now time.Time, amount float64) {
 	lim.maybeNotify()
 }
 
-// RefundTokens returns previously consumed tokens back to the limiter.
-// This is the inverse of RemoveTokens: it increases the available token count.
-// It is used when a pre-charged estimate (e.g. a PredictedReadBytes hint)
-// exceeds the actual cost observed after the request completes.
+// RefundTokens adds tokens back to the limiter.
 //
-// No burst cap is applied here — consistent with Reconfigure. The lazy cap
-// in getTokens() normalizes the balance on the next limiter operation.
-// No low-token notification is needed because refunding moves the balance
-// away from the low-token threshold, never toward it.
+// No burst cap is applied here
 func (lim *Limiter) RefundTokens(now time.Time, amount float64) {
 	lim.mu.Lock()
 	defer lim.mu.Unlock()
diff --git a/client/resource_group/controller/metrics/metrics.go b/client/resource_group/controller/metrics/metrics.go
index 5bff7281f9e..7e95cb84680 100644
--- a/client/resource_group/controller/metrics/metrics.go
+++ b/client/resource_group/controller/metrics/metrics.go
@@ -54,29 +54,13 @@ var (
 	// SuccessfulTokenRequestDuration comments placeholder, WithLabelValues is a heavy operation, define variable to avoid call it every time.
 	SuccessfulTokenRequestDuration prometheus.Observer
 
-	// PagingPrechargeCounter counts paging pre-charge events (predicted hint
-	// present and > 0), per resource group. Cold starts and unhinted requests
-	// are not pre-charged and therefore not counted here.
-	PagingPrechargeCounter *prometheus.CounterVec
-	// PagingPrechargeBytesCounter sums bytes used as the pre-charge basis.
-	PagingPrechargeBytesCounter *prometheus.CounterVec
-	// PagingActualBytesCounter sums actual read bytes reported after the KV
-	// RPC for pre-charged requests. Ratio against PagingPrechargeBytesCounter
-	// reveals over/under-charge factor.
-	PagingActualBytesCounter *prometheus.CounterVec
-	// PagingPredictionResidualBytes observes (actual - predicted) bytes for
-	// pre-charged requests. Shows EMA prediction accuracy.
+	// Paging pre-charge metrics, per resource group. See each metric's
+	// Help string below for semantics.
+	PagingPrechargeCounter        *prometheus.CounterVec
+	PagingPrechargeBytesCounter   *prometheus.CounterVec
+	PagingActualBytesCounter      *prometheus.CounterVec
 	PagingPredictionResidualBytes *prometheus.HistogramVec
-
-	// PagingNonprechargeCounter counts RPCs that implemented the predicted
-	// read-bytes interface but reported 0 (e.g. EMA cold-start) and therefore
-	// ran without pre-charge. Paired with PagingPrechargeCounter this yields
-	// the cold/ready RPC split from the PD side.
-	PagingNonprechargeCounter *prometheus.CounterVec
-	// PagingNonprechargeActualBytes sums the actual read bytes of RPCs that
-	// skipped pre-charge. Quantifies how much read volume bypassed pre-charge
-	// throttling — the "cold window" signal for token-bucket pressure
-	// analysis.
+	PagingNonprechargeCounter     *prometheus.CounterVec
 	PagingNonprechargeActualBytes *prometheus.CounterVec
 )
 
diff --git a/client/resource_group/controller/model.go b/client/resource_group/controller/model.go
index f93a4d3fb1e..3b4e8294393 100644
--- a/client/resource_group/controller/model.go
+++ b/client/resource_group/controller/model.go
@@ -54,25 +54,18 @@ type RequestInfo interface {
 	AccessLocationType() AccessLocationType
 }
 
-// predictedReadBytesProvider is an optional interface that a RequestInfo
-// implementation may satisfy to supply a learned estimate (e.g., from a
-// per-logical-scan EMA maintained in TiDB) of how many bytes the request
-// will read. When present and > 0, it is used as the byte basis for RC
-// paging pre-charge; when absent or zero the request is not pre-charged
-// and will be billed in AfterKVRequest by actual read bytes only.
+// predictedReadBytesProvider is an optional interface a RequestInfo may
+// satisfy to supply a read-bytes estimate. When PredictedReadBytes > 0
+// it is the byte basis for RC paging pre-charge in BeforeKVRequest and
+// settled symmetrically in AfterKVRequest; otherwise the request is
+// billed by actual read bytes at settlement only.
 //
-// Defined as an optional interface (not a method on RequestInfo) so older
-// RequestInfo implementations that have not been updated still compile;
-// they simply skip pre-charge.
+// Optional (not a method on RequestInfo) so existing implementations
+// keep compiling.
 type predictedReadBytesProvider interface {
 	PredictedReadBytes() uint64
 }
 
-// estimatedReadBytes returns the byte basis used for RC paging pre-charge,
-// or 0 to skip pre-charge entirely. Only a learned PredictedReadBytes hint
-// is honored; the protocol-level paging byte cap is intentionally not
-// consulted here so the paging cap and the RU-billing pre-charge stay
-// decoupled.
 func estimatedReadBytes(req RequestInfo) uint64 {
 	if p, ok := req.(predictedReadBytesProvider); ok {
 		return p.PredictedReadBytes()
@@ -130,11 +123,7 @@ func (kc *KVCalculator) BeforeKVRequest(consumption *rmpb.Consumption, req Reque
 		// Read bytes could not be known before the request is executed,
 		// so we only add the base cost here.
 		consumption.RRU += float64(kc.ReadBaseCost) + float64(kc.ReadPerBatchBaseCost)*defaultAvgBatchProportion
-		// RC Paging pre-charge: pre-charge the learned read-bytes RU so that
-		// concurrent workers are throttled at BeforeKVRequest instead of all
-		// hitting AfterKVRequest settlement at once. Only applies when the
-		// caller supplies a PredictedReadBytes hint; without it the request
-		// is billed in AfterKVRequest by actual read bytes only.
+		// Paging pre-charge
 		if bytesForEst := estimatedReadBytes(req); bytesForEst > 0 {
 			consumption.RRU += float64(kc.ReadBytesCost) * float64(bytesForEst)
 		}
@@ -172,9 +161,7 @@ func (kc *KVCalculator) AfterKVRequest(consumption *rmpb.Consumption, req Reques
 	if !req.IsWrite() {
 		// For now, we can only collect the KV CPU cost for a read request.
 		kc.calculateCPUCost(consumption, res)
-		// RC Paging settlement: subtract the pre-charged bytes RU added in
-		// BeforeKVRequest so the net total (pre-charge + settlement) equals
-		// baseCost + actualCost. Must mirror the basis used there.
+		// Paging settlement
 		if bytesForEst := estimatedReadBytes(req); bytesForEst > 0 {
 			consumption.RRU -= float64(kc.ReadBytesCost) * float64(bytesForEst)
 		}

From 6935b5763bb639840177994e254eb26845563c20 Mon Sep 17 00:00:00 2001
From: Yuhao Zhang <yhzhang00@outlook.com>
Date: Wed, 22 Apr 2026 14:46:44 +0800
Subject: [PATCH 05/12] client/resource_group: widen paging residual histogram
 to +/-64MB

The previous +/-4MB range clipped large first-page responses on cold
copIterators (predicted=0 -> residual == actual, commonly several MB)
and workload shifts where EMA sits above the current actual. Extending
both ends by two factor-4 steps keeps the same near-zero resolution
while making P95/P99 readable up to the TiKV paging cap.

Signed-off-by: Yuhao Zhang <yhzhang00@outlook.com>
---
 client/resource_group/controller/metrics/metrics.go | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/client/resource_group/controller/metrics/metrics.go b/client/resource_group/controller/metrics/metrics.go
index 7e95cb84680..b0819b3fad4 100644
--- a/client/resource_group/controller/metrics/metrics.go
+++ b/client/resource_group/controller/metrics/metrics.go
@@ -198,9 +198,11 @@ func initMetrics(constLabels prometheus.Labels) {
 			Namespace: namespace,
 			Subsystem: requestSubsystem,
 			Name:      "paging_prediction_residual_bytes",
-			// Signed residual = actual - predicted. Buckets cover both directions
-			// up to the typical paging budget (a few MB).
-			Buckets:     []float64{-4194304, -1048576, -262144, -65536, -16384, -4096, 0, 4096, 16384, 65536, 262144, 1048576, 4194304},
+			// Signed residual = actual - predicted. Buckets span ±64MB to
+			// absorb large first-page responses on a cold copIterator
+			// (predicted=0) and workload shifts that leave EMA above
+			// actual. Factor-4 spacing keeps resolution near zero.
+			Buckets: []float64{-67108864, -16777216, -4194304, -1048576, -262144, -65536, -16384, -4096, 0, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864},
 			Help:        "Histogram of (actual_read_bytes - predicted_read_bytes) for pre-charged requests. Shows EMA prediction accuracy.",
 			ConstLabels: constLabels,
 		}, []string{newResourceGroupNameLabel})

From 176454909a63dbbf01097b29e38f8b9ac2d25d41 Mon Sep 17 00:00:00 2001
From: Yuhao Zhang <yhzhang00@outlook.com>
Date: Wed, 22 Apr 2026 14:54:52 +0800
Subject: [PATCH 06/12] metrics: drop EMA/cold-start phrasing from paging
 metric Help text

EMA is a TiDB-side implementation detail; PD's metric Help text should
describe what the metric observes in terms of the hint contract.
paging_nonprecharge_* also fires when hint is absent entirely (not just
when the predictor produced 0), so reword to say so.

Signed-off-by: Yuhao Zhang <yhzhang00@outlook.com>
---
 client/resource_group/controller/metrics/metrics.go | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/client/resource_group/controller/metrics/metrics.go b/client/resource_group/controller/metrics/metrics.go
index b0819b3fad4..53f45578da8 100644
--- a/client/resource_group/controller/metrics/metrics.go
+++ b/client/resource_group/controller/metrics/metrics.go
@@ -199,11 +199,12 @@ func initMetrics(constLabels prometheus.Labels) {
 			Subsystem: requestSubsystem,
 			Name:      "paging_prediction_residual_bytes",
 			// Signed residual = actual - predicted. Buckets span ±64MB to
-			// absorb large first-page responses on a cold copIterator
-			// (predicted=0) and workload shifts that leave EMA above
-			// actual. Factor-4 spacing keeps resolution near zero.
+			// absorb large first-page responses when the predictor has no
+			// prior observation (predicted=0) and workload shifts that
+			// leave the prediction above actual. Factor-4 spacing keeps
+			// resolution near zero.
 			Buckets: []float64{-67108864, -16777216, -4194304, -1048576, -262144, -65536, -16384, -4096, 0, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864},
-			Help:        "Histogram of (actual_read_bytes - predicted_read_bytes) for pre-charged requests. Shows EMA prediction accuracy.",
+			Help:        "Histogram of (actual_read_bytes - predicted_read_bytes) for pre-charged paging requests. Shows predictor accuracy.",
 			ConstLabels: constLabels,
 		}, []string{newResourceGroupNameLabel})
 
@@ -212,7 +213,7 @@ func initMetrics(constLabels prometheus.Labels) {
 			Namespace:   namespace,
 			Subsystem:   requestSubsystem,
 			Name:        "paging_nonprecharge_total",
-			Help:        "Counter of RC paging RPCs that implemented the predicted hint but reported 0 (e.g. EMA cold-start) and ran without pre-charge.",
+			Help:        "Counter of paging read RPCs where no PredictedReadBytes hint was provided (hint absent or zero). These RPCs skip pre-charge and settle entirely on actual read bytes at response time.",
 			ConstLabels: constLabels,
 		}, []string{newResourceGroupNameLabel})
 
@@ -221,7 +222,7 @@ func initMetrics(constLabels prometheus.Labels) {
 			Namespace:   namespace,
 			Subsystem:   requestSubsystem,
 			Name:        "paging_nonprecharge_actual_bytes_total",
-			Help:        "Sum of actual bytes read by paging RPCs that skipped pre-charge (hint=0). Quantifies cold-window read volume bypassing pre-charge throttling.",
+			Help:        "Sum of actual bytes read by paging RPCs that skipped pre-charge (hint absent or zero). Quantifies read volume settled without pre-charge throttling.",
 			ConstLabels: constLabels,
 		}, []string{newResourceGroupNameLabel})
 }

From 8308651ada2056e55579066f391364503d2504bd Mon Sep 17 00:00:00 2001
From: Yuhao Zhang <yhzhang00@outlook.com>
Date: Wed, 22 Apr 2026 15:42:41 +0800
Subject: [PATCH 07/12] controller: add paging RU observability metrics

Mirror the existing bytes-dimension paging metrics in RU units:

- paging_precharge_ru_total: RU pre-acquired at BeforeKVRequest for
  pre-charged paging read requests.
- paging_settlement_ru_total: full RU finally consumed per pre-charged
  paging read request (base + CPU + ReadBytesCost * actual_bytes).
- paging_settlement_ru_delta: histogram of signed per-RPC delta
  (settlement_ru - precharge_ru); negative bucket = RefundTokens flow,
  positive = RemoveTokens/acquireTokens flow.

The histogram captures the per-RPC settlement magnitude and direction,
which cannot be reconstructed from the two aggregate counters alone
(sum and max(0,-v) don't commute).

Signed-off-by: Yuhao Zhang <yhzhang00@outlook.com>
---
 .../controller/group_controller.go            | 53 +++++++++++--------
 .../controller/metrics/metrics.go             | 37 +++++++++++++
 2 files changed, 69 insertions(+), 21 deletions(-)

diff --git a/client/resource_group/controller/group_controller.go b/client/resource_group/controller/group_controller.go
index ae041f60fdb..b043c7c33d4 100644
--- a/client/resource_group/controller/group_controller.go
+++ b/client/resource_group/controller/group_controller.go
@@ -115,6 +115,9 @@ type groupMetricsCollection struct {
 	predictionResidualBytes prometheus.Observer
 	nonprechargeCounter     prometheus.Counter
 	nonprechargeActualBytes prometheus.Counter
+	prechargeRU             prometheus.Counter
+	settlementRU            prometheus.Counter
+	settlementRUDelta       prometheus.Observer
 }
 
 func initMetrics(oldName, name string) *groupMetricsCollection {
@@ -139,19 +142,29 @@ func initMetrics(oldName, name string) *groupMetricsCollection {
 
 		nonprechargeCounter:     metrics.PagingNonprechargeCounter.WithLabelValues(name),
 		nonprechargeActualBytes: metrics.PagingNonprechargeActualBytes.WithLabelValues(name),
+		prechargeRU:             metrics.PagingPrechargeRU.WithLabelValues(name),
+		settlementRU:            metrics.PagingSettlementRU.WithLabelValues(name),
+		settlementRUDelta:       metrics.PagingSettlementRUDelta.WithLabelValues(name),
 	}
 }
 
 // observePagingPrecharge requires bytesForEst > 0.
-func (gmc *groupMetricsCollection) observePagingPrecharge(bytesForEst uint64) {
+// prechargeRU is the total RU pre-acquired at BeforeKVRequest
+// (base + ReadBytesCost * bytesForEst).
+func (gmc *groupMetricsCollection) observePagingPrecharge(bytesForEst uint64, prechargeRU float64) {
 	gmc.prechargeCounter.Inc()
 	gmc.prechargeBytesCounter.Add(float64(bytesForEst))
+	gmc.prechargeRU.Add(prechargeRU)
 }
 
 // observePagingActual requires predicted > 0.
-func (gmc *groupMetricsCollection) observePagingActual(predicted, actual uint64) {
+// settlementRU is the total RU finally consumed (base + CPU + ReadBytesCost * actual);
+// vDelta is (settlement_ru - precharge_ru), the signed per-RPC RU adjustment.
+func (gmc *groupMetricsCollection) observePagingActual(predicted, actual uint64, settlementRU, vDelta float64) {
 	gmc.actualBytesCounter.Add(float64(actual))
 	gmc.predictionResidualBytes.Observe(float64(actual) - float64(predicted))
+	gmc.settlementRU.Add(settlementRU)
+	gmc.settlementRUDelta.Observe(vDelta)
 }
 
 func (gmc *groupMetricsCollection) observePagingNonprecharge(actual uint64) {
@@ -586,7 +599,7 @@ func (gc *groupCostController) onRequestWaitImpl(
 		calc.BeforeKVRequest(delta, info)
 	}
 	if bytesForEst := estimatedReadBytes(info); bytesForEst > 0 {
-		gc.metrics.observePagingPrecharge(bytesForEst)
+		gc.metrics.observePagingPrecharge(bytesForEst, getRUValueFromConsumption(delta))
 	}
 
 	gc.mu.Lock()
@@ -638,8 +651,15 @@ func (gc *groupCostController) onResponseImpl(
 	for _, calc := range gc.calculators {
 		calc.AfterKVRequest(delta, req, resp)
 	}
+	// `count` is the full per-request consumption (BeforeKVRequest + AfterKVRequest).
+	count := &rmpb.Consumption{}
+	*count = *delta
+	for _, calc := range gc.calculators {
+		calc.BeforeKVRequest(count, req)
+	}
 	if bytesForEst := estimatedReadBytes(req); bytesForEst > 0 {
-		gc.metrics.observePagingActual(bytesForEst, resp.ReadBytes())
+		gc.metrics.observePagingActual(bytesForEst, resp.ReadBytes(),
+			getRUValueFromConsumption(count), getRUValueFromConsumption(delta))
 	} else if !req.IsWrite() {
 		if _, ok := req.(predictedReadBytesProvider); ok {
 			gc.metrics.observePagingNonprecharge(resp.ReadBytes())
@@ -656,15 +676,7 @@ func (gc *groupCostController) onResponseImpl(
 	}
 
 	gc.mu.Lock()
-	// Record the consumption of the request
 	add(gc.mu.consumption, delta)
-	// Record the consumption of the request by store
-	count := &rmpb.Consumption{}
-	*count = *delta
-	// As the penalty is only counted when the request is completed, so here needs to calculate the write cost which is added in `BeforeKVRequest`
-	for _, calc := range gc.calculators {
-		calc.BeforeKVRequest(count, req)
-	}
 	add(gc.mu.storeCounter[req.StoreID()], count)
 	add(gc.mu.globalCounter, count)
 	gc.mu.Unlock()
@@ -679,8 +691,15 @@ func (gc *groupCostController) onResponseWaitImpl(
 	for _, calc := range gc.calculators {
 		calc.AfterKVRequest(delta, req, resp)
 	}
+	// `count` is the full per-request consumption (BeforeKVRequest + AfterKVRequest).
+	count := &rmpb.Consumption{}
+	*count = *delta
+	for _, calc := range gc.calculators {
+		calc.BeforeKVRequest(count, req)
+	}
 	if bytesForEst := estimatedReadBytes(req); bytesForEst > 0 {
-		gc.metrics.observePagingActual(bytesForEst, resp.ReadBytes())
+		gc.metrics.observePagingActual(bytesForEst, resp.ReadBytes(),
+			getRUValueFromConsumption(count), getRUValueFromConsumption(delta))
 	} else if !req.IsWrite() {
 		if _, ok := req.(predictedReadBytesProvider); ok {
 			gc.metrics.observePagingNonprecharge(resp.ReadBytes())
@@ -710,15 +729,7 @@ func (gc *groupCostController) onResponseWaitImpl(
 	}
 
 	gc.mu.Lock()
-	// Record the consumption of the request
 	add(gc.mu.consumption, delta)
-	// Record the consumption of the request by store
-	count := &rmpb.Consumption{}
-	*count = *delta
-	// As the penalty is only counted when the request is completed, so here needs to calculate the write cost which is added in `BeforeKVRequest`
-	for _, calc := range gc.calculators {
-		calc.BeforeKVRequest(count, req)
-	}
 	add(gc.mu.storeCounter[req.StoreID()], count)
 	add(gc.mu.globalCounter, count)
 	gc.mu.Unlock()
diff --git a/client/resource_group/controller/metrics/metrics.go b/client/resource_group/controller/metrics/metrics.go
index 53f45578da8..19c59808494 100644
--- a/client/resource_group/controller/metrics/metrics.go
+++ b/client/resource_group/controller/metrics/metrics.go
@@ -62,6 +62,9 @@ var (
 	PagingPredictionResidualBytes *prometheus.HistogramVec
 	PagingNonprechargeCounter     *prometheus.CounterVec
 	PagingNonprechargeActualBytes *prometheus.CounterVec
+	PagingPrechargeRU             *prometheus.CounterVec
+	PagingSettlementRU            *prometheus.CounterVec
+	PagingSettlementRUDelta       *prometheus.HistogramVec
 )
 
 func init() {
@@ -225,6 +228,37 @@ func initMetrics(constLabels prometheus.Labels) {
 			Help:        "Sum of actual bytes read by paging RPCs that skipped pre-charge (hint absent or zero). Quantifies read volume settled without pre-charge throttling.",
 			ConstLabels: constLabels,
 		}, []string{newResourceGroupNameLabel})
+
+	PagingPrechargeRU = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace:   namespace,
+			Subsystem:   requestSubsystem,
+			Name:        "paging_precharge_ru_total",
+			Help:        "Sum of RU pre-acquired at BeforeKVRequest for pre-charged paging read requests (read base cost + ReadBytesCost * predicted_bytes).",
+			ConstLabels: constLabels,
+		}, []string{newResourceGroupNameLabel})
+
+	PagingSettlementRU = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace:   namespace,
+			Subsystem:   requestSubsystem,
+			Name:        "paging_settlement_ru_total",
+			Help:        "Sum of total RU finally consumed by pre-charged paging read requests (read base cost + CPUMsCost * kv_cpu_ms + ReadBytesCost * actual_bytes). Equals precharge_ru + settlement_ru_delta.",
+			ConstLabels: constLabels,
+		}, []string{newResourceGroupNameLabel})
+
+	PagingSettlementRUDelta = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Namespace: namespace,
+			Subsystem: requestSubsystem,
+			Name:      "paging_settlement_ru_delta",
+			// v = settlement_ru - precharge_ru = CPUMsCost * kv_cpu_ms + ReadBytesCost * (actual - predicted).
+			// Negative => flows through RefundTokens; positive => flows through RemoveTokens / acquireTokens.
+			// Factor-4 spacing over ±1024 RU covers CPU (±tens of RU) plus bytes residuals up to ±64MB.
+			Buckets:     []float64{-1024, -256, -64, -16, -4, -1, -0.25, -0.0625, 0, 0.0625, 0.25, 1, 4, 16, 64, 256, 1024},
+			Help:        "Per-RPC signed settlement RU delta (settlement_ru - precharge_ru) for pre-charged paging read requests. Negative means refund, positive means extra debit.",
+			ConstLabels: constLabels,
+		}, []string{newResourceGroupNameLabel})
 }
 
 // InitAndRegisterMetrics initializes and register metrics.
@@ -246,4 +280,7 @@ func InitAndRegisterMetrics(constLabels prometheus.Labels) {
 	prometheus.MustRegister(PagingPredictionResidualBytes)
 	prometheus.MustRegister(PagingNonprechargeCounter)
 	prometheus.MustRegister(PagingNonprechargeActualBytes)
+	prometheus.MustRegister(PagingPrechargeRU)
+	prometheus.MustRegister(PagingSettlementRU)
+	prometheus.MustRegister(PagingSettlementRUDelta)
 }

From ded66bf8f5d8d67e85b1ce7642d4a80efa5d4c7d Mon Sep 17 00:00:00 2001
From: Yuhao Zhang <yhzhang00@outlook.com>
Date: Wed, 22 Apr 2026 15:44:06 +0800
Subject: [PATCH 08/12] metrics: group paging metrics by sampling unit and add
 doc comments

Order as count -> bytes -> RU, matching the conceptual grouping of
sampling units. Add a one-line doc comment to each exported metric to
satisfy revive lint.

Signed-off-by: Yuhao Zhang <yhzhang00@outlook.com>
---
 .../controller/metrics/metrics.go             | 69 +++++++++++--------
 1 file changed, 39 insertions(+), 30 deletions(-)

diff --git a/client/resource_group/controller/metrics/metrics.go b/client/resource_group/controller/metrics/metrics.go
index 19c59808494..9130a02a692 100644
--- a/client/resource_group/controller/metrics/metrics.go
+++ b/client/resource_group/controller/metrics/metrics.go
@@ -54,17 +54,26 @@ var (
 	// SuccessfulTokenRequestDuration comments placeholder, WithLabelValues is a heavy operation, define variable to avoid call it every time.
 	SuccessfulTokenRequestDuration prometheus.Observer
 
-	// Paging pre-charge metrics, per resource group. See each metric's
-	// Help string below for semantics.
-	PagingPrechargeCounter        *prometheus.CounterVec
-	PagingPrechargeBytesCounter   *prometheus.CounterVec
-	PagingActualBytesCounter      *prometheus.CounterVec
-	PagingPredictionResidualBytes *prometheus.HistogramVec
-	PagingNonprechargeCounter     *prometheus.CounterVec
+	// PagingPrechargeCounter counts paging read requests that triggered pre-charge (hint > 0).
+	PagingPrechargeCounter *prometheus.CounterVec
+	// PagingNonprechargeCounter counts paging read requests that skipped pre-charge (hint absent or zero).
+	PagingNonprechargeCounter *prometheus.CounterVec
+
+	// PagingPrechargeBytesCounter accumulates bytes used as the pre-charge basis (sum of predicted hints).
+	PagingPrechargeBytesCounter *prometheus.CounterVec
+	// PagingActualBytesCounter accumulates actual bytes read by pre-charged paging requests.
+	PagingActualBytesCounter *prometheus.CounterVec
+	// PagingNonprechargeActualBytes accumulates actual bytes read by paging requests that skipped pre-charge.
 	PagingNonprechargeActualBytes *prometheus.CounterVec
-	PagingPrechargeRU             *prometheus.CounterVec
-	PagingSettlementRU            *prometheus.CounterVec
-	PagingSettlementRUDelta       *prometheus.HistogramVec
+	// PagingPredictionResidualBytes records the distribution of (actual - predicted) read bytes for pre-charged requests.
+	PagingPredictionResidualBytes *prometheus.HistogramVec
+
+	// PagingPrechargeRU accumulates RU pre-acquired at BeforeKVRequest for pre-charged paging read requests.
+	PagingPrechargeRU *prometheus.CounterVec
+	// PagingSettlementRU accumulates total RU finally consumed by pre-charged paging read requests (base + CPU + bytes cost).
+	PagingSettlementRU *prometheus.CounterVec
+	// PagingSettlementRUDelta records the distribution of per-RPC signed RU delta (settlement_ru - precharge_ru) for pre-charged paging read requests.
+	PagingSettlementRUDelta *prometheus.HistogramVec
 )
 
 func init() {
@@ -178,6 +187,15 @@ func initMetrics(constLabels prometheus.Labels) {
 			ConstLabels: constLabels,
 		}, []string{newResourceGroupNameLabel})
 
+	PagingNonprechargeCounter = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace:   namespace,
+			Subsystem:   requestSubsystem,
+			Name:        "paging_nonprecharge_total",
+			Help:        "Counter of paging read RPCs where no PredictedReadBytes hint was provided (hint absent or zero). These RPCs skip pre-charge and settle entirely on actual read bytes at response time.",
+			ConstLabels: constLabels,
+		}, []string{newResourceGroupNameLabel})
+
 	PagingPrechargeBytesCounter = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
 			Namespace:   namespace,
@@ -196,6 +214,15 @@ func initMetrics(constLabels prometheus.Labels) {
 			ConstLabels: constLabels,
 		}, []string{newResourceGroupNameLabel})
 
+	PagingNonprechargeActualBytes = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace:   namespace,
+			Subsystem:   requestSubsystem,
+			Name:        "paging_nonprecharge_actual_bytes_total",
+			Help:        "Sum of actual bytes read by paging RPCs that skipped pre-charge (hint absent or zero). Quantifies read volume settled without pre-charge throttling.",
+			ConstLabels: constLabels,
+		}, []string{newResourceGroupNameLabel})
+
 	PagingPredictionResidualBytes = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
 			Namespace: namespace,
@@ -211,24 +238,6 @@ func initMetrics(constLabels prometheus.Labels) {
 			ConstLabels: constLabels,
 		}, []string{newResourceGroupNameLabel})
 
-	PagingNonprechargeCounter = prometheus.NewCounterVec(
-		prometheus.CounterOpts{
-			Namespace:   namespace,
-			Subsystem:   requestSubsystem,
-			Name:        "paging_nonprecharge_total",
-			Help:        "Counter of paging read RPCs where no PredictedReadBytes hint was provided (hint absent or zero). These RPCs skip pre-charge and settle entirely on actual read bytes at response time.",
-			ConstLabels: constLabels,
-		}, []string{newResourceGroupNameLabel})
-
-	PagingNonprechargeActualBytes = prometheus.NewCounterVec(
-		prometheus.CounterOpts{
-			Namespace:   namespace,
-			Subsystem:   requestSubsystem,
-			Name:        "paging_nonprecharge_actual_bytes_total",
-			Help:        "Sum of actual bytes read by paging RPCs that skipped pre-charge (hint absent or zero). Quantifies read volume settled without pre-charge throttling.",
-			ConstLabels: constLabels,
-		}, []string{newResourceGroupNameLabel})
-
 	PagingPrechargeRU = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
 			Namespace:   namespace,
@@ -275,11 +284,11 @@ func InitAndRegisterMetrics(constLabels prometheus.Labels) {
 	prometheus.MustRegister(LowTokenRequestNotifyCounter)
 	prometheus.MustRegister(TokenConsumedHistogram)
 	prometheus.MustRegister(PagingPrechargeCounter)
+	prometheus.MustRegister(PagingNonprechargeCounter)
 	prometheus.MustRegister(PagingPrechargeBytesCounter)
 	prometheus.MustRegister(PagingActualBytesCounter)
-	prometheus.MustRegister(PagingPredictionResidualBytes)
-	prometheus.MustRegister(PagingNonprechargeCounter)
 	prometheus.MustRegister(PagingNonprechargeActualBytes)
+	prometheus.MustRegister(PagingPredictionResidualBytes)
 	prometheus.MustRegister(PagingPrechargeRU)
 	prometheus.MustRegister(PagingSettlementRU)
 	prometheus.MustRegister(PagingSettlementRUDelta)

From bdec70a600bcda1750d30ba82837fee0fb849937 Mon Sep 17 00:00:00 2001
From: Yuhao Zhang <yhzhang00@outlook.com>
Date: Wed, 22 Apr 2026 17:58:17 +0800
Subject: [PATCH 09/12] client/resource_group: gate paging hint on read
 requests only

Move the !IsWrite() guard into estimatedReadBytes so paging pre-charge,
settlement, and metric observations stay symmetric even if a future
write type implements the optional predictedReadBytesProvider.

Signed-off-by: Yuhao Zhang <yhzhang00@outlook.com>
---
 client/resource_group/controller/model.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/client/resource_group/controller/model.go b/client/resource_group/controller/model.go
index 3b4e8294393..fb44fdf6c69 100644
--- a/client/resource_group/controller/model.go
+++ b/client/resource_group/controller/model.go
@@ -66,7 +66,13 @@ type predictedReadBytesProvider interface {
 	PredictedReadBytes() uint64
 }
 
+// estimatedReadBytes returns the predicted read-bytes hint for read requests.
+// Write requests always get 0 so paging pre-charge / settlement / metrics stay
+// gated to reads even if a future write type implements the optional interface.
 func estimatedReadBytes(req RequestInfo) uint64 {
+	if req.IsWrite() {
+		return 0
+	}
 	if p, ok := req.(predictedReadBytesProvider); ok {
 		return p.PredictedReadBytes()
 	}

From 8063dfe49e75bcc7ef2b90224fb47d73ff06f80e Mon Sep 17 00:00:00 2001
From: Yuhao Zhang <yhzhang00@outlook.com>
Date: Wed, 22 Apr 2026 17:59:30 +0800
Subject: [PATCH 10/12] client/resource_group: test paging refund on failed
 read

Failed reads with a paging hint still go through AfterKVRequest (proven
by the existing write !res.Succeed() payBackWriteCost branch), so the
paging settlement subtracts ReadBytesCost*predicted and the resulting
negative delta flows through RefundTokens. ReadBaseCost is intentionally
not refunded, matching existing non-paging read failure behavior.

Signed-off-by: Yuhao Zhang <yhzhang00@outlook.com>
---
 .../controller/group_controller_test.go       | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/client/resource_group/controller/group_controller_test.go b/client/resource_group/controller/group_controller_test.go
index 66325f33d36..424c183f268 100644
--- a/client/resource_group/controller/group_controller_test.go
+++ b/client/resource_group/controller/group_controller_test.go
@@ -368,6 +368,48 @@ func TestOnResponseImplPagingRefund(t *testing.T) {
 		"onResponseImpl should refund excess pre-charged tokens")
 }
 
+func TestPagingPreChargeRefundOnFailedRead(t *testing.T) {
+	re := require.New(t)
+	gc := createTestGroupCostController(re)
+
+	initialTokens := float64(100000)
+	gc.run.requestUnitTokens.limiter.Reconfigure(time.Now(), tokenBucketReconfigureArgs{
+		newTokens:   initialTokens,
+		newFillRate: 0,
+		newBurst:    0,
+	})
+
+	predictedReadBytes := uint64(4 * 1024 * 1024) // 4 MB pre-charge
+
+	req := &TestRequestInfo{
+		isWrite:            false,
+		predictedReadBytes: predictedReadBytes,
+	}
+	// Failed response: no bytes read, no CPU consumed, succeed=false.
+	resp := &TestResponseInfo{
+		readBytes: 0,
+		kvCPU:     0,
+		succeed:   false,
+	}
+
+	_, _, _, _, err := gc.onRequestWaitImpl(context.TODO(), req)
+	re.NoError(err)
+	tokensAfterPreCharge := gc.run.requestUnitTokens.limiter.AvailableTokens(time.Now())
+
+	_, _, err = gc.onResponseWaitImpl(context.TODO(), req, resp)
+	re.NoError(err)
+	tokensAfterSettlement := gc.run.requestUnitTokens.limiter.AvailableTokens(time.Now())
+
+	// On a failed read, AfterKVRequest still runs: paging settlement subtracts
+	// ReadBytesCost*predicted and calculateReadCost adds 0, yielding a negative
+	// delta that flows through RefundTokens. ReadBaseCost is not refunded,
+	// matching existing behavior for non-paging read failures.
+	cfg := DefaultRUConfig()
+	expectedRefund := float64(cfg.ReadBytesCost) * float64(predictedReadBytes)
+	re.InDelta(tokensAfterPreCharge+expectedRefund, tokensAfterSettlement, 1.0,
+		"failed read with paging hint should refund ReadBytesCost*predicted")
+}
+
 func TestNoPreChargeWithoutPredictedReadBytes(t *testing.T) {
 	re := require.New(t)
 	cfg := DefaultRUConfig()

From 1818262e8dabed307cda47e9417b97ab051f3491 Mon Sep 17 00:00:00 2001
From: Yuhao Zhang <yhzhang00@outlook.com>
Date: Wed, 22 Apr 2026 18:12:51 +0800
Subject: [PATCH 11/12] client/resource_group: promote PredictedReadBytes to
 RequestInfo method

Drop the predictedReadBytesProvider optional interface and the type
assertion guards at the paging metric observation sites. The hint is now
part of the RequestInfo contract; client-go and tidb are upgraded
simultaneously so no back-compat shim is needed.

Signed-off-by: Yuhao Zhang <yhzhang00@outlook.com>
---
 .../controller/group_controller.go            |  8 ++-----
 client/resource_group/controller/model.go     | 23 +++++--------------
 client/resource_group/controller/testutil.go  |  3 +--
 3 files changed, 9 insertions(+), 25 deletions(-)

diff --git a/client/resource_group/controller/group_controller.go b/client/resource_group/controller/group_controller.go
index b043c7c33d4..8e0025dd03d 100644
--- a/client/resource_group/controller/group_controller.go
+++ b/client/resource_group/controller/group_controller.go
@@ -661,9 +661,7 @@ func (gc *groupCostController) onResponseImpl(
 		gc.metrics.observePagingActual(bytesForEst, resp.ReadBytes(),
 			getRUValueFromConsumption(count), getRUValueFromConsumption(delta))
 	} else if !req.IsWrite() {
-		if _, ok := req.(predictedReadBytesProvider); ok {
-			gc.metrics.observePagingNonprecharge(resp.ReadBytes())
-		}
+		gc.metrics.observePagingNonprecharge(resp.ReadBytes())
 	}
 	if !gc.burstable.Load() {
 		counter := gc.run.requestUnitTokens
@@ -701,9 +699,7 @@ func (gc *groupCostController) onResponseWaitImpl(
 		gc.metrics.observePagingActual(bytesForEst, resp.ReadBytes(),
 			getRUValueFromConsumption(count), getRUValueFromConsumption(delta))
 	} else if !req.IsWrite() {
-		if _, ok := req.(predictedReadBytesProvider); ok {
-			gc.metrics.observePagingNonprecharge(resp.ReadBytes())
-		}
+		gc.metrics.observePagingNonprecharge(resp.ReadBytes())
 	}
 	var waitDuration time.Duration
 	if !gc.burstable.Load() {
diff --git a/client/resource_group/controller/model.go b/client/resource_group/controller/model.go
index fb44fdf6c69..62730dcbcad 100644
--- a/client/resource_group/controller/model.go
+++ b/client/resource_group/controller/model.go
@@ -52,31 +52,20 @@ type RequestInfo interface {
 	StoreID() uint64
 	RequestSize() uint64
 	AccessLocationType() AccessLocationType
-}
-
-// predictedReadBytesProvider is an optional interface a RequestInfo may
-// satisfy to supply a read-bytes estimate. When PredictedReadBytes > 0
-// it is the byte basis for RC paging pre-charge in BeforeKVRequest and
-// settled symmetrically in AfterKVRequest; otherwise the request is
-// billed by actual read bytes at settlement only.
-//
-// Optional (not a method on RequestInfo) so existing implementations
-// keep compiling.
-type predictedReadBytesProvider interface {
+	// PredictedReadBytes returns the read-bytes hint used for RC paging
+	// pre-charge in BeforeKVRequest and settled symmetrically in
+	// AfterKVRequest. Return 0 to opt out; writes always return 0.
 	PredictedReadBytes() uint64
 }
 
 // estimatedReadBytes returns the predicted read-bytes hint for read requests.
-// Write requests always get 0 so paging pre-charge / settlement / metrics stay
-// gated to reads even if a future write type implements the optional interface.
+// Writes always return 0 so paging pre-charge / settlement / metrics stay
+// gated to reads.
 func estimatedReadBytes(req RequestInfo) uint64 {
 	if req.IsWrite() {
 		return 0
 	}
-	if p, ok := req.(predictedReadBytesProvider); ok {
-		return p.PredictedReadBytes()
-	}
-	return 0
+	return req.PredictedReadBytes()
 }
 
 // ResponseInfo is the interface of the response information provider. A response should be
diff --git a/client/resource_group/controller/testutil.go b/client/resource_group/controller/testutil.go
index 368fb933b3a..c14d9eea342 100644
--- a/client/resource_group/controller/testutil.go
+++ b/client/resource_group/controller/testutil.go
@@ -71,8 +71,7 @@ func (tri *TestRequestInfo) AccessLocationType() AccessLocationType {
 	return tri.accessType
 }
 
-// PredictedReadBytes implements the optional predictedReadBytesProvider
-// interface so tests can exercise the RC paging pre-charge hint path.
+// PredictedReadBytes implements the RequestInfo interface.
 func (tri *TestRequestInfo) PredictedReadBytes() uint64 {
 	return tri.predictedReadBytes
 }

From 7a38b73dddb91a78c75f1feb9f902cf5d5532866 Mon Sep 17 00:00:00 2001
From: Yuhao Zhang <yhzhang00@outlook.com>
Date: Wed, 22 Apr 2026 18:24:26 +0800
Subject: [PATCH 12/12] client/resource_group: delay paging precharge metrics
 until acquire succeeds

Observing the precharge before acquireTokens lets throttled and
ctx-cancelled requests inflate PagingPrechargeCounter /
PagingPrechargeBytesCounter / PagingPrechargeRU with no matching
settlement sample on the response side (since OnResponse is never
called for those requests). Delay the observation until after
acquireTokens returns nil, matching the unconditional observation of
paging_actual metrics in onResponse{,Wait}Impl. Burstable mode still
observes, preserving symmetry with the burstable-agnostic settlement
side.

Signed-off-by: Yuhao Zhang <yhzhang00@outlook.com>
---
 .../controller/group_controller.go            |  6 ++--
 .../controller/group_controller_test.go       | 31 +++++++++++++++++++
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/client/resource_group/controller/group_controller.go b/client/resource_group/controller/group_controller.go
index 8e0025dd03d..8394533b227 100644
--- a/client/resource_group/controller/group_controller.go
+++ b/client/resource_group/controller/group_controller.go
@@ -598,9 +598,6 @@ func (gc *groupCostController) onRequestWaitImpl(
 	for _, calc := range gc.calculators {
 		calc.BeforeKVRequest(delta, info)
 	}
-	if bytesForEst := estimatedReadBytes(info); bytesForEst > 0 {
-		gc.metrics.observePagingPrecharge(bytesForEst, getRUValueFromConsumption(delta))
-	}
 
 	gc.mu.Lock()
 	add(gc.mu.consumption, delta)
@@ -626,6 +623,9 @@ func (gc *groupCostController) onRequestWaitImpl(
 		gc.metrics.successfulRequestDuration.Observe(d.Seconds())
 		waitDuration += d
 	}
+	if bytesForEst := estimatedReadBytes(info); bytesForEst > 0 {
+		gc.metrics.observePagingPrecharge(bytesForEst, getRUValueFromConsumption(delta))
+	}
 
 	gc.mu.Lock()
 	// Calculate the penalty of the store
diff --git a/client/resource_group/controller/group_controller_test.go b/client/resource_group/controller/group_controller_test.go
index 424c183f268..82301c16e36 100644
--- a/client/resource_group/controller/group_controller_test.go
+++ b/client/resource_group/controller/group_controller_test.go
@@ -20,6 +20,7 @@ import (
 	"testing"
 	"time"
 
+	dto "github.com/prometheus/client_model/go"
 	"github.com/stretchr/testify/require"
 
 	rmpb "github.com/pingcap/kvproto/pkg/resource_manager"
@@ -27,6 +28,12 @@ import (
 	"github.com/tikv/pd/client/errs"
 )
 
+func counterValue(re *require.Assertions, c interface{ Write(*dto.Metric) error }) float64 {
+	var m dto.Metric
+	re.NoError(c.Write(&m))
+	return m.GetCounter().GetValue()
+}
+
 func createTestGroupCostController(re *require.Assertions) *groupCostController {
 	group := &rmpb.ResourceGroup{
 		Name:     "test",
@@ -452,6 +459,30 @@ func TestResourceGroupThrottledError(t *testing.T) {
 	re.True(errs.ErrClientResourceGroupThrottled.Equal(err))
 }
 
+func TestPagingPrechargeNotObservedOnThrottle(t *testing.T) {
+	re := require.New(t)
+	gc := createTestGroupCostController(re)
+
+	// 4 GiB predicted ~= 65536 RU at default ReadCostPerByte (1/64KiB),
+	// exceeding the LTBMaxWaitDuration budget at the default FillRate so
+	// acquireTokens returns ErrClientResourceGroupThrottled.
+	req := &TestRequestInfo{
+		isWrite:            false,
+		predictedReadBytes: 4 * 1024 * 1024 * 1024,
+	}
+
+	before := counterValue(re, gc.metrics.prechargeCounter)
+	_, _, _, _, err := gc.onRequestWaitImpl(context.TODO(), req)
+	re.Error(err)
+	re.True(errs.ErrClientResourceGroupThrottled.Equal(err))
+	after := counterValue(re, gc.metrics.prechargeCounter)
+
+	// Throttled requests never reach OnResponse for settlement, so the
+	// precharge counter must not be incremented either.
+	re.Equal(before, after,
+		"throttled paging request should not inflate PagingPrechargeCounter")
+}
+
 func TestAcquireTokensSignalAwareWait(t *testing.T) {
 	re := require.New(t)