diff --git a/integration-test/scripts/token-quota.sh b/integration-test/scripts/token-quota.sh
index 48063557d..195ec998d 100755
--- a/integration-test/scripts/token-quota.sh
+++ b/integration-test/scripts/token-quota.sh
@@ -18,8 +18,8 @@ fi
 # Quota is debited from service_2 bucket on the response path so only the 4th request should be rejected
 response=$(curl -f -s -H "request-no: 4" http://envoy-proxy:8888/tokenquota)
 
-if [ $? -ne 0 ]; then
-	echo "Quota mode does not deny requests yet"
+if [ $? -eq 0 ]; then
+	echo "Quota limiting should fail the request"
 	exit 1
 fi
 
diff --git a/src/redis/fixed_cache_impl.go b/src/redis/fixed_cache_impl.go
index b1f290de3..1034d3beb 100644
--- a/src/redis/fixed_cache_impl.go
+++ b/src/redis/fixed_cache_impl.go
@@ -214,7 +214,6 @@ func (this *fixedRateLimitCacheImpl) DoLimit(
 
 		responseDescriptorStatuses[i] = this.baseRateLimiter.GetResponseDescriptorStatus(cacheKey.Key,
 			limitInfo, isOverLimitWithLocalCache[i], hitsAddends[i])
-
 	}
 
 	return responseDescriptorStatuses
diff --git a/src/service/ratelimit.go b/src/service/ratelimit.go
index 313dc7b77..f6ecd2e24 100644
--- a/src/service/ratelimit.go
+++ b/src/service/ratelimit.go
@@ -201,7 +201,6 @@ func (this *service) shouldRateLimitWorker(
 
 	response := &pb.RateLimitResponse{}
 	response.Statuses = make([]*pb.RateLimitResponse_DescriptorStatus, len(request.Descriptors))
-	finalCode := pb.RateLimitResponse_OK
 
 	// Keep track of the descriptor which is closest to hit the ratelimit
 	minLimitRemaining := MaxUint32
@@ -209,6 +208,9 @@ func (this *service) shouldRateLimitWorker(
 
 	// Track quota mode violations for metadata
 	var quotaModeViolations []int
+	failedRateLimitDescriptors := 0
+	failedQuotaDescriptors := 0
+	totalQuotaDescriptors := 0
 
 	for i, descriptorStatus := range responseDescriptorStatuses {
 		// Keep track of the descriptor closest to hit the ratelimit
@@ -226,28 +228,31 @@ func (this *service) shouldRateLimitWorker(
 			}
 		} else {
 			response.Statuses[i] = descriptorStatus
+			isQuotaMode := globalQuotaMode || (limitsToCheck[i] != nil && limitsToCheck[i].QuotaMode)
 			if descriptorStatus.Code == pb.RateLimitResponse_OVER_LIMIT {
-				// Check if this limit is in quota mode (individual or global)
-				isQuotaMode := globalQuotaMode || (limitsToCheck[i] != nil && limitsToCheck[i].QuotaMode)
-
 				if isQuotaMode {
 					// In quota mode: track the violation for metadata but keep response as OK
 					quotaModeViolations = append(quotaModeViolations, i)
-					response.Statuses[i] = &pb.RateLimitResponse_DescriptorStatus{
-						Code:           pb.RateLimitResponse_OK,
-						CurrentLimit:   descriptorStatus.CurrentLimit,
-						LimitRemaining: descriptorStatus.LimitRemaining,
-					}
+					failedQuotaDescriptors += 1
 				} else {
-					// Normal rate limit: set final code to OVER_LIMIT
-					finalCode = descriptorStatus.Code
+					failedRateLimitDescriptors += 1
 					minimumDescriptor = descriptorStatus
 					minLimitRemaining = 0
 				}
 			}
+			if isQuotaMode {
+				totalQuotaDescriptors += 1
+			}
 		}
 	}
 
+	finalCode := pb.RateLimitResponse_OK
+	// The final code is OVER_LIMIT iff at least one rate limit descriptor is over the limit
+	// or all quota descriptors are over the limit.
+	if failedRateLimitDescriptors > 0 || (totalQuotaDescriptors > 0 && totalQuotaDescriptors == failedQuotaDescriptors) {
+		finalCode = pb.RateLimitResponse_OVER_LIMIT
+	}
+
 	// Add Headers if requested
 	if this.customHeadersEnabled && minimumDescriptor != nil {
 		response.ResponseHeadersToAdd = []*core.HeaderValue{
@@ -380,6 +385,7 @@ func (this *service) ShouldRateLimit(
 	ctx context.Context,
 	request *pb.RateLimitRequest,
 ) (finalResponse *pb.RateLimitResponse, finalError error) {
+	logger.Debugf("ShouldRateLimit: %+v", request)
 	// Generate trace
 	_, span := tracer.Start(ctx, "ShouldRateLimit Execution",
 		trace.WithAttributes(
diff --git a/test/service/ratelimit_test.go b/test/service/ratelimit_test.go
index f22309348..a949f9a68 100644
--- a/test/service/ratelimit_test.go
+++ b/test/service/ratelimit_test.go
@@ -705,14 +705,14 @@ func TestServiceGlobalQuotaMode(test *testing.T) {
 		})
 	response, err := service.ShouldRateLimit(context.Background(), request)
 
-	// OK overall code even if limit response was OVER_LIMIT, because global quota mode is enabled
+	// OVER_LIMIT overall code since all quota limits were OVER_LIMIT
 	common.AssertProtoEqual(
 		t.assert,
 		&pb.RateLimitResponse{
-			OverallCode: pb.RateLimitResponse_OK,
+			OverallCode: pb.RateLimitResponse_OVER_LIMIT,
 			Statuses: []*pb.RateLimitResponse_DescriptorStatus{
-				{Code: pb.RateLimitResponse_OK, CurrentLimit: limits[0].Limit, LimitRemaining: 0},
-				{Code: pb.RateLimitResponse_OK, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
+				{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[0].Limit, LimitRemaining: 0},
+				{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
 			},
 		},
 		response)
@@ -809,11 +809,11 @@ func TestServicePerDescriptorQuotaMode(test *testing.T) {
 	t.cache.EXPECT().DoLimit(context.Background(), request, limits).Return(
 		[]*pb.RateLimitResponse_DescriptorStatus{
 			{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[0].Limit, LimitRemaining: 0},
-			{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
+			{Code: pb.RateLimitResponse_OK, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
 		})
 	response, err := service.ShouldRateLimit(context.Background(), request)
 
-	// Regular limit should cause OVER_LIMIT overall, but quota mode limit should be converted to OK
+	// Regular limit should cause OVER_LIMIT overall, even though quota mode is under the limit
 	common.AssertProtoEqual(
 		t.assert,
 		&pb.RateLimitResponse{
@@ -827,7 +827,111 @@ func TestServicePerDescriptorQuotaMode(test *testing.T) {
 	t.assert.Nil(err)
 }
 
-func TestServiceQuotaModeOnly(test *testing.T) {
+func TestServiceMixedPerDescriptorModes(test *testing.T) {
+	t := commonSetup(test)
+	defer t.controller.Finish()
+
+	// No Global Quota mode
+	service := t.setupBasicService()
+
+	request := common.NewRateLimitRequest(
+		"quota-domain", [][][2]string{{{"regular", "limit"}}, {{"quota", "limit"}}}, 1)
+
+	// Create limits with one having quota mode enabled per-descriptor
+	// In this configuration the limits will be evaluated as rate limits.
+	limits := []*config.RateLimit{
+		// Regular limit
+		{
+			FullKey:    "regular_limit",
+			Limit:      &pb.RateLimitResponse_RateLimit{RequestsPerUnit: 5, Unit: pb.RateLimitResponse_RateLimit_MINUTE},
+			QuotaMode:  false,
+			ShadowMode: false,
+		},
+		// Quota mode limit
+		{
+			FullKey:    "quota_limit",
+			Limit:      &pb.RateLimitResponse_RateLimit{RequestsPerUnit: 3, Unit: pb.RateLimitResponse_RateLimit_MINUTE},
+			QuotaMode:  true,
+			ShadowMode: false,
+		},
+	}
+
+	t.config.EXPECT().GetLimit(context.Background(), "quota-domain", request.Descriptors[0]).Return(limits[0])
+	t.config.EXPECT().GetLimit(context.Background(), "quota-domain", request.Descriptors[1]).Return(limits[1])
+	t.cache.EXPECT().DoLimit(context.Background(), request, limits).Return(
+		[]*pb.RateLimitResponse_DescriptorStatus{
+			{Code: pb.RateLimitResponse_OK, CurrentLimit: limits[0].Limit, LimitRemaining: 0},
+			{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
+		})
+	response, err := service.ShouldRateLimit(context.Background(), request)
+
+	// Overall result is OVER_LIMIT, since all quota limits were exceeded
+	common.AssertProtoEqual(
+		t.assert,
+		&pb.RateLimitResponse{
+			OverallCode: pb.RateLimitResponse_OVER_LIMIT,
+			Statuses: []*pb.RateLimitResponse_DescriptorStatus{
+				{Code: pb.RateLimitResponse_OK, CurrentLimit: limits[0].Limit, LimitRemaining: 0},
+				{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
+			},
+		},
+		response)
+	t.assert.Nil(err)
+}
+
+func TestServiceMixedPerDescriptorModesUnderLimit(test *testing.T) {
+	t := commonSetup(test)
+	defer t.controller.Finish()
+
+	// No Global Quota mode
+	service := t.setupBasicService()
+
+	request := common.NewRateLimitRequest(
+		"quota-domain", [][][2]string{{{"regular", "limit"}}, {{"quota", "limit"}}}, 1)
+
+	// Create limits with one having quota mode enabled per-descriptor
+	// In this configuration the limits will be evaluated as rate limits.
+	limits := []*config.RateLimit{
+		// Regular limit
+		{
+			FullKey:    "regular_limit",
+			Limit:      &pb.RateLimitResponse_RateLimit{RequestsPerUnit: 5, Unit: pb.RateLimitResponse_RateLimit_MINUTE},
+			QuotaMode:  false,
+			ShadowMode: false,
+		},
+		// Quota mode limit
+		{
+			FullKey:    "quota_limit",
+			Limit:      &pb.RateLimitResponse_RateLimit{RequestsPerUnit: 3, Unit: pb.RateLimitResponse_RateLimit_MINUTE},
+			QuotaMode:  true,
+			ShadowMode: false,
+		},
+	}
+
+	t.config.EXPECT().GetLimit(context.Background(), "quota-domain", request.Descriptors[0]).Return(limits[0])
+	t.config.EXPECT().GetLimit(context.Background(), "quota-domain", request.Descriptors[1]).Return(limits[1])
+	t.cache.EXPECT().DoLimit(context.Background(), request, limits).Return(
+		[]*pb.RateLimitResponse_DescriptorStatus{
+			{Code: pb.RateLimitResponse_OK, CurrentLimit: limits[0].Limit, LimitRemaining: 0},
+			{Code: pb.RateLimitResponse_OK, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
+		})
+	response, err := service.ShouldRateLimit(context.Background(), request)
+
+	// Overall result is OVER_LIMIT, since all quota limits were exceeded
+	common.AssertProtoEqual(
+		t.assert,
+		&pb.RateLimitResponse{
+			OverallCode: pb.RateLimitResponse_OK,
+			Statuses: []*pb.RateLimitResponse_DescriptorStatus{
+				{Code: pb.RateLimitResponse_OK, CurrentLimit: limits[0].Limit, LimitRemaining: 0},
+				{Code: pb.RateLimitResponse_OK, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
+			},
+		},
+		response)
+	t.assert.Nil(err)
+}
+
+func TestServiceQuotaModeOnlyAllOverTheLimit(test *testing.T) {
 	t := commonSetup(test)
 	defer t.controller.Finish()
 
@@ -861,13 +965,61 @@ func TestServiceQuotaModeOnly(test *testing.T) {
 		})
 	response, err := service.ShouldRateLimit(context.Background(), request)
 
-	// All quota mode limits should result in OK overall code
+	// Since quota limits were exceeded overall result in OVER_LIMIT
+	common.AssertProtoEqual(
+		t.assert,
+		&pb.RateLimitResponse{
+			OverallCode: pb.RateLimitResponse_OVER_LIMIT,
+			Statuses: []*pb.RateLimitResponse_DescriptorStatus{
+				{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[0].Limit, LimitRemaining: 0},
+				{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
+			},
+		},
+		response)
+	t.assert.Nil(err)
+}
+
+func TestServiceQuotaModeOnlySomeOverTheLimit(test *testing.T) {
+	t := commonSetup(test)
+	defer t.controller.Finish()
+
+	service := t.setupBasicService()
+
+	request := common.NewRateLimitRequest(
+		"quota-domain", [][][2]string{{{"quota1", "limit"}}, {{"quota2", "limit"}}}, 1)
+
+	// Both limits are in quota mode
+	limits := []*config.RateLimit{
+		{
+			FullKey:    "quota_limit_1",
+			Limit:      &pb.RateLimitResponse_RateLimit{RequestsPerUnit: 5, Unit: pb.RateLimitResponse_RateLimit_MINUTE},
+			QuotaMode:  true,
+			ShadowMode: false,
+		},
+		{
+			FullKey:    "quota_limit_2",
+			Limit:      &pb.RateLimitResponse_RateLimit{RequestsPerUnit: 3, Unit: pb.RateLimitResponse_RateLimit_MINUTE},
+			QuotaMode:  true,
+			ShadowMode: false,
+		},
+	}
+
+	t.config.EXPECT().GetLimit(context.Background(), "quota-domain", request.Descriptors[0]).Return(limits[0])
+	t.config.EXPECT().GetLimit(context.Background(), "quota-domain", request.Descriptors[1]).Return(limits[1])
+	t.cache.EXPECT().DoLimit(context.Background(), request, limits).Return(
+		[]*pb.RateLimitResponse_DescriptorStatus{
+			{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[0].Limit, LimitRemaining: 0},
+			{Code: pb.RateLimitResponse_OK, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
+		})
+	response, err := service.ShouldRateLimit(context.Background(), request)
+
+	// Since only some quota limits were exceeded overall result is OK
 	common.AssertProtoEqual(
 		t.assert,
 		&pb.RateLimitResponse{
 			OverallCode: pb.RateLimitResponse_OK,
 			Statuses: []*pb.RateLimitResponse_DescriptorStatus{
-				{Code: pb.RateLimitResponse_OK, CurrentLimit: limits[0].Limit, LimitRemaining: 0},
+				{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[0].Limit, LimitRemaining: 0},
 				{Code: pb.RateLimitResponse_OK, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
 			},
 		},
@@ -895,6 +1047,71 @@ func TestServiceQuotaModeWithShadowMode(test *testing.T) {
 	t.configUpdateEventChan <- t.configUpdateEvent
 	barrier.wait()
 
+	request := common.NewRateLimitRequest(
+		"quota-domain", [][][2]string{{{"regular", "limit"}}, {{"quota", "limit"}}}, 1)
+
+	// Mix of regular and quota mode limits with global shadow mode
+	limits := []*config.RateLimit{
+		{
+			FullKey:    "regular_limit",
+			Limit:      &pb.RateLimitResponse_RateLimit{RequestsPerUnit: 5, Unit: pb.RateLimitResponse_RateLimit_MINUTE},
+			QuotaMode:  true,
+			ShadowMode: false,
+		},
+		{
+			FullKey:    "quota_limit",
+			Limit:      &pb.RateLimitResponse_RateLimit{RequestsPerUnit: 3, Unit: pb.RateLimitResponse_RateLimit_MINUTE},
+			QuotaMode:  true,
+			ShadowMode: false,
+		},
+	}
+
+	t.config.EXPECT().GetLimit(context.Background(), "quota-domain", request.Descriptors[0]).Return(limits[0])
+	t.config.EXPECT().GetLimit(context.Background(), "quota-domain", request.Descriptors[1]).Return(limits[1])
+	t.cache.EXPECT().DoLimit(context.Background(), request, limits).Return(
+		[]*pb.RateLimitResponse_DescriptorStatus{
+			{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[0].Limit, LimitRemaining: 0},
+			{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
+		})
+	response, err := service.ShouldRateLimit(context.Background(), request)
+
+	// Global shadow mode should override everything and result in OK
+	common.AssertProtoEqual(
+		t.assert,
+		&pb.RateLimitResponse{
+			OverallCode: pb.RateLimitResponse_OK,
+			Statuses: []*pb.RateLimitResponse_DescriptorStatus{
+				{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[0].Limit, LimitRemaining: 0},
+				{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
+			},
+		},
+		response)
+	t.assert.Nil(err)
+
+	// Verify global shadow mode counter is incremented
+	t.assert.EqualValues(1, t.statStore.NewCounter("global_shadow_mode").Value())
+}
+
+func TestServiceMixedModeWithShadowMode(test *testing.T) {
+	os.Setenv("SHADOW_MODE", "true")
+	defer func() {
+		os.Unsetenv("SHADOW_MODE")
+	}()
+
+	t := commonSetup(test)
+	defer t.controller.Finish()
+
+	service := t.setupBasicService()
+
+	// Force a config reload to pick up environment variables.
+	barrier := newBarrier()
+	t.configUpdateEvent.EXPECT().GetConfig().DoAndReturn(func() (config.RateLimitConfig, any) {
+		barrier.signal()
+		return t.config, nil
+	})
+	t.configUpdateEventChan <- t.configUpdateEvent
+	barrier.wait()
+
 	request := common.NewRateLimitRequest(
 		"quota-domain", [][][2]string{{{"regular", "limit"}}, {{"quota", "limit"}}}, 1)
 
@@ -930,7 +1147,7 @@ func TestServiceQuotaModeWithShadowMode(test *testing.T) {
 			OverallCode: pb.RateLimitResponse_OK,
 			Statuses: []*pb.RateLimitResponse_DescriptorStatus{
 				{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[0].Limit, LimitRemaining: 0},
-				{Code: pb.RateLimitResponse_OK, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
+				{Code: pb.RateLimitResponse_OVER_LIMIT, CurrentLimit: limits[1].Limit, LimitRemaining: 0},
 			},
 		},
 		response)