diff --git a/pkg/apis/clickhouse.altinity.com/v1/type_configuration_chop.go b/pkg/apis/clickhouse.altinity.com/v1/type_configuration_chop.go
index d1ffbfb6f..2c88de135 100644
--- a/pkg/apis/clickhouse.altinity.com/v1/type_configuration_chop.go
+++ b/pkg/apis/clickhouse.altinity.com/v1/type_configuration_chop.go
@@ -58,11 +58,15 @@ const (
 	// OnConfigurationChangeRestart means exit the process so the pod restarts with the new config.
 	OnConfigurationChangeRestart = "restart"
 
-	// RecoveryActionNone means do nothing, CHI stays Aborted.
+	// RecoveryActionNone means do nothing, CHI stays in its current state.
 	RecoveryActionNone = "none"
 	// RecoveryActionRetry means re-enqueue CHI for reconcile (default).
 	RecoveryActionRetry = "retry"
 
+	// defaultCompletedOnPodNotReadyThreshold is the minimum time a pod must remain in
+	// Ready=False before the operator considers the host stuck and re-enqueues a reconcile
+	defaultCompletedOnPodNotReadyThreshold = 5 * time.Minute
+
 	// Default values for ClickHouse user configuration
 	// 1. user/profile
 	// 2. user/quota
@@ -578,6 +582,8 @@ type OperatorConfigReconcileRecovery struct {
 type OperatorConfigReconcileRecoveryFrom struct {
 	// Aborted scope — recovery from Status=Aborted.
 	Aborted OperatorConfigReconcileRecoveryScope `json:"aborted,omitempty" yaml:"aborted,omitempty"`
+	// Completed scope — recovery from Status=Completed when a child pod regresses to Ready=False
+	Completed OperatorConfigReconcileRecoveryCompletedScope `json:"completed,omitempty" yaml:"completed,omitempty"`
 	// Future: Failed, Broken, etc.
 }
 
@@ -592,6 +598,21 @@ type OperatorConfigReconcileRecoveryScope struct {
 	// Future: OnKeeperReady, OnOperatorRestart.
 }
 
+// OperatorConfigReconcileRecoveryCompletedScope holds the event→action mappings for the
+// Completed scope.
+type OperatorConfigReconcileRecoveryCompletedScope struct {
+	// OnPodNotReady controls reaction when a pod belonging to a Completed CHI flips
+	// Ready=True → Ready=False and stays NotReady for at least OnPodNotReadyThreshold:
+	//   nil / "retry" (default) — re-enqueue CHI for reconcile so shouldForceRestartHost
+	//                             can decide whether to restart the host
+	//   "none"                  — do nothing, host stays Ready=False until external action
+	OnPodNotReady *types.String `json:"onPodNotReady,omitempty" yaml:"onPodNotReady,omitempty"`
+	// OnPodNotReadyThreshold is the minimum duration a pod must remain in Ready=False
+	// before this scope fires. Accepts any time.ParseDuration string (default "5m"
+	// when unset, empty, or unparseable).
+	OnPodNotReadyThreshold *types.String `json:"onPodNotReadyThreshold,omitempty" yaml:"onPodNotReadyThreshold,omitempty"`
+}
+
 type OperatorConfigReconcileRuntime struct {
 	ReconcileCHIsThreadsNumber           int `json:"reconcileCHIsThreadsNumber"           yaml:"reconcileCHIsThreadsNumber"`
 	ReconcileShardsThreadsNumber         int `json:"reconcileShardsThreadsNumber"         yaml:"reconcileShardsThreadsNumber"`
@@ -1634,6 +1655,35 @@ func (c *OperatorConfig) ShouldRecoverAbortedOnPodReady() bool {
 	return value == RecoveryActionRetry
 }
 
+// ShouldRecoverCompletedOnPodNotReady reports whether the operator should re-enqueue a
+// CHI reconcile when a pod belonging to a Completed CHI flips to Ready=False and stays
+// there for longer than CompletedOnPodNotReadyThreshold. Default is to retry.
+// Backed by reconcile.recovery.from.completed.onPodNotReady config key.
+func (c *OperatorConfig) ShouldRecoverCompletedOnPodNotReady() bool {
+	value := strings.ToLower(c.Reconcile.Recovery.From.Completed.OnPodNotReady.String())
+	if value == "" {
+		// Default behavior — retry
+		return true
+	}
+	return value == RecoveryActionRetry
+}
+
+// CompletedOnPodNotReadyThreshold returns the minimum duration a pod must remain in
+// Ready=False before the Completed recovery scope fires. Falls back to the package
+// default (5m) if the config value is unset, empty, or unparseable.
+// Backed by reconcile.recovery.from.completed.onPodNotReadyThreshold config key.
+func (c *OperatorConfig) CompletedOnPodNotReadyThreshold() time.Duration {
+	raw := strings.TrimSpace(c.Reconcile.Recovery.From.Completed.OnPodNotReadyThreshold.String())
+	if raw == "" {
+		return defaultCompletedOnPodNotReadyThreshold
+	}
+	d, err := time.ParseDuration(raw)
+	if err != nil || d <= 0 {
+		return defaultCompletedOnPodNotReadyThreshold
+	}
+	return d
+}
+
 // IsNamespaceWatched returns whether specified namespace is in a list of watched
 // TODO unify with GetInformerNamespace
 func (c *OperatorConfig) IsNamespaceWatched(namespace string) bool {
diff --git a/pkg/apis/clickhouse.altinity.com/v1/type_configuration_chop_recovery_test.go b/pkg/apis/clickhouse.altinity.com/v1/type_configuration_chop_recovery_test.go
index 1efc717f2..f5c1b8be2 100644
--- a/pkg/apis/clickhouse.altinity.com/v1/type_configuration_chop_recovery_test.go
+++ b/pkg/apis/clickhouse.altinity.com/v1/type_configuration_chop_recovery_test.go
@@ -16,6 +16,7 @@ package v1
 
 import (
 	"testing"
+	"time"
 
 	"github.com/stretchr/testify/require"
 
@@ -57,3 +58,66 @@ func TestRecoveryActionConstants(t *testing.T) {
 	require.Equal(t, "none", RecoveryActionNone)
 	require.Equal(t, "retry", RecoveryActionRetry)
 }
+
+// TestShouldRecoverCompletedOnPodNotReady verifies the accessor's behavior across the
+// full matrix of possible values for reconcile.recovery.from.completed.onPodNotReady.
+// Mirrors TestShouldRecoverAbortedOnPodReady so symmetric config keys behave identically.
+func TestShouldRecoverCompletedOnPodNotReady(t *testing.T) {
+	tests := []struct {
+		name         string
+		onPodNotRdy  *types.String
+		expected     bool
+	}{
+		{"nil defaults to retry (close the gap by default)", nil, true},
+		{"empty string defaults to retry", types.NewString(""), true},
+		{"retry lowercase", types.NewString("retry"), true},
+		{"Retry mixed case", types.NewString("Retry"), true},
+		{"RETRY upper case", types.NewString("RETRY"), true},
+		{"none lowercase — opt-out", types.NewString("none"), false},
+		{"None mixed case", types.NewString("None"), false},
+		{"NONE upper case", types.NewString("NONE"), false},
+		{"unknown value treated as no-retry (fail safe)", types.NewString("bogus"), false},
+		{"whitespace-only treated as no-retry", types.NewString("  "), false},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			c := &OperatorConfig{}
+			c.Reconcile.Recovery.From.Completed.OnPodNotReady = tc.onPodNotRdy
+			require.Equal(t, tc.expected, c.ShouldRecoverCompletedOnPodNotReady())
+		})
+	}
+}
+
+// TestCompletedOnPodNotReadyThreshold verifies the threshold parser. Unparseable,
+// empty, and non-positive values must fall back to the package default — operators
+// who *really* want to disable the safety net should use onPodNotReady=none, not
+// pass a malformed duration.
+func TestCompletedOnPodNotReadyThreshold(t *testing.T) {
+	tests := []struct {
+		name     string
+		raw      *types.String
+		expected time.Duration
+	}{
+		{"nil falls back to default", nil, defaultCompletedOnPodNotReadyThreshold},
+		{"empty falls back to default", types.NewString(""), defaultCompletedOnPodNotReadyThreshold},
+		{"whitespace falls back to default", types.NewString("   "), defaultCompletedOnPodNotReadyThreshold},
+		{"unparseable falls back to default", types.NewString("five minutes"), defaultCompletedOnPodNotReadyThreshold},
+		{"zero falls back to default (don't accidentally disable)",
+			types.NewString("0s"), defaultCompletedOnPodNotReadyThreshold},
+		{"negative falls back to default", types.NewString("-30s"), defaultCompletedOnPodNotReadyThreshold},
+		{"30 seconds — aggressive", types.NewString("30s"), 30 * time.Second},
+		{"5 minutes — the documented default in string form",
+			types.NewString("5m"), 5 * time.Minute},
+		{"1 hour — conservative", types.NewString("1h"), time.Hour},
+		{"complex duration: 1h30m", types.NewString("1h30m"), 90 * time.Minute},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			c := &OperatorConfig{}
+			c.Reconcile.Recovery.From.Completed.OnPodNotReadyThreshold = tc.raw
+			require.Equal(t, tc.expected, c.CompletedOnPodNotReadyThreshold())
+		})
+	}
+}
diff --git a/pkg/controller/chi/worker-boilerplate.go b/pkg/controller/chi/worker-boilerplate.go
index 97b157602..d8b28fde1 100644
--- a/pkg/controller/chi/worker-boilerplate.go
+++ b/pkg/controller/chi/worker-boilerplate.go
@@ -151,6 +151,8 @@ func (w *worker) processReconcilePod(ctx context.Context, cmd *cmd_queue.Reconci
 		// and re-enqueue the CHI for reconcile. Controlled by config option
 		// reconcile.recovery.from.aborted.onPodReady (default: retry).
 		w.recoverAbortedReconcileOnPodReady(ctx, cmd.Old, cmd.New)
+		// Symmetric path for Ready→NotReady on Completed CHIs.
+		w.recoverCompletedReconcileOnPodNotReady(ctx, cmd.Old, cmd.New)
 		return nil
 	case cmd_queue.ReconcileDelete:
 		w.a.V(1).M(cmd.Old).F().Info("Delete Pod. %s/%s", cmd.Old.Namespace, cmd.Old.Name)
diff --git a/pkg/controller/chi/worker-pod-retry.go b/pkg/controller/chi/worker-pod-retry.go
index 86b901b46..3cb4d2980 100644
--- a/pkg/controller/chi/worker-pod-retry.go
+++ b/pkg/controller/chi/worker-pod-retry.go
@@ -17,6 +17,7 @@ package chi
 import (
 	"context"
 	"strings"
+	"time"
 
 	core "k8s.io/api/core/v1"
 
@@ -28,6 +29,14 @@ import (
 	"github.com/altinity/clickhouse-operator/pkg/model/k8s"
 )
 
+// stuckHostMinDelay floors the deferred-re-enqueue delay so a 1-second flap
+// can't produce an immediate reconcile.
+const stuckHostMinDelay = 5 * time.Second
+
+// stuckHostExtraDelay buffers threshold against apiserver/informer latency
+// so the eventual reconcile observes an up-to-date LastTransitionTime.
+const stuckHostExtraDelay = 2 * time.Second
+
 // normalizeTimeAbortReasons enumerates Aborted reasons that cannot recover via
 // pod transitions — the spec itself must be edited. Auto-recovery skips these
 // to avoid metrics churn on pod-Ready flips that would just re-trigger the same
@@ -132,3 +141,104 @@ func isPodNotReadyToReadyTransition(oldPod, newPod *core.Pod) bool {
 	isReadyNow := !k8s.PodHasNotReadyContainers(newPod)
 	return wasNotReady && isReadyNow
 }
+
+// recoverCompletedReconcileOnPodNotReady is the symmetric counterpart of
+// recoverAbortedReconcileOnPodReady. It inspects a pod update event and schedules a
+// delayed CHI reconcile when a child pod of a Completed CHI transitions Ready → NotReady.
+// The delay equals the configured threshold (default 5m), so by the time the reconcile
+// fires, shouldForceRestartHost can observe a sustained Ready=False and decide whether
+// to restart the host.
+func (w *worker) recoverCompletedReconcileOnPodNotReady(ctx context.Context, oldPod, newPod *core.Pod) {
+	if !chop.Config().ShouldRecoverCompletedOnPodNotReady() {
+		return
+	}
+
+	if !isPodReadyToNotReadyTransition(oldPod, newPod) {
+		return
+	}
+
+	// Skip pods that are terminating — the Ready→NotReady flip is normal
+	// shutdown bookkeeping, not a host regression.
+	if newPod.GetDeletionTimestamp() != nil && !newPod.GetDeletionTimestamp().IsZero() {
+		return
+	}
+
+	// Skip pods already in a kubelet-driven failure mode (ImagePullBackOff,
+	// CrashLoopBackOff, Pending, etc.) — kubelet is handling those and an
+	// operator-driven StatefulSet rollout would just race it.
+	if podIsInKubeletFailureMode(newPod) {
+		return
+	}
+
+	cr, err := w.c.GetCR(&newPod.ObjectMeta)
+	if err != nil || cr == nil {
+		return
+	}
+
+	if !shouldTriggerStuckHostRecovery(cr) {
+		return
+	}
+
+	threshold := chop.Config().CompletedOnPodNotReadyThreshold()
+	delay := stuckHostScheduleDelay(newPod, threshold, time.Now())
+
+	w.a.V(1).M(cr).F().
+		WithEvent(cr, a.EventActionReconcile, a.EventReasonStuckHostRecoveryTriggered).
+		Info(
+			"Stuck-host recovery scheduled: pod %s became NotReady while CHI %s/%s is Completed; "+
+				"re-enqueue in %s (threshold %s)",
+			newPod.Name, cr.Namespace, cr.Name, delay.Truncate(time.Second), threshold,
+		)
+
+	scheduled := cr
+	time.AfterFunc(delay, func() {
+		w.c.enqueueObject(cmd_queue.NewReconcileCHI(cmd_queue.ReconcileAdd, nil, scheduled))
+	})
+}
+
+// shouldTriggerStuckHostRecovery reports whether the given CHI is a valid stuck-host
+// recovery target: status is Completed and the CHI is not being deleted.
+func shouldTriggerStuckHostRecovery(cr *api.ClickHouseInstallation) bool {
+	if cr == nil {
+		return false
+	}
+	status := cr.EnsureStatus()
+	if status.GetStatus() != api.StatusCompleted {
+		return false
+	}
+	if !cr.GetDeletionTimestamp().IsZero() {
+		return false
+	}
+	return true
+}
+
+// isPodReadyToNotReadyTransition reports whether the pod transitioned from "all containers
+// ready" to "some container not ready". The dual of isPodNotReadyToReadyTransition.
+func isPodReadyToNotReadyTransition(oldPod, newPod *core.Pod) bool {
+	if oldPod == nil || newPod == nil {
+		return false
+	}
+	wasReady := !k8s.PodHasNotReadyContainers(oldPod)
+	isNotReadyNow := k8s.PodHasNotReadyContainers(newPod)
+	return wasReady && isNotReadyNow
+}
+
+// stuckHostScheduleDelay computes how long to wait before firing the stuck-host
+// re-enqueue. It returns max(threshold − elapsed + extra, minDelay), clamped to
+// non-negative.
+func stuckHostScheduleDelay(newPod *core.Pod, threshold time.Duration, now time.Time) time.Duration {
+	elapsed := time.Duration(0)
+	if newPod != nil {
+		for _, cond := range newPod.Status.Conditions {
+			if cond.Type == core.PodReady && !cond.LastTransitionTime.IsZero() {
+				elapsed = now.Sub(cond.LastTransitionTime.Time)
+				break
+			}
+		}
+	}
+	delay := threshold - elapsed + stuckHostExtraDelay
+	if delay < stuckHostMinDelay {
+		delay = stuckHostMinDelay
+	}
+	return delay
+}
diff --git a/pkg/controller/chi/worker-pod-retry_test.go b/pkg/controller/chi/worker-pod-retry_test.go
index 39893bb18..829375919 100644
--- a/pkg/controller/chi/worker-pod-retry_test.go
+++ b/pkg/controller/chi/worker-pod-retry_test.go
@@ -145,3 +145,186 @@ func TestShouldTriggerAutoRecovery(t *testing.T) {
 		})
 	}
 }
+
+// TestIsPodReadyToNotReadyTransition verifies the dual of isPodNotReadyToReadyTransition:
+// fires only on Ready→NotReady, mirrors the same nil/edge-case handling.
+func TestIsPodReadyToNotReadyTransition(t *testing.T) {
+	tests := []struct {
+		name     string
+		old, new *core.Pod
+		expected bool
+	}{
+		{"nil old", nil, pod(false), false},
+		{"nil new", pod(true), nil, false},
+		{"both nil", nil, nil, false},
+		{"ready → not ready (the target case)", pod(true), pod(false), true},
+		{"ready → ready (no transition)", pod(true), pod(true), false},
+		{"not ready → ready (wrong direction, handled by sibling)", pod(false), pod(true), false},
+		{"not ready → not ready", pod(false), pod(false), false},
+		{"multi-container: all ready → one not ready", multiContainerPod(true, true), multiContainerPod(false, true), true},
+		{"multi-container: one not ready → all ready", multiContainerPod(true, false), multiContainerPod(true, true), false},
+		{"multi-container: all ready → all ready", multiContainerPod(true, true), multiContainerPod(true, true), false},
+		{"empty statuses → not ready (fires; empty counts as ready)",
+			&core.Pod{}, pod(false), true},
+		{"12-container pod: last flips to not ready",
+			multiContainerPod(true, true, true, true, true, true, true, true, true, true, true, true),
+			multiContainerPod(true, true, true, true, true, true, true, true, true, true, true, false),
+			true},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			got := isPodReadyToNotReadyTransition(tc.old, tc.new)
+			require.Equal(t, tc.expected, got)
+		})
+	}
+}
+
+// TestShouldTriggerStuckHostRecovery verifies the CR-state gate used by
+// recoverCompletedReconcileOnPodNotReady.
+func TestShouldTriggerStuckHostRecovery(t *testing.T) {
+	makeCR := func(status string, deleting bool) *api.ClickHouseInstallation {
+		cr := &api.ClickHouseInstallation{
+			ObjectMeta: meta.ObjectMeta{Name: "chi", Namespace: "ns"},
+		}
+		cr.EnsureStatus().Status = status
+		if deleting {
+			now := meta.NewTime(time.Now())
+			cr.ObjectMeta.DeletionTimestamp = &now
+		}
+		return cr
+	}
+
+	tests := []struct {
+		name     string
+		cr       *api.ClickHouseInstallation
+		expected bool
+	}{
+		{"nil CR — reject", nil, false},
+		// The target case: Completed CHI whose host has just regressed.
+		{"Completed, not deleting — accept (the target case)", makeCR(api.StatusCompleted, false), true},
+		// Aborted is the sibling path's responsibility; firing stuck-host recovery on it
+		// would double-enqueue with recoverAbortedReconcileOnPodReady once the pod
+		// eventually becomes Ready again.
+		{"Aborted — reject (handled by sibling recoverAbortedReconcileOnPodReady path)",
+			makeCR(api.StatusAborted, false), false},
+		// InProgress means a reconcile is already in flight; let it observe the pod state
+		// on its own rather than racing another enqueue.
+		{"InProgress — reject (reconcile already running)", makeCR(api.StatusInProgress, false), false},
+		{"Terminating — reject", makeCR(api.StatusTerminating, false), false},
+		{"Completed but being deleted — reject", makeCR(api.StatusCompleted, true), false},
+		// Fresh CR with no status field set yet — happens between Create and the first
+		// status update by the operator.
+		{"empty status (fresh CR) — reject", makeCR("", false), false},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			require.Equal(t, tc.expected, shouldTriggerStuckHostRecovery(tc.cr))
+		})
+	}
+}
+
+// TestStuckHostScheduleDelay verifies the delay computation for the deferred re-enqueue.
+// The helper is pure (clock + threshold injected as args), so we can exercise the
+// boundary conditions without time-mocking the rest of the controller.
+func TestStuckHostScheduleDelay(t *testing.T) {
+	now := time.Date(2026, 5, 28, 12, 0, 0, 0, time.UTC)
+
+	// podWithReadyTransition builds a pod whose PodReady condition transitioned at the
+	// given offset from "now". Negative offset means the transition is in the past.
+	podWithReadyTransition := func(offset time.Duration) *core.Pod {
+		return &core.Pod{
+			Status: core.PodStatus{
+				Conditions: []core.PodCondition{
+					{Type: core.PodReady, Status: core.ConditionFalse,
+						LastTransitionTime: meta.NewTime(now.Add(offset))},
+				},
+			},
+		}
+	}
+
+	tests := []struct {
+		name      string
+		pod       *core.Pod
+		threshold time.Duration
+		// expected delay must satisfy lo <= got <= hi (small tolerance for arithmetic).
+		expectMin time.Duration
+		expectMax time.Duration
+	}{
+		{
+			// Fresh transition: full threshold + small extra padding for apiserver
+			// catch-up. With threshold=5m and elapsed=0, delay should be ~5m02s.
+			name:      "fresh transition: schedule full threshold + extra",
+			pod:       podWithReadyTransition(0),
+			threshold: 5 * time.Minute,
+			expectMin: 5*time.Minute + stuckHostExtraDelay - time.Second,
+			expectMax: 5*time.Minute + stuckHostExtraDelay + time.Second,
+		},
+		{
+			// Already half-elapsed: remaining ~2.5m + extra.
+			name:      "half-elapsed: schedule the remainder",
+			pod:       podWithReadyTransition(-150 * time.Second),
+			threshold: 5 * time.Minute,
+			expectMin: 150*time.Second + stuckHostExtraDelay - time.Second,
+			expectMax: 150*time.Second + stuckHostExtraDelay + time.Second,
+		},
+		{
+			// Threshold already past at schedule time (e.g. operator restart after
+			// long outage): clamp to stuckHostMinDelay rather than firing instantly,
+			// so a single quick flap doesn't produce an immediate restart.
+			name:      "threshold already past: clamp to minDelay",
+			pod:       podWithReadyTransition(-10 * time.Minute),
+			threshold: 5 * time.Minute,
+			expectMin: stuckHostMinDelay,
+			expectMax: stuckHostMinDelay,
+		},
+		{
+			// Nil pod: no LastTransitionTime info → treat as elapsed=0 → full threshold.
+			name:      "nil pod: full threshold",
+			pod:       nil,
+			threshold: 5 * time.Minute,
+			expectMin: 5*time.Minute + stuckHostExtraDelay,
+			expectMax: 5*time.Minute + stuckHostExtraDelay,
+		},
+		{
+			// Pod has no PodReady condition (very early in lifecycle): elapsed=0.
+			name:      "pod missing PodReady condition: full threshold",
+			pod:       &core.Pod{Status: core.PodStatus{Conditions: []core.PodCondition{}}},
+			threshold: 5 * time.Minute,
+			expectMin: 5*time.Minute + stuckHostExtraDelay,
+			expectMax: 5*time.Minute + stuckHostExtraDelay,
+		},
+		{
+			// Zero LastTransitionTime (apiserver hasn't stamped it yet): treat as
+			// elapsed=0, schedule full threshold.
+			name: "zero LastTransitionTime: full threshold",
+			pod: &core.Pod{
+				Status: core.PodStatus{
+					Conditions: []core.PodCondition{
+						{Type: core.PodReady, Status: core.ConditionFalse},
+					},
+				},
+			},
+			threshold: 5 * time.Minute,
+			expectMin: 5*time.Minute + stuckHostExtraDelay,
+			expectMax: 5*time.Minute + stuckHostExtraDelay,
+		},
+		{
+			// Threshold smaller than minDelay: minDelay still floors the result.
+			name:      "tiny threshold: clamp to minDelay",
+			pod:       podWithReadyTransition(0),
+			threshold: 1 * time.Second,
+			expectMin: stuckHostMinDelay,
+			expectMax: stuckHostMinDelay,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			got := stuckHostScheduleDelay(tc.pod, tc.threshold, now)
+			require.GreaterOrEqual(t, got, tc.expectMin, "delay below expected minimum")
+			require.LessOrEqual(t, got, tc.expectMax, "delay above expected maximum")
+		})
+	}
+}
diff --git a/pkg/controller/chi/worker-status-helpers.go b/pkg/controller/chi/worker-status-helpers.go
index 8da4e493d..f04cb3f2d 100644
--- a/pkg/controller/chi/worker-status-helpers.go
+++ b/pkg/controller/chi/worker-status-helpers.go
@@ -18,6 +18,8 @@ import (
 	"context"
 	"time"
 
+	core "k8s.io/api/core/v1"
+
 	log "github.com/altinity/clickhouse-operator/pkg/announcer"
 	api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
 	"github.com/altinity/clickhouse-operator/pkg/apis/common/types"
@@ -52,6 +54,94 @@ func (w *worker) isPodReady(ctx context.Context, host *api.Host) bool {
 	return false
 }
 
+// isPodSustainedNotReady reports whether the host's pod is currently Ready=False AND
+// has been so for at least `threshold`. Returns false for pods whose failure mode is
+// already being handled by kubelet (ImagePullBackOff, CrashLoopBackOff, Pending, etc.)
+// so the operator does not race kubelet on its own recovery path.
+func (w *worker) isPodSustainedNotReady(ctx context.Context, host *api.Host, threshold time.Duration) bool {
+	if threshold <= 0 {
+		// Threshold of 0/negative means "feature disabled"
+		return false
+	}
+	pod, err := w.c.kube.Pod().Get(ctx, host)
+	if err != nil || pod == nil {
+		return false
+	}
+	if podIsInKubeletFailureMode(pod) {
+		return false
+	}
+	return podIsSustainedNotReady(pod, threshold, time.Now())
+}
+
+// podIsSustainedNotReady is the pure inner predicate of isPodSustainedNotReady,
+// extracted so it can be exercised without a kube client. Returns true iff the pod
+// has a PodReady condition that is currently not True and whose LastTransitionTime
+// is at least `threshold` in the past relative to `now`.
+func podIsSustainedNotReady(pod *core.Pod, threshold time.Duration, now time.Time) bool {
+	if pod == nil || threshold <= 0 {
+		return false
+	}
+	for _, cond := range pod.Status.Conditions {
+		if cond.Type != core.PodReady {
+			continue
+		}
+		if cond.Status == core.ConditionTrue {
+			return false
+		}
+		// Status is False or Unknown. Treat both as "not ready"
+		if cond.LastTransitionTime.IsZero() {
+			return false
+		}
+		return now.Sub(cond.LastTransitionTime.Time) >= threshold
+	}
+	// No PodReady condition at all.
+	return false
+}
+
+// kubeletDrivenWaitingReasons is the set of container Waiting.Reason values that
+// indicate kubelet is already actively recovering the pod and a parallel
+// operator-driven StatefulSet rollout would just race kubelet.
+var kubeletDrivenWaitingReasons = map[string]struct{}{
+	"CrashLoopBackOff":           {},
+	"ImagePullBackOff":           {},
+	"ErrImagePull":               {},
+	"InvalidImageName":           {},
+	"CreateContainerError":       {},
+	"RunContainerError":          {},
+	"ContainerCannotRun":         {},
+	"CreateContainerConfigError": {},
+}
+
+// podIsInKubeletFailureMode reports whether the pod is in a state where kubelet
+// (or the kube-scheduler) is already handling the failure: not yet scheduled,
+// in Pending phase, or any container in a kubelet-driven waiting reason.
+// In those states an operator-driven reconcile would race kubelet without value.
+func podIsInKubeletFailureMode(pod *core.Pod) bool {
+	if pod == nil {
+		return false
+	}
+	if pod.Status.Phase == core.PodPending {
+		return true
+	}
+	for _, cs := range pod.Status.ContainerStatuses {
+		if cs.State.Waiting == nil {
+			continue
+		}
+		if _, hit := kubeletDrivenWaitingReasons[cs.State.Waiting.Reason]; hit {
+			return true
+		}
+	}
+	for _, cs := range pod.Status.InitContainerStatuses {
+		if cs.State.Waiting == nil {
+			continue
+		}
+		if _, hit := kubeletDrivenWaitingReasons[cs.State.Waiting.Reason]; hit {
+			return true
+		}
+	}
+	return false
+}
+
 func (w *worker) isPodStarted(ctx context.Context, host *api.Host) bool {
 	if pod, err := w.c.kube.Pod().Get(ctx, host); err == nil {
 		return k8s.PodHasAllContainersStarted(pod)
diff --git a/pkg/controller/chi/worker-status-helpers_test.go b/pkg/controller/chi/worker-status-helpers_test.go
new file mode 100644
index 000000000..cb4d7391b
--- /dev/null
+++ b/pkg/controller/chi/worker-status-helpers_test.go
@@ -0,0 +1,201 @@
+// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package chi
+
+import (
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+	core "k8s.io/api/core/v1"
+	meta "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// TestPodIsSustainedNotReady covers the pure post-fetch decision used by
+// isPodSustainedNotReady.
+func TestPodIsSustainedNotReady(t *testing.T) {
+	now := time.Date(2026, 5, 28, 12, 0, 0, 0, time.UTC)
+
+	withReady := func(status core.ConditionStatus, transitionOffset time.Duration) *core.Pod {
+		return &core.Pod{
+			Status: core.PodStatus{
+				Conditions: []core.PodCondition{
+					{Type: core.PodReady, Status: status,
+						LastTransitionTime: meta.NewTime(now.Add(transitionOffset))},
+				},
+			},
+		}
+	}
+
+	tests := []struct {
+		name      string
+		pod       *core.Pod
+		threshold time.Duration
+		expected  bool
+	}{
+		{
+			name:      "nil pod — never sustained",
+			pod:       nil,
+			threshold: 5 * time.Minute,
+			expected:  false,
+		},
+		{
+			name:      "zero threshold — feature disabled, never fires",
+			pod:       withReady(core.ConditionFalse, -30*time.Minute),
+			threshold: 0,
+			expected:  false,
+		},
+		{
+			name:      "negative threshold — feature disabled, never fires",
+			pod:       withReady(core.ConditionFalse, -30*time.Minute),
+			threshold: -1 * time.Second,
+			expected:  false,
+		},
+		{
+			name: "no PodReady condition — early lifecycle, never sustained",
+			pod: &core.Pod{Status: core.PodStatus{Conditions: []core.PodCondition{
+				{Type: core.PodInitialized, Status: core.ConditionTrue,
+					LastTransitionTime: meta.NewTime(now.Add(-10 * time.Minute))},
+			}}},
+			threshold: 5 * time.Minute,
+			expected:  false,
+		},
+		{
+			name:      "PodReady=True — not sustained even with old LastTransitionTime",
+			pod:       withReady(core.ConditionTrue, -30*time.Minute),
+			threshold: 5 * time.Minute,
+			expected:  false,
+		},
+		{
+			name:      "PodReady=False but only 1m ago — under threshold (transient)",
+			pod:       withReady(core.ConditionFalse, -1*time.Minute),
+			threshold: 5 * time.Minute,
+			expected:  false,
+		},
+		{
+			name:      "PodReady=False for exactly the threshold — fires (>= semantics)",
+			pod:       withReady(core.ConditionFalse, -5*time.Minute),
+			threshold: 5 * time.Minute,
+			expected:  true,
+		},
+		{
+			name:      "PodReady=False for 26h — the production incident, fires",
+			pod:       withReady(core.ConditionFalse, -26*time.Hour),
+			threshold: 5 * time.Minute,
+			expected:  true,
+		},
+		{
+			name:      "PodReady=Unknown for 10m — treated as not-ready, fires",
+			pod:       withReady(core.ConditionUnknown, -10*time.Minute),
+			threshold: 5 * time.Minute,
+			expected:  true,
+		},
+		{
+			name:      "PodReady=False but LastTransitionTime is zero — conservative, don't fire",
+			pod:       &core.Pod{Status: core.PodStatus{Conditions: []core.PodCondition{{Type: core.PodReady, Status: core.ConditionFalse}}}},
+			threshold: 5 * time.Minute,
+			expected:  false,
+		},
+		{
+			name: "multiple PodReady entries — use first match",
+			pod: &core.Pod{Status: core.PodStatus{Conditions: []core.PodCondition{
+				{Type: core.PodReady, Status: core.ConditionFalse,
+					LastTransitionTime: meta.NewTime(now.Add(-10 * time.Minute))},
+				{Type: core.PodReady, Status: core.ConditionTrue,
+					LastTransitionTime: meta.NewTime(now)},
+			}}},
+			threshold: 5 * time.Minute,
+			expected:  true,
+		},
+		{
+			name: "PodScheduled present alongside PodReady=False — still fires on Ready",
+			pod: &core.Pod{Status: core.PodStatus{Conditions: []core.PodCondition{
+				{Type: core.PodScheduled, Status: core.ConditionTrue,
+					LastTransitionTime: meta.NewTime(now.Add(-1 * time.Hour))},
+				{Type: core.PodReady, Status: core.ConditionFalse,
+					LastTransitionTime: meta.NewTime(now.Add(-10 * time.Minute))},
+			}}},
+			threshold: 5 * time.Minute,
+			expected:  true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			require.Equal(t, tc.expected, podIsSustainedNotReady(tc.pod, tc.threshold, now))
+		})
+	}
+}
+
+// TestPodIsInKubeletFailureMode locks in the kubelet-recovery filter: any pod whose
+// failure mode is already being handled by kubelet (image pull errors, crash loops,
+// pending, etc.) must NOT trigger the stuck-host recovery path.
+func TestPodIsInKubeletFailureMode(t *testing.T) {
+	waitingContainer := func(reason string) *core.Pod {
+		return &core.Pod{Status: core.PodStatus{
+			Phase: core.PodRunning,
+			ContainerStatuses: []core.ContainerStatus{
+				{Name: "clickhouse", State: core.ContainerState{
+					Waiting: &core.ContainerStateWaiting{Reason: reason},
+				}},
+			},
+		}}
+	}
+	waitingInit := func(reason string) *core.Pod {
+		return &core.Pod{Status: core.PodStatus{
+			Phase: core.PodRunning,
+			InitContainerStatuses: []core.ContainerStatus{
+				{Name: "init", State: core.ContainerState{
+					Waiting: &core.ContainerStateWaiting{Reason: reason},
+				}},
+			},
+		}}
+	}
+
+	tests := []struct {
+		name     string
+		pod      *core.Pod
+		expected bool
+	}{
+		{"nil pod", nil, false},
+		{"no statuses, running phase", &core.Pod{Status: core.PodStatus{Phase: core.PodRunning}}, false},
+		{"Pending phase — scheduler/kubelet handling", &core.Pod{Status: core.PodStatus{Phase: core.PodPending}}, true},
+		{"ImagePullBackOff — kubelet handling", waitingContainer("ImagePullBackOff"), true},
+		{"ErrImagePull — kubelet handling", waitingContainer("ErrImagePull"), true},
+		{"InvalidImageName — kubelet handling", waitingContainer("InvalidImageName"), true},
+		{"CrashLoopBackOff — kubelet handling", waitingContainer("CrashLoopBackOff"), true},
+		{"CreateContainerError — kubelet handling", waitingContainer("CreateContainerError"), true},
+		{"RunContainerError — kubelet handling", waitingContainer("RunContainerError"), true},
+		{"ContainerCannotRun — kubelet handling", waitingContainer("ContainerCannotRun"), true},
+		{"CreateContainerConfigError — kubelet handling", waitingContainer("CreateContainerConfigError"), true},
+		{"init container in ImagePullBackOff — kubelet handling", waitingInit("ImagePullBackOff"), true},
+		{"ContainerCreating — transient, not kubelet failure", waitingContainer("ContainerCreating"), false},
+		{"PodInitializing — transient, not kubelet failure", waitingContainer("PodInitializing"), false},
+		{"running container, no waiting state", &core.Pod{Status: core.PodStatus{
+			Phase: core.PodRunning,
+			ContainerStatuses: []core.ContainerStatus{
+				{Name: "clickhouse", Ready: true, State: core.ContainerState{
+					Running: &core.ContainerStateRunning{},
+				}},
+			},
+		}}, false},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			require.Equal(t, tc.expected, podIsInKubeletFailureMode(tc.pod))
+		})
+	}
+}
diff --git a/pkg/controller/chi/worker.go b/pkg/controller/chi/worker.go
index caf251699..77e1eed29 100644
--- a/pkg/controller/chi/worker.go
+++ b/pkg/controller/chi/worker.go
@@ -27,6 +27,7 @@ import (
 	log "github.com/altinity/clickhouse-operator/pkg/announcer"
 	api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
 	"github.com/altinity/clickhouse-operator/pkg/apis/common/types"
+	"github.com/altinity/clickhouse-operator/pkg/chop"
 	"github.com/altinity/clickhouse-operator/pkg/controller/chi/metrics"
 	"github.com/altinity/clickhouse-operator/pkg/controller/common"
 	a "github.com/altinity/clickhouse-operator/pkg/controller/common/announcer"
@@ -193,6 +194,17 @@ func (w *worker) shouldForceRestartHost(ctx context.Context, host *api.Host) boo
 		w.a.V(1).M(host).F().Info("Host with unknown version and in CrashLoopBackOff should be restarted. It most likely is unable to start due to bad config. Host: %s", host.GetName())
 		return true
 
+	case chop.Config().ShouldRecoverCompletedOnPodNotReady() &&
+		w.isPodSustainedNotReady(ctx, host, chop.Config().CompletedOnPodNotReadyThreshold()):
+		// Closes the gap where Completed CHIs with a sustained-NotReady host
+		// were left stuck indefinitely.
+		threshold := chop.Config().CompletedOnPodNotReadyThreshold()
+		w.a.V(1).M(host).F().
+			WithEvent(host.GetCR(), a.EventActionReconcile, a.EventReasonHostStuckNotReady).
+			Info("Host pod has been Ready=False past threshold %s — force restart. Host: %s",
+				threshold, host.GetName())
+		return true
+
 	default:
 		w.a.V(1).M(host).F().Info("Host force restart is not required. Host: %s", host.GetName())
 		return false
diff --git a/pkg/controller/common/announcer/event-emitter.go b/pkg/controller/common/announcer/event-emitter.go
index f7247b6c5..e71547ad1 100644
--- a/pkg/controller/common/announcer/event-emitter.go
+++ b/pkg/controller/common/announcer/event-emitter.go
@@ -65,6 +65,15 @@ const (
 	// reconcile was aborted, on observing a recovery signal (e.g. a pod became Ready).
 	EventReasonAutoRecoveryTriggered = "AutoRecoveryTriggered"
 
+	// EventReasonStuckHostRecoveryTriggered fires when the operator re-enqueues a
+	// Completed CHI for reconcile because one of its hosts has been Ready=False for
+	// longer than the configured threshold.
+	EventReasonStuckHostRecoveryTriggered = "StuckHostRecoveryTriggered"
+
+	// EventReasonHostStuckNotReady fires when shouldForceRestartHost decides to force
+	// a host restart because the pod has been Ready=False past the configured threshold.
+	EventReasonHostStuckNotReady = "HostStuckNotReady"
+
 	// EventReasonKeeperUpdateNoEndpointChange fires when the operator observes a referenced
 	// CHK reconcile completing but decides not to trigger a CHI reconcile because the resolved
 	// zookeeper endpoints have not changed.