Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 189 additions & 4 deletions test/extended/node/node_e2e/node.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
package node

import (
"context"
"fmt"
"path/filepath"
"regexp"
"strings"
"time"

g "github.com/onsi/ginkgo/v2"
o "github.com/onsi/gomega"
nodeutils "github.com/openshift/origin/test/extended/node"
exutil "github.com/openshift/origin/test/extended/util"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
e2e "k8s.io/kubernetes/test/e2e/framework"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"

nodeutils "github.com/openshift/origin/test/extended/node"
exutil "github.com/openshift/origin/test/extended/util"
)

var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager", func() {
Expand All @@ -22,8 +29,24 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",

// Skip all tests on MicroShift clusters as MachineConfig resources are not available
g.BeforeEach(func() {
isMicroShift, err := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
o.Expect(err).NotTo(o.HaveOccurred())
var isMicroShift bool
var err error

// Retry check for robustness - OpenShift should eventually respond
pollErr := wait.Poll(2*time.Second, 30*time.Second, func() (bool, error) {
isMicroShift, err = exutil.IsMicroShiftCluster(oc.AdminKubeClient())
if err != nil {
e2e.Logf("Failed to check if cluster is MicroShift: %v, retrying...", err)
return false, nil
}
return true, nil
})

if pollErr != nil {
e2e.Logf("Setup failed: unable to determine if cluster is MicroShift after retries: %v", err)
g.Fail("Setup failed: unable to determine cluster type - this is an infrastructure/connectivity issue, not a test failure")
}
Comment on lines +35 to +48
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

This MicroShift retry wrapper never actually retries or fails setup

Line 37 / Line 184 call exutil.IsMicroShiftCluster, but that helper already does its own 5-minute poll and always returns err == nil even on timeout (test/extended/util/framework.go:2327-2358). That means this outer 30-second wait.Poll can still block for ~5 minutes per call, then return success with isMicroShift == false, so Line 45 / Line 192's setup-failure path is effectively dead.

Either call the helper once, or change the helper to surface API failures and own the retry there; in its current form this adds long setup stalls without giving the clearer setup-vs-test failure behavior the PR is trying to introduce.

Also applies to: 182-195

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@test/extended/node/node_e2e/node.go` around lines 35 - 48, The outer retry
using wait.Poll around exutil.IsMicroShiftCluster is ineffective because
IsMicroShiftCluster already polls internally and suppresses timeouts; remove the
outer wait.Poll and call exutil.IsMicroShiftCluster(oc.AdminKubeClient())
directly, then check its returned err and isMicroShift to decide failure (use
pollErr variable logic but replace it with the direct call result), or
alternatively modify exutil.IsMicroShiftCluster to return an error on timeout so
the outer wait.Poll can meaningfully retry; apply the same change to the
duplicate occurrence that references the same functions/variables
(exutil.IsMicroShiftCluster, wait.Poll, isMicroShift, pollErr).


if isMicroShift {
g.Skip("Skipping test on MicroShift cluster - MachineConfig resources are not available")
}
Expand Down Expand Up @@ -143,3 +166,165 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
o.Expect(output).To(o.ContainSubstring("fuse"), "dev fuse is not mounted inside pod")
})
})

var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] NODE initContainer policy,volume,readiness,quota", func() {
	defer g.GinkgoRecover()

	var (
		oc = exutil.NewCLI("node-initcontainer")
	)

	// Skip all tests on MicroShift clusters as MachineConfig resources are not available
	g.BeforeEach(func() {
		// NOTE(review): exutil.IsMicroShiftCluster already performs its own
		// multi-minute poll internally, so wrapping it in another wait.Poll
		// never added real retries — it only risked stalling setup for
		// minutes per attempt while leaving the poll-timeout failure path
		// effectively dead. Call the helper once and treat any error it
		// surfaces as a setup (infrastructure/connectivity) failure rather
		// than a test failure.
		isMicroShift, err := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
		if err != nil {
			e2e.Logf("Setup failed: unable to determine if cluster is MicroShift: %v", err)
			g.Fail("Setup failed: unable to determine cluster type - this is an infrastructure/connectivity issue, not a test failure")
		}

		if isMicroShift {
			g.Skip("Skipping test on MicroShift cluster - MachineConfig resources are not available")
		}
	})

	//author: bgudi@redhat.com
	// OCP-38271: once an init container has exited successfully, removing its
	// exited container from the node (via crictl) must NOT cause the kubelet
	// to restart it.
	g.It("[OTP] Init containers should not restart when the exited init container is removed from node [OCP-38271]", func() {
		g.By("Test for case OCP-38271")
		oc.SetupProject()

		podName := "initcon-pod"
		namespace := oc.Namespace()
		ctx := context.Background()

		g.By("Create a pod with init container")
		// The init container writes a marker file into a shared emptyDir so it
		// does real work before exiting; the main container just sleeps to
		// keep the pod Running while we manipulate the node.
		pod := &corev1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name:      podName,
				Namespace: namespace,
			},
			Spec: corev1.PodSpec{
				InitContainers: []corev1.Container{
					{
						Name:    "inittest",
						Image:   "quay.io/openshifttest/busybox@sha256:c5439d7db88ab5423999530349d327b04279ad3161d7596d2126dfb5b02bfd1f",
						Command: []string{"/bin/sh", "-ec", "echo running >> /mnt/data/test"},
						VolumeMounts: []corev1.VolumeMount{
							{
								Name:      "data",
								MountPath: "/mnt/data",
							},
						},
					},
				},
				Containers: []corev1.Container{
					{
						Name:    "hello-test",
						Image:   "quay.io/openshifttest/busybox@sha256:c5439d7db88ab5423999530349d327b04279ad3161d7596d2126dfb5b02bfd1f",
						Command: []string{"/bin/sh", "-c", "sleep 3600"},
						VolumeMounts: []corev1.VolumeMount{
							{
								Name:      "data",
								MountPath: "/mnt/data",
							},
						},
					},
				},
				Volumes: []corev1.Volume{
					{
						Name: "data",
						VolumeSource: corev1.VolumeSource{
							EmptyDir: &corev1.EmptyDirVolumeSource{},
						},
					},
				},
				// RestartPolicy Never so any unexpected init-container restart
				// is purely the kubelet's doing, not pod-level restart policy.
				RestartPolicy: corev1.RestartPolicyNever,
			},
		}

		_, err := oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
		o.Expect(err).NotTo(o.HaveOccurred())
		defer func() {
			// Best-effort cleanup; the test namespace is deleted afterwards anyway.
			_ = oc.KubeClient().CoreV1().Pods(namespace).Delete(ctx, podName, metav1.DeleteOptions{})
		}()

		g.By("Check pod status")
		err = e2epod.WaitForPodRunningInNamespace(ctx, oc.KubeClient(), pod)
		o.Expect(err).NotTo(o.HaveOccurred(), "pod is not running")

		g.By("Check init container exit normally")
		err = wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) {
			pod, err := oc.KubeClient().CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
			if err != nil {
				return false, err
			}
			for _, status := range pod.Status.InitContainerStatuses {
				if status.Name == "inittest" {
					if status.State.Terminated != nil && status.State.Terminated.ExitCode == 0 {
						e2e.Logf("Init container exited with code 0")
						return true, nil
					}
				}
			}
			return false, nil
		})
		o.Expect(err).NotTo(o.HaveOccurred(), "container not exit normally")

		g.By("Get node where pod is running")
		pod, err = oc.KubeClient().CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
		o.Expect(err).NotTo(o.HaveOccurred())
		nodeName := pod.Spec.NodeName
		o.Expect(nodeName).NotTo(o.BeEmpty(), "pod node name is empty")

		g.By("Get init container ID from pod status")
		var containerID string
		for _, status := range pod.Status.InitContainerStatuses {
			if status.Name == "inittest" {
				containerID = status.ContainerID
				break
			}
		}
		o.Expect(containerID).NotTo(o.BeEmpty(), "init container ID is empty")

		// Extract the actual container ID (remove prefix like "cri-o://")
		containerIDPattern := regexp.MustCompile(`^[^/]+://(.+)$`)
		matches := containerIDPattern.FindStringSubmatch(containerID)
		o.Expect(matches).To(o.HaveLen(2), "failed to parse container ID")
		actualContainerID := matches[1]

		g.By("Delete init container from node")
		output, err := nodeutils.ExecOnNodeWithChroot(oc, nodeName, "crictl", "rm", actualContainerID)
		o.Expect(err).NotTo(o.HaveOccurred(), "fail to delete container")
		e2e.Logf("Container deletion output: %s", output)

		g.By("Check init container not restart again")
		// Poll for a full minute expecting NO restart: success here is the
		// poll timing out, while an observed restart aborts the poll with a
		// distinct error.
		err = wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) {
			pod, err := oc.KubeClient().CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
			if err != nil {
				return false, err
			}
			for _, status := range pod.Status.InitContainerStatuses {
				if status.Name == "inittest" {
					if status.RestartCount > 0 {
						e2e.Logf("Init container restarted, restart count: %d", status.RestartCount)
						return true, fmt.Errorf("init container restarted")
					}
				}
			}
			e2e.Logf("Init container has not restarted")
			return false, nil
		})
		// Only a timeout is acceptable: it means the restart condition was
		// never observed during the whole polling window.
		o.Expect(err).To(o.Equal(wait.ErrWaitTimeout), "expected timeout while waiting confirms init container did not restart")
	})
})