Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions cmd/plugins/resctrl-mon/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"sync"
"time"

"github.com/google/uuid"
"sigs.k8s.io/yaml"

"github.com/containerd/nri/pkg/api"
Expand Down Expand Up @@ -142,12 +143,15 @@ func (p *plugin) Synchronize(ctx context.Context, pods []*api.PodSandbox, contai
log.Warnf("Synchronize: failed to create mon_group for pod %s: %v", podUID, err)
continue
}
// Use canonical form for state lookups (ensureMonGroup stores canonical).
u, _ := uuid.Parse(podUID)
canonicalUID := u.String()
pid := int(ctr.GetPid())
if pid == 0 {
pid = readContainerPID(ctr.GetId())
}
if pid > 0 {
monGroupDir := p.state.getMonGroupDir(podUID)
monGroupDir := p.state.getMonGroupDir(canonicalUID)
if err := p.rdt.writeTaskPID(monGroupDir, pid); err != nil {
log.Warnf("Synchronize: failed to write PID %d for pod %s: %v", pid, podUID, err)
} else {
Expand Down Expand Up @@ -227,6 +231,9 @@ func (p *plugin) PostCreateContainer(ctx context.Context, pod *api.PodSandbox, c
// to PostStartContainer which will write PIDs after the process starts.
func (p *plugin) StartContainer(ctx context.Context, pod *api.PodSandbox, ctr *api.Container) error {
podUID := pod.GetUid()
if u, err := uuid.Parse(podUID); err == nil {
podUID = u.String()
}
ctrName := pprintCtr(pod, ctr)
pid := int(ctr.GetPid())

Expand Down Expand Up @@ -261,6 +268,9 @@ func (p *plugin) StartContainer(ctx context.Context, pod *api.PodSandbox, ctr *a
// the RMID.
func (p *plugin) PostStartContainer(ctx context.Context, pod *api.PodSandbox, ctr *api.Container) error {
podUID := pod.GetUid()
if u, err := uuid.Parse(podUID); err == nil {
podUID = u.String()
}
ctrName := pprintCtr(pod, ctr)
pid := int(ctr.GetPid())

Expand Down Expand Up @@ -300,6 +310,9 @@ func (p *plugin) PostStartContainer(ctx context.Context, pod *api.PodSandbox, ct
// StopContainer is called when a container is being stopped.
func (p *plugin) StopContainer(ctx context.Context, pod *api.PodSandbox, ctr *api.Container) ([]*api.ContainerUpdate, error) {
podUID := pod.GetUid()
if u, err := uuid.Parse(podUID); err == nil {
podUID = u.String()
}
ctrName := pprintCtr(pod, ctr)

log.Debugf("StopContainer %s", ctrName)
Expand Down Expand Up @@ -329,9 +342,11 @@ func (p *plugin) StopContainer(ctx context.Context, pod *api.PodSandbox, ctr *ap
// container's RDT class. If an allocation plugin assigns different classes to
// containers in the same pod, subsequent containers use the first class.
func (p *plugin) ensureMonGroup(podUID, containerID, rdtClass string) error {
if !looksLikePodUID(podUID) {
u, err := uuid.Parse(podUID)
if err != nil {
return fmt.Errorf("invalid pod UID %q", podUID)
}
podUID = u.String()

p.mu.Lock()
defer p.mu.Unlock()
Expand Down
71 changes: 71 additions & 0 deletions cmd/plugins/resctrl-mon/plugin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,77 @@ func TestStartContainer_FilteredPod(t *testing.T) {
require.NoError(t, err)
}

func TestCompactUID_EnsureMonGroupStoresCanonical(t *testing.T) {
tmpDir := t.TempDir()
p := newTestPlugin(tmpDir)

compactUID := "a1b2c3d4e5f678901234abcdef567890"
canonicalUID := "a1b2c3d4-e5f6-7890-1234-abcdef567890"

pod := makePod(compactUID, "default", "test-pod")
ctr := makeContainer("c1", "container1", compactUID, 0, "")

err := p.PostCreateContainer(context.Background(), pod, ctr)
require.NoError(t, err)

// State must be keyed under the canonical dashed form.
assert.True(t, p.state.hasPod(canonicalUID))
assert.False(t, p.state.hasPod(compactUID))

monDir := p.state.getMonGroupDir(canonicalUID)
assert.Contains(t, monDir, canonicalUID)
}

func TestCompactUID_StartContainerFindsMonGroup(t *testing.T) {
tmpDir := t.TempDir()
p := newTestPlugin(tmpDir)

compactUID := "a1b2c3d4e5f678901234abcdef567890"
canonicalUID := "a1b2c3d4-e5f6-7890-1234-abcdef567890"

pod := makePod(compactUID, "default", "test-pod")
ctr := makeContainer("c1", "container1", compactUID, 0, "")

// Create mon_group via compact UID.
err := p.PostCreateContainer(context.Background(), pod, ctr)
require.NoError(t, err)

monDir := p.state.getMonGroupDir(canonicalUID)
require.NotEmpty(t, monDir)
require.NoError(t, os.WriteFile(filepath.Join(monDir, "tasks"), nil, 0644))

// StartContainer also using compact UID must find and write to the same mon_group.
ctrWithPid := makeContainer("c1", "container1", compactUID, 77, "")
err = p.StartContainer(context.Background(), pod, ctrWithPid)
require.NoError(t, err)

data, err := os.ReadFile(filepath.Join(monDir, "tasks"))
require.NoError(t, err)
assert.Equal(t, "77\n", string(data))
}

func TestCompactUID_StopContainerCleansUp(t *testing.T) {
tmpDir := t.TempDir()
p := newTestPlugin(tmpDir)

compactUID := "a1b2c3d4e5f678901234abcdef567890"
canonicalUID := "a1b2c3d4-e5f6-7890-1234-abcdef567890"

pod := makePod(compactUID, "default", "test-pod")
ctr := makeContainer("c1", "container1", compactUID, 0, "")

err := p.PostCreateContainer(context.Background(), pod, ctr)
require.NoError(t, err)
assert.Equal(t, 1, p.state.podCount())

_, err = p.StopContainer(context.Background(), pod, ctr)
require.NoError(t, err)

// Pod must be gone from state after stop.
assert.Equal(t, 0, p.state.podCount())
assert.False(t, p.state.hasPod(canonicalUID))
}

func TestStopContainer_RemovesStateOnRmdirFailure(t *testing.T) {
tmpDir := t.TempDir()
p := newTestPlugin(tmpDir)
Expand Down
34 changes: 6 additions & 28 deletions cmd/plugins/resctrl-mon/resctrl.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"strconv"
"strings"
"syscall"

"github.com/google/uuid"
)

const (
Expand Down Expand Up @@ -158,12 +160,13 @@ func (r *resctrlOps) cleanOrphanedInDir(monGroupsPath string, state *podState) {
continue
}
name := entry.Name()
// Only clean directories that look like pod UIDs (contain dashes like UUIDs).
if !looksLikePodUID(name) {
// Only clean directories that look like pod UIDs.
u, err := uuid.Parse(name)
if err != nil {
continue
}
orphanDir := filepath.Join(monGroupsPath, name)
trackedDir := state.getMonGroupDir(name)
trackedDir := state.getMonGroupDir(u.String())
if trackedDir == orphanDir {
// This is the active mon_group for this pod.
continue
Expand All @@ -175,31 +178,6 @@ func (r *resctrlOps) cleanOrphanedInDir(monGroupsPath string, state *podState) {
}
}

// looksLikePodUID returns true if the name looks like a Kubernetes pod UID
// (UUID format with dashes, e.g., a1b2c3d4-e5f6-7890-abcd-ef1234567890).
func looksLikePodUID(name string) bool {
if len(name) != 36 {
return false
}
// Check for UUID-like pattern: 8-4-4-4-12 hex chars.
parts := strings.Split(name, "-")
if len(parts) != 5 {
return false
}
expectedLens := []int{8, 4, 4, 4, 12}
for i, part := range parts {
if len(part) != expectedLens[i] {
return false
}
for _, c := range part {
if (c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F') {
return false
}
}
}
return true
}

// isValidRDTClass returns true if the name is a safe resctrl ctrl_group name.
// It rejects path separators, dot-segments, and empty strings to prevent
// path traversal outside the resctrl mount.
Expand Down
35 changes: 25 additions & 10 deletions cmd/plugins/resctrl-mon/resctrl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,16 +192,31 @@ func TestCleanOrphanedMonGroups_StaleLocation(t *testing.T) {
assert.True(t, os.IsNotExist(err))
}

func TestLooksLikePodUID(t *testing.T) {
assert.True(t, looksLikePodUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890"))
assert.True(t, looksLikePodUID("DEADBEEF-DEAD-BEEF-DEAD-BEEFDEADBEEF"))
assert.True(t, looksLikePodUID("00000000-0000-0000-0000-000000000000"))

assert.False(t, looksLikePodUID("short"))
assert.False(t, looksLikePodUID("not-a-uuid-at-all-nope-notthisone!"))
assert.False(t, looksLikePodUID("a1b2c3d4-e5f6-7890-abcd-ef123456789")) // too short last segment
assert.False(t, looksLikePodUID("g1b2c3d4-e5f6-7890-abcd-ef1234567890")) // 'g' is not hex
assert.False(t, looksLikePodUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890x")) // too long
func TestCleanOrphanedMonGroups_CompactUIDIsOrphaned(t *testing.T) {
tmpDir := t.TempDir()
r := newResctrlOps(tmpDir)
state := newPodState()

// Simulate: pod is tracked under the canonical dashed UID (as ensureMonGroup stores it).
canonicalUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
canonicalDir, err := r.createMonGroup("", canonicalUID)
require.NoError(t, err)
state.addPod(canonicalUID, canonicalDir)

// Simulate: a stale compact-form directory left by a previous run (e.g. CRI-O).
compactUID := "a1b2c3d4e5f678901234abcdef567890"
compactDir := filepath.Join(tmpDir, "mon_groups", compactUID)
require.NoError(t, os.Mkdir(compactDir, 0755))

r.cleanOrphanedMonGroups(state)

// Canonical mon_group (tracked) must survive.
_, err = os.Stat(canonicalDir)
assert.NoError(t, err)

// Compact-named directory is not tracked — must be removed as orphan.
_, err = os.Stat(compactDir)
assert.True(t, os.IsNotExist(err))
}

func TestIsValidRDTClass(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ require (
github.com/containers/nri-plugins/pkg/topology v0.0.0
github.com/coreos/go-systemd/v22 v22.5.0
github.com/fsnotify/fsnotify v1.6.0
github.com/google/uuid v1.6.0
github.com/intel/goresctrl v0.12.0
github.com/intel/memtierd v0.1.1
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.1.2
Expand Down Expand Up @@ -63,7 +64,6 @@ require (
github.com/google/gnostic-models v0.6.9 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
github.com/imdario/mergo v0.3.6 // indirect
Expand Down
Loading