From c74333c4952902b262403e20d0f9274f910c152b Mon Sep 17 00:00:00 2001 From: Ian Fijolek Date: Tue, 19 Nov 2024 16:45:19 -0800 Subject: [PATCH] Add method for deduping images This is helpful with Nomad because some images end up used as sidecars for many jobs. Also periodic jobs spawn many tasks with the same image. --- internal/model/image.go | 35 +++++++++++++++++ internal/model/image_test.go | 55 +++++++++++++++++++++++++++ internal/provider/docker/container.go | 4 +- internal/provider/dockerfile/image.go | 4 +- internal/provider/file/image.go | 4 +- internal/provider/kubernetes/pod.go | 4 +- internal/provider/nomad/task.go | 4 +- internal/provider/swarm/service.go | 4 +- 8 files changed, 103 insertions(+), 11 deletions(-) create mode 100644 internal/model/image_test.go diff --git a/internal/model/image.go b/internal/model/image.go index fe712df57..3fb99c824 100644 --- a/internal/model/image.go +++ b/internal/model/image.go @@ -1,7 +1,10 @@ package model import ( + "encoding/json" + "github.com/crazy-max/diun/v4/pkg/registry" + "github.com/pkg/errors" ) // Image holds image configuration @@ -20,6 +23,38 @@ type Image struct { Metadata map[string]string `yaml:"metadata,omitempty" json:",omitempty"` } +func (i Image) hash() (string, error) { + // Return json serialized image to use as a hashable key + b, err := json.Marshal(i) + if err != nil { + return "", errors.Errorf("cannot hash image: %v", err) + } + + return string(b), nil +} + +type ImageList []Image + +// Dedupe removes duplicate images from the list and returns a new list +func (il ImageList) Dedupe() []Image { + keys := make(map[string]bool) + list := []Image{} + for _, entry := range il { + hash, err := entry.hash() + if err != nil { + // If we couldn't hash the entry, we can't dedupe it so we add it anyway + list = append(list, entry) + } else { + if _, value := keys[hash]; !value { + keys[hash] = true + list = append(list, entry) + } + } + } + + return list +} + // ImagePlatform holds image platform configuration type ImagePlatform struct { OS string `yaml:"os,omitempty" json:",omitempty"` diff --git a/internal/model/image_test.go b/internal/model/image_test.go new file mode 100644 index 000000000..3bf7a990c --- /dev/null +++ b/internal/model/image_test.go @@ -0,0 +1,55 @@ +package model_test + +import ( + "testing" + + "github.com/crazy-max/diun/v4/internal/model" + "github.com/stretchr/testify/assert" +) + +func TestDedupeImageList(t *testing.T) { + testCases := []struct { + desc string + input []model.Image + expected []model.Image + }{ + { + desc: "dedupe", + input: []model.Image{ + { + Name: "alpine", + IncludeTags: []string{"latest"}, + }, + { + Name: "alpine", + IncludeTags: []string{"latest"}, + }, + { + Name: "alpine", + IncludeTags: []string{"oldest"}, + }, + }, + expected: []model.Image{ + { + Name: "alpine", + IncludeTags: []string{"latest"}, + }, + { + Name: "alpine", + IncludeTags: []string{"oldest"}, + }, + }, + }, + } + + for _, tt := range testCases { + tt := tt + + t.Run(tt.desc, func(t *testing.T) { + t.Parallel() + + result := model.ImageList(tt.input).Dedupe() + assert.Equal(t, tt.expected, result) + }) + } +} diff --git a/internal/provider/docker/container.go b/internal/provider/docker/container.go index bf3b03755..ce4d53de6 100644 --- a/internal/provider/docker/container.go +++ b/internal/provider/docker/container.go @@ -40,7 +40,7 @@ func (c *Client) listContainerImage() []model.Image { return []model.Image{} } - var list []model.Image + var list model.ImageList for _, ctn := range ctns { imageName := ctn.Image imageRaw, err := cli.ImageInspectWithRaw(imageName) @@ -112,7 +112,7 @@ func (c *Client) listContainerImage() []model.Image { list = append(list, image) } - return list + return list.Dedupe() } func metadata(ctn types.Container) map[string]string { diff --git a/internal/provider/dockerfile/image.go b/internal/provider/dockerfile/image.go index b2d48fe89..1e07f4e41 100644 --- a/internal/provider/dockerfile/image.go +++ b/internal/provider/dockerfile/image.go @@ -11,7 +11,7 @@ import ( "github.com/crazy-max/diun/v4/pkg/utl" ) -func (c *Client) listExtImage() (list []model.Image) { +func (c *Client) listExtImage() (list model.ImageList) { for _, filename := range c.listDockerfiles(c.config.Patterns) { dfile, err := dockerfile.New(dockerfile.Options{ Filename: filename, @@ -50,7 +50,9 @@ func (c *Client) listExtImage() (list []model.Image) { Msg("Watch disabled") continue } + list = append(list, image) + list = list.Dedupe() } } return diff --git a/internal/provider/file/image.go b/internal/provider/file/image.go index 836e63fcb..035f1c752 100644 --- a/internal/provider/file/image.go +++ b/internal/provider/file/image.go @@ -12,7 +12,7 @@ import ( ) func (c *Client) listFileImage() []model.Image { - var images []model.Image + var images model.ImageList files := c.getFiles() if len(files) == 0 { @@ -99,7 +99,7 @@ func (c *Client) listFileImage() []model.Image { } } - return images + return images.Dedupe() } func (c *Client) getFiles() []string { diff --git a/internal/provider/kubernetes/pod.go b/internal/provider/kubernetes/pod.go index a870b746d..9d208b41e 100644 --- a/internal/provider/kubernetes/pod.go +++ b/internal/provider/kubernetes/pod.go @@ -31,7 +31,7 @@ func (c *Client) listPodImage() []model.Image { return []model.Image{} } - var list []model.Image + var list model.ImageList for _, pod := range pods { for _, ctn := range pod.Spec.Containers { c.logger.Debug(). @@ -64,7 +64,7 @@ func (c *Client) listPodImage() []model.Image { } } - return list + return list.Dedupe() } func metadata(pod v1.Pod, ctn v1.Container) map[string]string { diff --git a/internal/provider/nomad/task.go b/internal/provider/nomad/task.go index 57218cec2..486193248 100644 --- a/internal/provider/nomad/task.go +++ b/internal/provider/nomad/task.go @@ -50,7 +50,7 @@ func (c *Client) listTaskImages() []model.Image { c.logger.Error().Err(err).Msg("Cannot list Nomad jobs") } - var list []model.Image + var list model.ImageList for _, job := range jobs { jobInfo, _, err := client.Jobs().Info(job.ID, nil) @@ -129,7 +129,7 @@ func (c *Client) listTaskImages() []model.Image { } } - return list + return list.Dedupe() } func metadata(job *nomad.JobListStub, taskGroup *nomad.TaskGroup, task *nomad.Task) map[string]string { diff --git a/internal/provider/swarm/service.go b/internal/provider/swarm/service.go index 44f42a656..333df75b5 100644 --- a/internal/provider/swarm/service.go +++ b/internal/provider/swarm/service.go @@ -29,7 +29,7 @@ func (c *Client) listServiceImage() []model.Image { return []model.Image{} } - var list []model.Image + var list model.ImageList for _, svc := range svcs { c.logger.Debug(). Str("svc_name", svc.Spec.Name). @@ -57,7 +57,7 @@ func (c *Client) listServiceImage() []model.Image { list = append(list, image) } - return list + return list.Dedupe() } func metadata(svc swarm.Service) map[string]string {