Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ require (
cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect
dario.cat/mergo v1.0.1 // indirect
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi v1.3.0 // indirect
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect
github.com/GoogleCloudPlatform/k8s-cloud-provider v1.25.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v3 v3.1.0 h1:2qsI
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v3 v3.1.0/go.mod h1:AW8VEadnhw9xox+VaVd9sP7NjzOAnaZBLRH6Tq3cJ38=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/managementgroups/armmanagementgroups v1.0.0 h1:pPvTJ1dY0sA35JOeFq6TsY2xj6Z85Yo23Pj4wCCvu4o=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/managementgroups/armmanagementgroups v1.0.0/go.mod h1:mLfWfj8v3jfWKsL9G4eoBoXVcsqcIUTapmdKy7uGOp0=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi v1.3.0 h1:L7G3dExHBgUxsO3qpTGhk/P2dgnYyW48yn7AO33Tbek=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi v1.3.0/go.mod h1:Ms6gYEy0+A2knfKrwdatsggTXYA2+ICKug8w7STorFw=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork v1.1.0 h1:QM6sE5k2ZT/vI5BEe0r7mqjsUSnhVBFbOsVkEuaEfiA=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork v1.1.0/go.mod h1:243D9iHbcQXoFUtgHJwL7gl2zx1aDuDMjvBZVGr2uW0=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0 h1:Dd+RhdJn0OTtVGaeDLZpcumkIVCtA/3/Fo42+eoYvVM=
Expand Down
30 changes: 22 additions & 8 deletions pkg/model/azuremodel/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,22 +97,32 @@ func (c *AzureModelContext) LinkToApplicationSecurityGroupNodes() *azuretasks.Ap
return &azuretasks.ApplicationSecurityGroup{Name: fi.PtrTo(c.NameForApplicationSecurityGroupNodes())}
}

// CloudTagsForInstanceGroup computes the tags to apply to instances in the specified InstanceGroup
// Mostly copied from pkg/model/context.go, but "/" in tag keys are replaced with "_" as Azure
// doesn't allow "/" in tag keys.
func (c *AzureModelContext) CloudTagsForInstanceGroup(ig *kops.InstanceGroup) map[string]*string {
// CloudTagsForClusterResource returns tags for a cluster-scoped resource
// (one that is not tied to a specific instance group).
func (c *AzureModelContext) CloudTagsForClusterResource() map[string]*string {
labels := make(map[string]string)
for k, v := range c.Cluster.Spec.CloudLabels {
labels[k] = v
}
labels[azure.TagClusterName] = c.ClusterName()
return toAzureTags(labels)
}

// CloudTagsForInstanceGroupResource computes the tags to apply to instances in the specified InstanceGroup.
// "/" in tag keys are replaced with "_" as Azure doesn't allow "/" in tag keys.
func (c *AzureModelContext) CloudTagsForInstanceGroupResource(ig *kops.InstanceGroup) map[string]*string {
const (
clusterNodeTemplateLabel = "k8s.io_cluster_node-template_label_"
clusterNodeTemplateTaint = "k8s.io_cluster_node-template_taint_"
)

labels := make(map[string]string)
// Apply any user-specified global labels first so they can be overridden by IG-specific labels.
// Apply any user-specified global labels first (may be overridden by IG-level labels).
for k, v := range c.Cluster.Spec.CloudLabels {
labels[k] = v
}

// Apply any user-specified labels.
// Apply any user-specified IG labels (may override cluster-level labels).
for k, v := range ig.Spec.CloudLabels {
labels[k] = v
}
Expand All @@ -134,7 +144,8 @@ func (c *AzureModelContext) CloudTagsForInstanceGroup(ig *kops.InstanceGroup) ma
}
}

// The system tags take priority because the cluster likely breaks without them...
// System tags take priority because the cluster likely breaks without them.
labels[azure.TagClusterName] = c.ClusterName()
labels[azure.TagNameRolePrefix+ig.Spec.Role.ToLowerString()] = "1"
if ig.Spec.Role == kops.InstanceGroupRoleControlPlane {
labels[azure.TagNameRolePrefix+"master"] = "1"
Expand All @@ -143,7 +154,10 @@ func (c *AzureModelContext) CloudTagsForInstanceGroup(ig *kops.InstanceGroup) ma
// Set the tag used by kops-controller to identify the instance group to which the VM ScaleSet belongs.
labels[nodeidentityazure.InstanceGroupNameTag] = ig.Name

// Replace all "/" with "_" as "/" is not an allowed key character in Azure.
return toAzureTags(labels)
}

func toAzureTags(labels map[string]string) map[string]*string {
m := make(map[string]*string)
for k, v := range labels {
m[strings.ReplaceAll(k, "/", "_")] = fi.PtrTo(v)
Expand Down
53 changes: 51 additions & 2 deletions pkg/model/azuremodel/context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@ package azuremodel

import (
"reflect"
"strings"
"testing"

"k8s.io/kops/upup/pkg/fi"
)

func TestCloudTagsForInstanceGroup(t *testing.T) {
func TestCloudTagsForInstanceGroupResource(t *testing.T) {
c := newTestAzureModelContext()
c.Cluster.Spec.CloudLabels = map[string]string{
"cluster_label_key": "cluster_label_value",
Expand All @@ -41,7 +42,7 @@ func TestCloudTagsForInstanceGroup(t *testing.T) {
"taint_key=taint_value",
}

actual := c.CloudTagsForInstanceGroup(c.InstanceGroups[0])
actual := c.CloudTagsForInstanceGroupResource(c.InstanceGroups[0])
expected := map[string]*string{
"cluster_label_key": fi.PtrTo("cluster_label_value"),
"ig_label_key": fi.PtrTo("ig_label_value"),
Expand All @@ -50,9 +51,57 @@ func TestCloudTagsForInstanceGroup(t *testing.T) {
"k8s.io_cluster_node-template_taint_taint_key": fi.PtrTo("taint_value"),
"k8s.io_role_node": fi.PtrTo("1"),
"kops.k8s.io_instancegroup": fi.PtrTo("nodes"),
"KubernetesCluster": fi.PtrTo("testcluster.test.com"),
}

if !reflect.DeepEqual(actual, expected) {
t.Errorf("expected tags %+v, but got %+v", expected, actual)
}
}

func TestCloudTagsForClusterResource(t *testing.T) {
c := newTestAzureModelContext()
c.Cluster.ObjectMeta.Name = "my.k8s"
c.Cluster.Spec.CloudLabels = map[string]string{
"cluster_label_key": "cluster_label_value",
"node_label/key": "node_label_value",
}

actual := c.CloudTagsForClusterResource()
expected := map[string]*string{
"cluster_label_key": fi.PtrTo("cluster_label_value"),
"node_label_key": fi.PtrTo("node_label_value"),
"KubernetesCluster": fi.PtrTo("my.k8s"),
}

if !reflect.DeepEqual(actual, expected) {
t.Errorf("expected tags %+v, but got %+v", expected, actual)
}
}

func TestSanitizeUserAssignedManagedIdentityName(t *testing.T) {
t.Run("preserves valid names", func(t *testing.T) {
const name = "nodepool-1"
if got := sanitizeUserAssignedManagedIdentityName(name); got != name {
t.Fatalf("expected %q, but got %q", name, got)
}
})

t.Run("replaces invalid characters", func(t *testing.T) {
got := sanitizeUserAssignedManagedIdentityName("my.cluster.example.com")
if strings.Contains(got, ".") {
t.Fatalf("expected dots to be replaced, got %q", got)
}
if got != "my-cluster-example-com" {
t.Fatalf("expected %q, but got %q", "my-cluster-example-com", got)
}
})

t.Run("truncates long names", func(t *testing.T) {
name := strings.Repeat("a", maxUserAssignedManagedIdentityNameLength+1)
got := sanitizeUserAssignedManagedIdentityName(name)
if len(got) > maxUserAssignedManagedIdentityNameLength {
t.Fatalf("expected name length <= %d, got %d (%q)", maxUserAssignedManagedIdentityNameLength, len(got), got)
}
})
}
41 changes: 41 additions & 0 deletions pkg/model/azuremodel/names.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
Copyright 2026 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package azuremodel

import (
"regexp"

"k8s.io/kops/pkg/truncate"
)

// Azure user-assigned identity names must start with a letter or number, contain only alphanumerics, hyphens, and underscores,
// and are limited to 128 characters. See:
// https://learn.microsoft.com/en-us/entra/identity/managed-identities-azure-resources/managed-identity-best-practice-recommendations
const maxUserAssignedManagedIdentityNameLength = 128

var userAssignedManagedIdentityNameInvalidCharacters = regexp.MustCompile(`[^A-Za-z0-9_-]+`)

func sanitizeUserAssignedManagedIdentityName(name string) string {
sanitized := userAssignedManagedIdentityNameInvalidCharacters.ReplaceAllString(name, "-")
return truncate.TruncateString(sanitized, truncate.TruncateStringOptions{
MaxLength: maxUserAssignedManagedIdentityNameLength,
})
}

func (c *AzureModelContext) NameForUserAssignedManagedIdentityControlPlane() string {
return sanitizeUserAssignedManagedIdentityName(c.ClusterName())
}
65 changes: 41 additions & 24 deletions pkg/model/azuremodel/vmscaleset.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,43 +54,60 @@ func (b *VMScaleSetModelBuilder) Build(c *fi.CloudupModelBuilderContext) error {
Tags: map[string]*string{},
})

controlPlaneIdentity := b.addControlPlaneManagedIdentityTasks(c)

for _, ig := range b.InstanceGroups {
name := b.AutoscalingGroupName(ig)
vmss, err := b.buildVMScaleSetTask(c, name, ig)
if err != nil {
return err
}
c.AddTask(vmss)

if ig.IsControlPlane() || b.Cluster.UsesLegacyGossip() {
// Create tasks for assigning built-in roles to VM Scale Sets.
// See https://docs.microsoft.com/en-us/azure/role-based-access-control/built-in-roles
resourceGroupID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s",
b.Cluster.Spec.CloudProvider.Azure.SubscriptionID,
b.Cluster.AzureResourceGroupName(),
)
c.AddTask(&azuretasks.RoleAssignment{
Name: to.Ptr(fmt.Sprintf("%s-%s", *vmss.Name, "owner")),
Lifecycle: b.Lifecycle,
Scope: to.Ptr(resourceGroupID),
VMScaleSet: vmss,
// Owner
RoleDefID: to.Ptr("8e3af657-a8ff-443c-a75c-2fe8c4bcb635"),
})
c.AddTask(&azuretasks.RoleAssignment{
Name: to.Ptr(fmt.Sprintf("%s-%s", *vmss.Name, "blob")),
Lifecycle: b.Lifecycle,
Scope: to.Ptr(b.Cluster.Spec.CloudProvider.Azure.StorageAccountID),
VMScaleSet: vmss,
// Storage Blob Data Contributor
RoleDefID: to.Ptr("ba92f5b4-2d11-453d-a403-e96b0029c9fe"),
})
vmss.ManagedIdentities = []*azuretasks.ManagedIdentity{controlPlaneIdentity}
}

c.AddTask(vmss)
}

return nil
}

func (b *VMScaleSetModelBuilder) addControlPlaneManagedIdentityTasks(c *fi.CloudupModelBuilderContext) *azuretasks.ManagedIdentity {
// Create tasks for assigning built-in roles to the shared control-plane managed identity.
// See https://docs.microsoft.com/en-us/azure/role-based-access-control/built-in-roles
controlPlaneIdentity := &azuretasks.ManagedIdentity{
Name: fi.PtrTo(b.NameForUserAssignedManagedIdentityControlPlane()),
Lifecycle: b.Lifecycle,
ResourceGroup: b.LinkToResourceGroup(),
Tags: b.CloudTagsForClusterResource(),
}
c.AddTask(controlPlaneIdentity)

resourceGroupID := fmt.Sprintf("/subscriptions/%s/resourceGroups/%s",
b.Cluster.Spec.CloudProvider.Azure.SubscriptionID,
b.Cluster.AzureResourceGroupName(),
)
// Owner role on the resource group.
c.AddTask(&azuretasks.RoleAssignment{
Name: to.Ptr(fmt.Sprintf("owner-%s", *controlPlaneIdentity.Name)),
Lifecycle: b.Lifecycle,
Scope: to.Ptr(resourceGroupID),
ManagedIdentity: controlPlaneIdentity,
RoleDefID: to.Ptr("8e3af657-a8ff-443c-a75c-2fe8c4bcb635"),
})
// Storage Blob Data Contributor role on the storage account.
c.AddTask(&azuretasks.RoleAssignment{
Name: to.Ptr(fmt.Sprintf("blob-%s", *controlPlaneIdentity.Name)),
Lifecycle: b.Lifecycle,
Scope: to.Ptr(b.Cluster.Spec.CloudProvider.Azure.StorageAccountID),
ManagedIdentity: controlPlaneIdentity,
RoleDefID: to.Ptr("ba92f5b4-2d11-453d-a403-e96b0029c9fe"),
})

return controlPlaneIdentity
}

func (b *VMScaleSetModelBuilder) buildVMScaleSetTask(
c *fi.CloudupModelBuilderContext,
name string,
Expand Down Expand Up @@ -176,7 +193,7 @@ func (b *VMScaleSetModelBuilder) buildVMScaleSetTask(
}
}

t.Tags = b.CloudTagsForInstanceGroup(ig)
t.Tags = b.CloudTagsForInstanceGroupResource(ig)

return t, nil
}
Expand Down
10 changes: 10 additions & 0 deletions pkg/model/azuremodel/vmscaleset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,16 @@ func TestVMScaleSetModelBuilder_Build(t *testing.T) {
if err != nil {
t.Errorf("unexpected error %s", err)
}
expectedName := b.NameForUserAssignedManagedIdentityControlPlane()
for _, taskKey := range []string{
"ManagedIdentity/" + expectedName,
"RoleAssignment/owner-" + expectedName,
"RoleAssignment/blob-" + expectedName,
} {
if _, ok := c.Tasks[taskKey]; !ok {
t.Fatalf("expected task %q", taskKey)
}
}
}

func TestGetCapacity(t *testing.T) {
Expand Down
54 changes: 53 additions & 1 deletion pkg/resources/azure/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ const (
typeLoadBalancer = "LoadBalancer"
typePublicIPAddress = "PublicIPAddress"
typeNatGateway = "NatGateway"
typeManagedIdentity = "ManagedIdentity"
)

// ListResourcesAzure lists all resources for the cluster by quering Azure.
Expand Down Expand Up @@ -409,8 +410,33 @@ func (g *resourceGetter) listVMScaleSetsAndRoleAssignments(ctx context.Context)
}
rs = append(rs, r)

principalIDs[*vmss.Identity.PrincipalID] = vmss
if vmss.Identity != nil {
// Collect principal IDs from both system-assigned and user-assigned identities.
if vmss.Identity.PrincipalID != nil {
principalIDs[*vmss.Identity.PrincipalID] = vmss
}
for _, uai := range vmss.Identity.UserAssignedIdentities {
if uai != nil && uai.PrincipalID != nil {
principalIDs[*uai.PrincipalID] = vmss
}
}
}
}

// Collect VMSS IDs so that managed identities are not deleted before all VMSS are gone.
var blocked []string
for _, r := range rs {
if r.Type == typeVMScaleSet {
blocked = append(blocked, toKey(typeVMScaleSet, r.ID))
}
}

// Also list and delete managed identities owned by the cluster.
miResources, err := g.listManagedIdentities(ctx, blocked)
if err != nil {
return nil, err
}
rs = append(rs, miResources...)

resourceGroupRAs, err := g.listRoleAssignments(ctx, principalIDs, g.resourceGroupID())
if err != nil {
Expand Down Expand Up @@ -751,6 +777,32 @@ func (g *resourceGetter) deleteNatGateway(_ fi.Cloud, r *resources.Resource) err
return g.cloud.NatGateway().Delete(context.TODO(), g.resourceGroupName(), r.Name)
}

func (g *resourceGetter) listManagedIdentities(ctx context.Context, blocked []string) ([]*resources.Resource, error) {
mis, err := g.cloud.ManagedIdentity().List(ctx, g.resourceGroupName())
if err != nil {
return nil, err
}

var rs []*resources.Resource
for _, mi := range mis {
if !g.isOwnedByCluster(mi.Tags) {
continue
}
rs = append(rs, &resources.Resource{
Obj: mi,
Type: typeManagedIdentity,
ID: *mi.ID,
Name: *mi.Name,
Deleter: func(_ fi.Cloud, r *resources.Resource) error {
return g.cloud.ManagedIdentity().Delete(context.TODO(), g.resourceGroupName(), r.Name)
},
Blocks: []string{toKey(typeResourceGroup, g.resourceGroupID())},
Blocked: blocked,
})
}
return rs, nil
}

// isOwnedByCluster returns true if the resource is owned by the cluster.
func (g *resourceGetter) isOwnedByCluster(tags map[string]*string) bool {
for k, v := range tags {
Expand Down
Loading
Loading