Skip to content

Galaxy cannot create pods successfully with the containerd runtime #137

@Huimintai

Description

@Huimintai

TKEStack with the containerd runtime engine cannot create pods successfully; pod creation fails with the following errors:

# kubectl get pods -n kube-system
NAME                                     READY   STATUS              RESTARTS   AGE
coredns-ccc77fb9d-d8vsj                  1/1     Running             0          17h
coredns-ccc77fb9d-l2jn2                  1/1     Running             0          17h
etcd-10.0.32.211                         1/1     Running             0          17h
flannel-rcf9p                            1/1     Running             0          17h
galaxy-daemonset-b88np                   1/1     Running             0          17h
kube-apiserver-10.0.32.211               1/1     Running             0          17h
kube-controller-manager-10.0.32.211      1/1     Running             0          17h
kube-proxy-llkg6                         1/1     Running             0          17h
kube-scheduler-10.0.32.211               1/1     Running             0          17h
metrics-server-v0.3.6-59c66b5dfd-57zz4   2/2     Running             0          17h
metrics-server-v0.3.6-794ccd69c8-6zdrg   0/2     ContainerCreating   0          17h
# kubectl describe pods metrics-server-v0.3.6-794ccd69c8-6zdrg -n kube-system
 Warning  FailedCreatePodSandBox  94s (x4848 over 17h)  kubelet  (combined from similar events): Failed to create pod sandbox: rpc error: code = Unknown desc = failed to setup network for sandbox "2869e68816e95f2eeaa62f26905c049fd4e70240e5a73bf4c30875610d9c8aef": galaxy returns: fail to establish network map[]:failed to open netns "/var/run/netns/cni-93beae62-e333-54c2-12f3-49069a567f4b": failed to Statfs "/var/run/netns/cni-93beae62-e333-54c2-12f3-49069a567f4b": no such file or directory

The galaxy error log:

I0713 02:08:18.298218  184815 server.go:114] ADD metrics-server-v0.3.6-794ccd69c8-6zdrg_kube-system, 16aedda9d4d54b18301610162a4e4d24397a2455fb49a1b2ec6fe270084e9585, /var/run/netns/cni-aee74e86-14a1-fcae-26bd-e96cdf62fa02, [], Jul 13 02:08:18.298209+
I0713 02:08:18.300783  184815 cni.go:93] delegate add 16aedda9d4d54b18301610162a4e4d24397a2455fb49a1b2ec6fe270084e9585 args K8S_POD_NAME=metrics-server-v0.3.6-794ccd69c8-6zdrg;K8S_POD_INFRA_CONTAINER_ID=16aedda9d4d54b18301610162a4e4d24397a2455fb49a1b2ec6fe270084e9585;IgnoreUnknown=1;K8S_POD_NAMESPACE=kube-system conf {"delegate":{"type":"galaxy-veth"},"name":"galaxy-flannel","subnetFile":"/run/flannel/subnet.env","type":"galaxy-flannel"}
E0713 02:08:18.335744  184815 cni.go:146] fail to add network map[]: failed to open netns "/var/run/netns/cni-aee74e86-14a1-fcae-26bd-e96cdf62fa02": failed to Statfs "/var/run/netns/cni-aee74e86-14a1-fcae-26bd-e96cdf62fa02": no such file or directory, begin to rollback and delete it
I0713 02:08:18.335898  184815 cni.go:114] delegate del 16aedda9d4d54b18301610162a4e4d24397a2455fb49a1b2ec6fe270084e9585 args K8S_POD_NAME=metrics-server-v0.3.6-794ccd69c8-6zdrg;K8S_POD_INFRA_CONTAINER_ID=16aedda9d4d54b18301610162a4e4d24397a2455fb49a1b2ec6fe270084e9585;IgnoreUnknown=1;K8S_POD_NAMESPACE=kube-system conf {"delegate":{"type":"galaxy-veth"},"name":"galaxy-flannel","subnetFile":"/run/flannel/subnet.env","type":"galaxy-flannel"}
W0713 02:08:18.342705  184815 cni.go:148] fail to delete cni in rollback <nil>

But when I do not install galaxy, the metrics-server runs well:

root@VM-32-165-ubuntu:~# kubectl get pods -n kube-system
NAME                                       READY   STATUS    RESTARTS   AGE
coredns-ccc77fb9d-qgx2g                    1/1     Running   1          9m46s
coredns-ccc77fb9d-wlb82                    1/1     Running   1          9m46s
etcd-vm-32-165-ubuntu                      1/1     Running   2          10m
kube-apiserver-vm-32-165-ubuntu            1/1     Running   2          10m
kube-controller-manager-vm-32-165-ubuntu   1/1     Running   1          10m
kube-proxy-8sqdn                           1/1     Running   1          9m46s
kube-scheduler-vm-32-165-ubuntu            1/1     Running   7          6m25s
metrics-server-v0.3.6-794ccd69c8-wfv7d     2/2     Running   3          9m32s

Also, when I install the community flannel, the metrics-server runs well without any errors:

# kubectl get pods -n kube-system
NAME                                       READY   STATUS    RESTARTS   AGE
coredns-ccc77fb9d-qgx2g                    1/1     Running   1          13m
coredns-ccc77fb9d-wlb82                    1/1     Running   1          13m
etcd-vm-32-165-ubuntu                      1/1     Running   2          14m
kube-apiserver-vm-32-165-ubuntu            1/1     Running   2          14m
kube-controller-manager-vm-32-165-ubuntu   1/1     Running   1          14m
kube-flannel-ds-2grmk                      1/1     Running   0          69s
kube-proxy-8sqdn                           1/1     Running   1          13m
kube-scheduler-vm-32-165-ubuntu            1/1     Running   7          10m
metrics-server-v0.3.6-794ccd69c8-wfv7d     2/2     Running   3          13m
root@VM-32-165-ubuntu:~#
root@VM-32-165-ubuntu:~#
root@VM-32-165-ubuntu:~# kubectl delete pods metrics-server-v0.3.6-794ccd69c8-wfv7d -n kube-system
pod "metrics-server-v0.3.6-794ccd69c8-wfv7d" deleted
root@VM-32-165-ubuntu:~#
root@VM-32-165-ubuntu:~#
root@VM-32-165-ubuntu:~# kubectl get pods -n kube-system
NAME                                       READY   STATUS    RESTARTS   AGE
coredns-ccc77fb9d-qgx2g                    1/1     Running   1          13m
coredns-ccc77fb9d-wlb82                    1/1     Running   1          13m
etcd-vm-32-165-ubuntu                      1/1     Running   2          14m
kube-apiserver-vm-32-165-ubuntu            1/1     Running   2          14m
kube-controller-manager-vm-32-165-ubuntu   1/1     Running   1          14m
kube-flannel-ds-2grmk                      1/1     Running   0          93s
kube-proxy-8sqdn                           1/1     Running   1          13m
kube-scheduler-vm-32-165-ubuntu            1/1     Running   7          10m
metrics-server-v0.3.6-794ccd69c8-grvvq     2/2     Running   0          15s
root@VM-32-165-ubuntu:~# ls /var/run/netns/
cni-62aefc67-2e1a-3287-bb49-123ffc5eb62a  cni-99a91844-ea0c-96ef-79ae-5b43e1b5aa28
cni-99744035-01d0-1a18-2ec9-4a94c68cf683

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions