Skip to content

Commit b9d61de

Browse files
committed
MON-4036: Add NodeExporterConfig to ClusterMonitoring API
Add configuration for the node-exporter agent that runs as a DaemonSet in openshift-monitoring, collecting hardware and OS-level metrics from every node in the cluster.

Signed-off-by: Daniel Mellado <dmellado@fedoraproject.org>
1 parent b3abc73 commit b9d61de

9 files changed

Lines changed: 3055 additions & 41226 deletions

File tree

config/v1alpha1/tests/clustermonitorings.config.openshift.io/ClusterMonitoringConfig.yaml

Lines changed: 353 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -815,3 +815,356 @@ tests:
815 815
topologyKey: topology.kubernetes.io/zone
816 816
whenUnsatisfiable: DoNotSchedule
817 817
expectedError: "Duplicate value"
818+
- name: Should be able to create NodeExporterConfig with valid resources
819+
initial: |
820+
apiVersion: config.openshift.io/v1alpha1
821+
kind: ClusterMonitoring
822+
spec:
823+
nodeExporterConfig:
824+
resources:
825+
- name: "cpu"
826+
request: "50m"
827+
limit: "200m"
828+
- name: "memory"
829+
request: "50Mi"
830+
limit: "200Mi"
831+
expected: |
832+
apiVersion: config.openshift.io/v1alpha1
833+
kind: ClusterMonitoring
834+
spec:
835+
nodeExporterConfig:
836+
resources:
837+
- name: "cpu"
838+
request: "50m"
839+
limit: "200m"
840+
- name: "memory"
841+
request: "50Mi"
842+
limit: "200Mi"
843+
- name: Should be able to create NodeExporterConfig with valid tolerations
844+
initial: |
845+
apiVersion: config.openshift.io/v1alpha1
846+
kind: ClusterMonitoring
847+
spec:
848+
nodeExporterConfig:
849+
tolerations:
850+
- operator: "Exists"
851+
expected: |
852+
apiVersion: config.openshift.io/v1alpha1
853+
kind: ClusterMonitoring
854+
spec:
855+
nodeExporterConfig:
856+
tolerations:
857+
- operator: "Exists"
858+
- name: Should be able to create NodeExporterConfig with collectors
859+
initial: |
860+
apiVersion: config.openshift.io/v1alpha1
861+
kind: ClusterMonitoring
862+
spec:
863+
nodeExporterConfig:
864+
collectors:
865+
cpuFreq:
866+
collectionPolicy: Collect
867+
tcpStat:
868+
collectionPolicy: DoNotCollect
869+
netDev:
870+
collectionPolicy: Collect
871+
netClass:
872+
collectionPolicy: Collect
873+
statsGatherer: Netlink
874+
systemd:
875+
collectionPolicy: Collect
876+
units:
877+
- "kubelet.service"
878+
- "crio.service"
879+
expected: |
880+
apiVersion: config.openshift.io/v1alpha1
881+
kind: ClusterMonitoring
882+
spec:
883+
nodeExporterConfig:
884+
collectors:
885+
cpuFreq:
886+
collectionPolicy: Collect
887+
tcpStat:
888+
collectionPolicy: DoNotCollect
889+
netDev:
890+
collectionPolicy: Collect
891+
netClass:
892+
collectionPolicy: Collect
893+
statsGatherer: Netlink
894+
systemd:
895+
collectionPolicy: Collect
896+
units:
897+
- "kubelet.service"
898+
- "crio.service"
899+
- name: Should be able to create NodeExporterConfig with all fields
900+
initial: |
901+
apiVersion: config.openshift.io/v1alpha1
902+
kind: ClusterMonitoring
903+
spec:
904+
nodeExporterConfig:
905+
nodeSelector:
906+
kubernetes.io/os: linux
907+
resources:
908+
- name: "cpu"
909+
request: "50m"
910+
limit: "200m"
911+
tolerations:
912+
- operator: "Exists"
913+
collectors:
914+
cpuFreq:
915+
collectionPolicy: Collect
916+
buddyInfo:
917+
collectionPolicy: DoNotCollect
918+
maxProcs: 4
919+
ignoredNetworkDevices:
920+
- "^veth.*$"
921+
- "^docker.*$"
922+
expected: |
923+
apiVersion: config.openshift.io/v1alpha1
924+
kind: ClusterMonitoring
925+
spec:
926+
nodeExporterConfig:
927+
nodeSelector:
928+
kubernetes.io/os: linux
929+
resources:
930+
- name: "cpu"
931+
request: "50m"
932+
limit: "200m"
933+
tolerations:
934+
- operator: "Exists"
935+
collectors:
936+
cpuFreq:
937+
collectionPolicy: Collect
938+
buddyInfo:
939+
collectionPolicy: DoNotCollect
940+
maxProcs: 4
941+
ignoredNetworkDevices:
942+
- "^veth.*$"
943+
- "^docker.*$"
944+
- name: Should reject NodeExporterConfig with empty object
945+
initial: |
946+
apiVersion: config.openshift.io/v1alpha1
947+
kind: ClusterMonitoring
948+
spec:
949+
nodeExporterConfig: {}
950+
expectedError: 'spec.nodeExporterConfig: Invalid value: 0: spec.nodeExporterConfig in body should have at least 1 properties'
951+
- name: Should reject NodeExporterConfig with too many resources
952+
initial: |
953+
apiVersion: config.openshift.io/v1alpha1
954+
kind: ClusterMonitoring
955+
spec:
956+
nodeExporterConfig:
957+
resources:
958+
- name: "cpu"
959+
request: "100m"
960+
- name: "memory"
961+
request: "64Mi"
962+
- name: "hugepages-2Mi"
963+
request: "32Mi"
964+
- name: "hugepages-1Gi"
965+
request: "1Gi"
966+
- name: "ephemeral-storage"
967+
request: "1Gi"
968+
- name: "nvidia.com/gpu"
969+
request: "1"
970+
- name: "example.com/foo"
971+
request: "1"
972+
- name: "example.com/bar"
973+
request: "1"
974+
- name: "example.com/baz"
975+
request: "1"
976+
- name: "example.com/qux"
977+
request: "1"
978+
- name: "example.com/quux"
979+
request: "1"
980+
expectedError: 'spec.nodeExporterConfig.resources: Too many: 11: must have at most 10 items'
981+
- name: Should reject NodeExporterConfig with duplicate resource names
982+
initial: |
983+
apiVersion: config.openshift.io/v1alpha1
984+
kind: ClusterMonitoring
985+
spec:
986+
nodeExporterConfig:
987+
resources:
988+
- name: "cpu"
989+
request: "100m"
990+
- name: "cpu"
991+
request: "200m"
992+
expectedError: 'spec.nodeExporterConfig.resources[1]: Duplicate value: map[string]interface {}{"name":"cpu"}'
993+
- name: Should reject NodeExporterConfig with limit less than request
994+
initial: |
995+
apiVersion: config.openshift.io/v1alpha1
996+
kind: ClusterMonitoring
997+
spec:
998+
nodeExporterConfig:
999+
resources:
1000+
- name: "cpu"
1001+
request: "500m"
1002+
limit: "200m"
1003+
expectedError: 'spec.nodeExporterConfig.resources[0]: Invalid value: "object": limit must be greater than or equal to request'
1004+
- name: Should reject NodeExporterConfig with empty resources array
1005+
initial: |
1006+
apiVersion: config.openshift.io/v1alpha1
1007+
kind: ClusterMonitoring
1008+
spec:
1009+
nodeExporterConfig:
1010+
resources: []
1011+
expectedError: 'spec.nodeExporterConfig.resources: Invalid value: 0: spec.nodeExporterConfig.resources in body should have at least 1 items'
1012+
- name: Should reject NodeExporterConfig with empty collectors object
1013+
initial: |
1014+
apiVersion: config.openshift.io/v1alpha1
1015+
kind: ClusterMonitoring
1016+
spec:
1017+
nodeExporterConfig:
1018+
collectors: {}
1019+
expectedError: 'spec.nodeExporterConfig.collectors: Invalid value: 0: spec.nodeExporterConfig.collectors in body should have at least 1 properties'
1020+
- name: Should accept NodeExporterConfig with empty ignoredNetworkDevices list
1021+
initial: |
1022+
apiVersion: config.openshift.io/v1alpha1
1023+
kind: ClusterMonitoring
1024+
spec:
1025+
nodeExporterConfig:
1026+
ignoredNetworkDevices: []
1027+
expected: |
1028+
apiVersion: config.openshift.io/v1alpha1
1029+
kind: ClusterMonitoring
1030+
spec:
1031+
nodeExporterConfig:
1032+
ignoredNetworkDevices: []
1033+
- name: Should reject NodeExporterConfig with maxProcs below minimum
1034+
initial: |
1035+
apiVersion: config.openshift.io/v1alpha1
1036+
kind: ClusterMonitoring
1037+
spec:
1038+
nodeExporterConfig:
1039+
maxProcs: -1
1040+
expectedError: 'spec.nodeExporterConfig.maxProcs'
1041+
- name: Should reject NodeExporterConfig with maxProcs exceeding maximum
1042+
initial: |
1043+
apiVersion: config.openshift.io/v1alpha1
1044+
kind: ClusterMonitoring
1045+
spec:
1046+
nodeExporterConfig:
1047+
maxProcs: 1025
1048+
expectedError: 'spec.nodeExporterConfig.maxProcs'
1049+
- name: Should reject netClass with statsGatherer set when collector is DoNotCollect
1050+
initial: |
1051+
apiVersion: config.openshift.io/v1alpha1
1052+
kind: ClusterMonitoring
1053+
spec:
1054+
nodeExporterConfig:
1055+
collectors:
1056+
netClass:
1057+
collectionPolicy: DoNotCollect
1058+
statsGatherer: Netlink
1059+
expectedError: 'statsGatherer can only be specified when the netclass collector collectionPolicy is Collect'
1060+
- name: Should accept netClass DoNotCollect without statsGatherer
1061+
initial: |
1062+
apiVersion: config.openshift.io/v1alpha1
1063+
kind: ClusterMonitoring
1064+
spec:
1065+
nodeExporterConfig:
1066+
collectors:
1067+
netClass:
1068+
collectionPolicy: DoNotCollect
1069+
expected: |
1070+
apiVersion: config.openshift.io/v1alpha1
1071+
kind: ClusterMonitoring
1072+
spec:
1073+
nodeExporterConfig:
1074+
collectors:
1075+
netClass:
1076+
collectionPolicy: DoNotCollect
1077+
- name: Should reject systemd with units set when collector is DoNotCollect
1078+
initial: |
1079+
apiVersion: config.openshift.io/v1alpha1
1080+
kind: ClusterMonitoring
1081+
spec:
1082+
nodeExporterConfig:
1083+
collectors:
1084+
systemd:
1085+
collectionPolicy: DoNotCollect
1086+
units:
1087+
- "kubelet.service"
1088+
expectedError: 'units can only be specified when the systemd collector collectionPolicy is Collect'
1089+
- name: Should accept systemd DoNotCollect without units
1090+
initial: |
1091+
apiVersion: config.openshift.io/v1alpha1
1092+
kind: ClusterMonitoring
1093+
spec:
1094+
nodeExporterConfig:
1095+
collectors:
1096+
systemd:
1097+
collectionPolicy: DoNotCollect
1098+
expected: |
1099+
apiVersion: config.openshift.io/v1alpha1
1100+
kind: ClusterMonitoring
1101+
spec:
1102+
nodeExporterConfig:
1103+
collectors:
1104+
systemd:
1105+
collectionPolicy: DoNotCollect
1106+
- name: Should reject NodeExporterConfig with empty nodeSelector
1107+
initial: |
1108+
apiVersion: config.openshift.io/v1alpha1
1109+
kind: ClusterMonitoring
1110+
spec:
1111+
nodeExporterConfig:
1112+
nodeSelector: {}
1113+
expectedError: 'spec.nodeExporterConfig.nodeSelector: Invalid value: 0: spec.nodeExporterConfig.nodeSelector in body should have at least 1 properties'
1114+
- name: Should reject NodeExporterConfig with too many nodeSelector entries
1115+
initial: |
1116+
apiVersion: config.openshift.io/v1alpha1
1117+
kind: ClusterMonitoring
1118+
spec:
1119+
nodeExporterConfig:
1120+
nodeSelector:
1121+
key1: val1
1122+
key2: val2
1123+
key3: val3
1124+
key4: val4
1125+
key5: val5
1126+
key6: val6
1127+
key7: val7
1128+
key8: val8
1129+
key9: val9
1130+
key10: val10
1131+
key11: val11
1132+
expectedError: 'spec.nodeExporterConfig.nodeSelector: Too many: 11: must have at most 10 items'
1133+
- name: Should reject NodeExporterConfig with empty tolerations array
1134+
initial: |
1135+
apiVersion: config.openshift.io/v1alpha1
1136+
kind: ClusterMonitoring
1137+
spec:
1138+
nodeExporterConfig:
1139+
tolerations: []
1140+
expectedError: 'spec.nodeExporterConfig.tolerations: Invalid value: 0: spec.nodeExporterConfig.tolerations in body should have at least 1 items'
1141+
- name: Should reject NodeExporterConfig with too many tolerations
1142+
initial: |
1143+
apiVersion: config.openshift.io/v1alpha1
1144+
kind: ClusterMonitoring
1145+
spec:
1146+
nodeExporterConfig:
1147+
tolerations:
1148+
- key: "key1"
1149+
operator: "Exists"
1150+
- key: "key2"
1151+
operator: "Exists"
1152+
- key: "key3"
1153+
operator: "Exists"
1154+
- key: "key4"
1155+
operator: "Exists"
1156+
- key: "key5"
1157+
operator: "Exists"
1158+
- key: "key6"
1159+
operator: "Exists"
1160+
- key: "key7"
1161+
operator: "Exists"
1162+
- key: "key8"
1163+
operator: "Exists"
1164+
- key: "key9"
1165+
operator: "Exists"
1166+
- key: "key10"
1167+
operator: "Exists"
1168+
- key: "key11"
1169+
operator: "Exists"
1170+
expectedError: 'spec.nodeExporterConfig.tolerations: Too many: 11: must have at most 10 items'

0 commit comments

Comments
 (0)