sigs.k8s.io/cluster-api-provider-aws@v1.5.5/test/e2e/data/infrastructure-aws/kustomize_sources/gpu/gpu-operator-components.yaml (about) 1 --- 2 # Source: gpu-operator/templates/resources-namespace.yaml 3 apiVersion: v1 4 kind: Namespace 5 metadata: 6 name: gpu-operator-resources 7 labels: 8 app.kubernetes.io/component: "gpu-operator" 9 openshift.io/cluster-monitoring: "true" 10 --- 11 # Source: gpu-operator/charts/node-feature-discovery/templates/clusterrole.yaml 12 apiVersion: rbac.authorization.k8s.io/v1 13 kind: ClusterRole 14 metadata: 15 name: gpu-operator-node-feature-discovery 16 namespace: gpu-operator-resources 17 labels: 18 helm.sh/chart: node-feature-discovery-0.10.1 19 app.kubernetes.io/name: node-feature-discovery 20 app.kubernetes.io/instance: gpu-operator 21 app.kubernetes.io/version: "v0.10.1" 22 app.kubernetes.io/managed-by: Helm 23 rules: 24 - apiGroups: 25 - "" 26 resources: 27 - nodes 28 # when using command line flag --resource-labels to create extended resources 29 # you will need to uncomment "- nodes/status" 30 # - nodes/status 31 verbs: 32 - get 33 - patch 34 - update 35 - list 36 --- 37 # Source: gpu-operator/charts/node-feature-discovery/templates/clusterrolebinding.yaml 38 apiVersion: rbac.authorization.k8s.io/v1 39 kind: ClusterRoleBinding 40 metadata: 41 name: gpu-operator-node-feature-discovery 42 labels: 43 helm.sh/chart: node-feature-discovery-0.8.2 44 app.kubernetes.io/name: node-feature-discovery 45 app.kubernetes.io/instance: gpu-operator 46 app.kubernetes.io/version: "v0.8.2" 47 app.kubernetes.io/managed-by: Helm 48 roleRef: 49 apiGroup: rbac.authorization.k8s.io 50 kind: ClusterRole 51 name: gpu-operator-node-feature-discovery 52 subjects: 53 - kind: ServiceAccount 54 name: node-feature-discovery 55 namespace: gpu-operator-resources 56 --- 57 # Source: gpu-operator/charts/node-feature-discovery/templates/master.yaml 58 apiVersion: apps/v1 59 kind: Deployment 60 metadata: 61 name: gpu-operator-node-feature-discovery-master 62 namespace: gpu-operator-resources 63 labels: 64 helm.sh/chart: node-feature-discovery-0.10.1 65 app.kubernetes.io/name: node-feature-discovery 66 app.kubernetes.io/instance: gpu-operator 67 app.kubernetes.io/version: "v0.10.1" 68 app.kubernetes.io/managed-by: Helm 69 role: master 70 spec: 71 replicas: 1 72 selector: 73 matchLabels: 74 app.kubernetes.io/name: node-feature-discovery 75 app.kubernetes.io/instance: gpu-operator 76 role: master 77 template: 78 metadata: 79 labels: 80 app.kubernetes.io/name: node-feature-discovery 81 app.kubernetes.io/instance: gpu-operator 82 role: master 83 annotations: 84 {} 85 spec: 86 serviceAccountName: node-feature-discovery 87 securityContext: 88 {} 89 containers: 90 - name: master 91 securityContext: 92 allowPrivilegeEscalation: false 93 capabilities: 94 drop: 95 - ALL 96 readOnlyRootFilesystem: true 97 runAsNonRoot: true 98 image: "k8s.gcr.io/nfd/node-feature-discovery:v0.10.1" 99 imagePullPolicy: IfNotPresent 100 ports: 101 - containerPort: 8080 102 name: grpc 103 namespace: gpu-operator-resources 104 env: 105 - name: NODE_NAME 106 valueFrom: 107 fieldRef: 108 fieldPath: spec.nodeName 109 command: 110 - "nfd-master" 111 resources: 112 {} 113 args: 114 - "--extra-label-ns=nvidia.com" 115 affinity: 116 nodeAffinity: 117 preferredDuringSchedulingIgnoredDuringExecution: 118 - preference: 119 matchExpressions: 120 - key: node-role.kubernetes.io/master 121 operator: In 122 values: 123 - "" 124 weight: 1 125 tolerations: 126 - effect: NoSchedule 127 key: node-role.kubernetes.io/master 128 operator: Equal 129 value: "" 130 --- 131 # Source: gpu-operator/charts/node-feature-discovery/templates/nfd-worker-conf.yaml 132 apiVersion: v1 133 kind: ConfigMap 134 metadata: 135 name: nfd-worker-conf 136 namespace: gpu-operator-resources 137 labels: 138 helm.sh/chart: node-feature-discovery-0.10.1 139 app.kubernetes.io/name: node-feature-discovery 140 app.kubernetes.io/instance: gpu-operator 141 app.kubernetes.io/version: "v0.10.1" 142 app.kubernetes.io/managed-by: Helm 143 data: 144 nfd-worker.conf: |- 145 sources: 146 pci: 147 deviceClassWhitelist: 148 - "02" 149 - "0200" 150 - "0207" 151 - "0300" 152 - "0302" 153 deviceLabelFields: 154 - vendor 155 --- 156 # Source: gpu-operator/charts/node-feature-discovery/templates/service.yaml 157 apiVersion: v1 158 kind: Service 159 metadata: 160 name: gpu-operator-node-feature-discovery-master 161 namespace: gpu-operator-resources 162 labels: 163 helm.sh/chart: node-feature-discovery-0.10.1 164 app.kubernetes.io/name: node-feature-discovery 165 app.kubernetes.io/instance: gpu-operator 166 app.kubernetes.io/version: "v0.8.2" 167 app.kubernetes.io/managed-by: Helm 168 role: master 169 spec: 170 type: ClusterIP 171 ports: 172 - port: 8080 173 targetPort: grpc 174 protocol: TCP 175 name: grpc 176 namespace: gpu-operator-resources 177 selector: 178 app.kubernetes.io/name: node-feature-discovery 179 app.kubernetes.io/instance: gpu-operator 180 --- 181 # Source: gpu-operator/charts/node-feature-discovery/templates/serviceaccount.yaml 182 apiVersion: v1 183 kind: ServiceAccount 184 metadata: 185 name: node-feature-discovery 186 namespace: gpu-operator-resources 187 labels: 188 helm.sh/chart: node-feature-discovery-0.10.1 189 app.kubernetes.io/name: node-feature-discovery 190 app.kubernetes.io/instance: gpu-operator 191 app.kubernetes.io/version: "v0.10.1" 192 app.kubernetes.io/managed-by: Helm 193 --- 194 # Source: gpu-operator/charts/node-feature-discovery/templates/worker.yaml 195 apiVersion: apps/v1 196 kind: DaemonSet 197 metadata: 198 name: gpu-operator-node-feature-discovery-worker 199 namespace: gpu-operator-resources 200 labels: 201 helm.sh/chart: node-feature-discovery-0.10.1 202 app.kubernetes.io/name: node-feature-discovery 203 app.kubernetes.io/instance: gpu-operator 204 app.kubernetes.io/version: "v0.10.1" 205 app.kubernetes.io/managed-by: Helm 206 role: worker 207 spec: 208 selector: 209 matchLabels: 210 app.kubernetes.io/name: node-feature-discovery 211 app.kubernetes.io/instance: gpu-operator 212 role: worker 213 template: 214 metadata: 215 labels: 216 app.kubernetes.io/name: node-feature-discovery 217 app.kubernetes.io/instance: gpu-operator 218 role: worker 219 annotations: 220 {} 221 spec: 222 dnsPolicy: ClusterFirstWithHostNet 223 securityContext: 224 {} 225 containers: 226 - name: worker 227 securityContext: 228 allowPrivilegeEscalation: false 229 capabilities: 230 drop: 231 - ALL 232 readOnlyRootFilesystem: true 233 runAsNonRoot: true 234 image: "k8s.gcr.io/nfd/node-feature-discovery:v0.8.2" 235 imagePullPolicy: IfNotPresent 236 env: 237 - name: NODE_NAME 238 valueFrom: 239 fieldRef: 240 fieldPath: spec.nodeName 241 resources: 242 {} 243 command: 244 - "nfd-worker" 245 args: 246 - "--sleep-interval=60s" 247 - "--server=gpu-operator-node-feature-discovery-master:8080" 248 volumeMounts: 249 - name: host-boot 250 mountPath: "/host-boot" 251 readOnly: true 252 - name: host-os-release 253 mountPath: "/host-etc/os-release" 254 readOnly: true 255 - name: host-sys 256 mountPath: "/host-sys" 257 readOnly: true 258 - name: source-d 259 mountPath: "/etc/kubernetes/node-feature-discovery/source.d/" 260 readOnly: true 261 - name: features-d 262 mountPath: "/etc/kubernetes/node-feature-discovery/features.d/" 263 readOnly: true 264 - name: nfd-worker-conf 265 mountPath: "/etc/kubernetes/node-feature-discovery" 266 readOnly: true 267 volumes: 268 - name: host-boot 269 hostPath: 270 path: "/boot" 271 - name: host-os-release 272 hostPath: 273 path: "/etc/os-release" 274 - name: host-sys 275 hostPath: 276 path: "/sys" 277 - name: source-d 278 hostPath: 279 path: "/etc/kubernetes/node-feature-discovery/source.d/" 280 - name: features-d 281 hostPath: 282 path: "/etc/kubernetes/node-feature-discovery/features.d/" 283 - name: nfd-worker-conf 284 configMap: 285 name: nfd-worker-conf 286 namespace: gpu-operator-resources 287 items: 288 - key: nfd-worker.conf 289 path: nfd-worker.conf 290 tolerations: 291 - effect: NoSchedule 292 key: node-role.kubernetes.io/master 293 operator: Equal 294 value: "" 295 - effect: NoSchedule 296 key: nvidia.com/gpu 297 operator: Equal 298 value: present 299 --- 300 # Source: gpu-operator/templates/clusterpolicy.yaml 301 apiVersion: nvidia.com/v1 302 kind: ClusterPolicy 303 metadata: 304 name: cluster-policy 305 namespace: gpu-operator-resources 306 labels: 307 app.kubernetes.io/component: "gpu-operator" 308 309 spec: 310 operator: 311 defaultRuntime: docker 312 runtimeClass: nvidia 313 initContainer: 314 repository: nvcr.io/nvidia 315 image: cuda 316 version: 11.4.2-base-ubi8 317 imagePullPolicy: IfNotPresent 318 daemonsets: 319 tolerations: 320 - effect: NoSchedule 321 key: nvidia.com/gpu 322 operator: Exists 323 priorityClassName: system-node-critical 324 validator: 325 repository: nvcr.io/nvidia/cloud-native 326 image: gpu-operator-validator 327 version: v1.11.1 328 imagePullPolicy: IfNotPresent 329 securityContext: 330 privileged: true 331 seLinuxOptions: 332 level: s0 333 plugin: 334 env: 335 - name: WITH_WORKLOAD 336 value: "false" 337 mig: 338 strategy: single 339 psp: 340 enabled: false 341 driver: 342 enabled: true 343 repository: nvcr.io/nvidia 344 image: driver 345 version: 515.48.07 346 imagePullPolicy: IfNotPresent 347 rdma: 348 enabled: false 349 useHostMofed: false 350 manager: 351 repository: nvcr.io/nvidia/cloud-native 352 image: k8s-driver-manager 353 version: v0.4.1 354 imagePullPolicy: IfNotPresent 355 env: 356 - name: ENABLE_AUTO_DRAIN 357 value: "true" 358 - name: DRAIN_USE_FORCE 359 value: "false" 360 - name: DRAIN_POD_SELECTOR_LABEL 361 value: "" 362 - name: DRAIN_TIMEOUT_SECONDS 363 value: 0s 364 - name: DRAIN_DELETE_EMPTYDIR_DATA 365 value: "false" 366 repoConfig: 367 configMapName: "" 368 certConfig: 369 name: "" 370 licensingConfig: 371 configMapName: "" 372 nlsEnabled: false 373 virtualTopology: 374 config: "" 375 securityContext: 376 privileged: true 377 seLinuxOptions: 378 level: s0 379 toolkit: 380 enabled: true 381 repository: nvcr.io/nvidia/k8s 382 image: container-toolkit 383 version: v1.10.0-ubuntu20.04 384 imagePullPolicy: IfNotPresent 385 securityContext: 386 privileged: true 387 seLinuxOptions: 388 level: s0 389 devicePlugin: 390 repository: nvcr.io/nvidia 391 image: k8s-device-plugin 392 version: v0.12.2-ubi8 393 imagePullPolicy: IfNotPresent 394 securityContext: 395 privileged: true 396 env: 397 - name: PASS_DEVICE_SPECS 398 value: "true" 399 - name: FAIL_ON_INIT_ERROR 400 value: "true" 401 - name: DEVICE_LIST_STRATEGY 402 value: envvar 403 - name: DEVICE_ID_STRATEGY 404 value: uuid 405 - name: NVIDIA_VISIBLE_DEVICES 406 value: all 407 - name: NVIDIA_DRIVER_CAPABILITIES 408 value: all 409 dcgm: 410 enabled: false 411 repository: nvcr.io/nvidia/cloud-native 412 image: dcgm 413 version: 2.4.5-1-ubuntu20.04 414 imagePullPolicy: IfNotPresent 415 hostPort: 5555 416 dcgmExporter: 417 repository: nvcr.io/nvidia/k8s 418 image: dcgm-exporter 419 version: 2.4.5-2.6.7-ubuntu20.04 420 imagePullPolicy: IfNotPresent 421 env: 422 - name: DCGM_EXPORTER_LISTEN 423 value: :9400 424 - name: DCGM_EXPORTER_KUBERNETES 425 value: "true" 426 - name: DCGM_EXPORTER_COLLECTORS 427 value: /etc/dcgm-exporter/dcp-metrics-included.csv 428 gfd: 429 repository: nvcr.io/nvidia 430 image: gpu-feature-discovery 431 version: v0.6.1-ubi8 432 imagePullPolicy: IfNotPresent 433 env: 434 - name: GFD_SLEEP_INTERVAL 435 value: 60s 436 - name: GFD_FAIL_ON_INIT_ERROR 437 value: "true" 438 migManager: 439 enabled: true 440 repository: nvcr.io/nvidia/cloud-native 441 image: k8s-mig-manager 442 version: v0.4.2-ubuntu20.04 443 imagePullPolicy: IfNotPresent 444 securityContext: 445 privileged: true 446 env: 447 - name: WITH_REBOOT 448 value: "false" 449 config: 450 name: "" 451 gpuClientsConfig: 452 name: "" 453 nodeStatusExporter: 454 enabled: false 455 repository: nvcr.io/nvidia/cloud-native 456 image: gpu-operator-validator 457 version: v1.11.1 458 imagePullPolicy: IfNotPresent 459 --- 460 # Source: gpu-operator/templates/operator.yaml 461 apiVersion: apps/v1 462 kind: Deployment 463 metadata: 464 name: gpu-operator 465 namespace: gpu-operator-resources 466 labels: 467 app.kubernetes.io/component: "gpu-operator" 468 469 spec: 470 replicas: 1 471 selector: 472 matchLabels: 473 474 app.kubernetes.io/component: "gpu-operator" 475 app: "gpu-operator" 476 template: 477 metadata: 478 labels: 479 480 app.kubernetes.io/component: "gpu-operator" 481 app: "gpu-operator" 482 annotations: 483 openshift.io/scc: restricted-readonly 484 spec: 485 serviceAccountName: gpu-operator 486 priorityClassName: system-node-critical 487 containers: 488 - name: gpu-operator 489 image: nvcr.io/nvidia/gpu-operator:v1.11.1 490 imagePullPolicy: IfNotPresent 491 command: ["gpu-operator"] 492 args: 493 - --leader-elect 494 env: 495 - name: WATCH_NAMESPACE 496 value: "" 497 - name: OPERATOR_NAMESPACE 498 valueFrom: 499 fieldRef: 500 fieldPath: metadata.namespace 501 volumeMounts: 502 - name: host-os-release 503 mountPath: "/host-etc/os-release" 504 readOnly: true 505 livenessProbe: 506 httpGet: 507 path: /healthz 508 port: 8081 509 initialDelaySeconds: 15 510 periodSeconds: 20 511 readinessProbe: 512 httpGet: 513 path: /readyz 514 port: 8081 515 initialDelaySeconds: 5 516 periodSeconds: 10 517 resources: 518 limits: 519 cpu: 500m 520 memory: 350Mi 521 requests: 522 cpu: 200m 523 memory: 100Mi 524 ports: 525 - name: metrics 526 containerPort: 8080 527 volumes: 528 - name: host-os-release 529 hostPath: 530 path: "/etc/os-release" 531 affinity: 532 nodeAffinity: 533 preferredDuringSchedulingIgnoredDuringExecution: 534 - preference: 535 matchExpressions: 536 - key: node-role.kubernetes.io/master 537 operator: In 538 values: 539 - "" 540 weight: 1 541 tolerations: 542 - effect: NoSchedule 543 key: node-role.kubernetes.io/master 544 operator: Equal 545 value: "" 546 --- 547 # Source: gpu-operator/templates/role.yaml 548 apiVersion: rbac.authorization.k8s.io/v1 549 kind: ClusterRole 550 metadata: 551 creationTimestamp: null 552 name: gpu-operator 553 namespace: gpu-operator-resources 554 labels: 555 app.kubernetes.io/component: "gpu-operator" 556 557 rules: 558 - apiGroups: 559 - config.openshift.io 560 resources: 561 - proxies 562 verbs: 563 - get 564 - apiGroups: 565 - rbac.authorization.k8s.io 566 resources: 567 - roles 568 - rolebindings 569 - clusterroles 570 - clusterrolebindings 571 verbs: 572 - '*' 573 - apiGroups: 574 - "" 575 resources: 576 - pods 577 - services 578 - endpoints 579 - persistentvolumeclaims 580 - events 581 - configmaps 582 - secrets 583 - serviceaccounts 584 - nodes 585 verbs: 586 - '*' 587 - apiGroups: 588 - "" 589 resources: 590 - namespaces 591 verbs: 592 - get 593 - list 594 - create 595 - watch 596 - update 597 - apiGroups: 598 - apps 599 resources: 600 - deployments 601 - daemonsets 602 - replicasets 603 - statefulsets 604 verbs: 605 - '*' 606 - apiGroups: 607 - monitoring.coreos.com 608 resources: 609 - servicemonitors 610 - prometheusrules 611 verbs: 612 - get 613 - list 614 - create 615 - watch 616 - update 617 - apiGroups: 618 - nvidia.com 619 resources: 620 - '*' 621 verbs: 622 - '*' 623 - apiGroups: 624 - scheduling.k8s.io 625 resources: 626 - priorityclasses 627 verbs: 628 - get 629 - list 630 - watch 631 - create 632 - apiGroups: 633 - security.openshift.io 634 resources: 635 - securitycontextconstraints 636 verbs: 637 - '*' 638 - apiGroups: 639 - policy 640 resources: 641 - podsecuritypolicies 642 verbs: 643 - use 644 resourceNames: 645 - gpu-operator-restricted 646 - apiGroups: 647 - policy 648 resources: 649 - podsecuritypolicies 650 verbs: 651 - create 652 - get 653 - update 654 - list 655 - apiGroups: 656 - config.openshift.io 657 resources: 658 - clusterversions 659 verbs: 660 - get 661 - list 662 - watch 663 - apiGroups: 664 - "" 665 - coordination.k8s.io 666 resources: 667 - configmaps 668 - leases 669 verbs: 670 - get 671 - list 672 - watch 673 - create 674 - update 675 - patch 676 - delete 677 - apiGroups: 678 - node.k8s.io 679 resources: 680 - runtimeclasses 681 verbs: 682 - get 683 - list 684 - create 685 - update 686 - watch 687 - apiGroups: 688 - image.openshift.io 689 resources: 690 - imagestreams 691 verbs: 692 - get 693 - list 694 - watch 695 --- 696 # Source: gpu-operator/templates/rolebinding.yaml 697 kind: ClusterRoleBinding 698 apiVersion: rbac.authorization.k8s.io/v1 699 metadata: 700 name: gpu-operator 701 labels: 702 app.kubernetes.io/component: "gpu-operator" 703 704 subjects: 705 - kind: ServiceAccount 706 name: gpu-operator 707 namespace: gpu-operator-resources 708 - kind: ServiceAccount 709 name: node-feature-discovery 710 namespace: gpu-operator-resources 711 roleRef: 712 kind: ClusterRole 713 name: gpu-operator 714 apiGroup: rbac.authorization.k8s.io 715 --- 716 # Source: gpu-operator/templates/serviceaccount.yaml 717 apiVersion: v1 718 kind: ServiceAccount 719 metadata: 720 name: gpu-operator 721 namespace: gpu-operator-resources 722 labels: 723 app.kubernetes.io/component: "gpu-operator"