Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ nvidia_gpu_operator_automatic_install_plan_approval: true
# Value rendered into the Subscription's startingCSV field.
nvidia_gpu_operator_starting_csv: gpu-operator-certified.v23.6.0
nvidia_gpu_operator_wait_for_deploy: true

# Catalog snapshot settings. When nvidia_gpu_operator_use_catalog_snapshot is
# true, a CatalogSource named nvidia_gpu_operator_catalogsource_name is created
# from <snapshot_image>:<snapshot_image_tag> and the Subscription uses it as
# its source instead of certified-operators.
# Each key is declared exactly once: duplicate mapping keys are invalid YAML
# and most parsers silently keep only the last value.
nvidia_gpu_operator_use_catalog_snapshot: false
nvidia_gpu_operator_catalogsource_name: "redhat-certified-nv-snap"
nvidia_gpu_operator_catalog_snapshot_image: "quay.io/gpte-devops-automation/olm_snapshot_certified_catalog"
nvidia_gpu_operator_catalog_snapshot_image_tag: "v4.20_2026_02_11"

ocp4_workload_nvidia_gpu_setup_create_dashboard: false
# Folded scalar holding a single URL; a second line here would be folded into
# the same value, separated by a space.
ocp4_workload_nvidia_gpu_setup_dcgm_exporter_dashboard_url: >
  https://github.com/NVIDIA/dcgm-exporter/raw/main/grafana/dcgm-exporter-dashboard.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@
kind: Namespace
name: "{{ nvidia_gpu_operator_namespace }}"

# Runs only when the catalog snapshot option is enabled: renders the
# certified-operators-index.yaml.j2 template and applies the resulting object.
- name: Create catalog source snapshot
  when: nvidia_gpu_operator_use_catalog_snapshot | default(false) | bool
  kubernetes.core.k8s:
    state: present
    definition: "{{ lookup('template', 'certified-operators-index.yaml.j2') | from_yaml }}"
  register: snapshot_result
  # Explicit success condition so retries/delay take effect on every Ansible
  # version; without `until`, older releases ignore `retries` entirely.
  until: snapshot_result is succeeded
  retries: 30
  delay: 5

- name: Create NVIDIA GPU operatorgroup
kubernetes.core.k8s:
state: present
Expand All @@ -22,6 +31,35 @@
retries: 40
delay: 6

# Manual-approval path only: poll the gpu-operator-certified Subscription until
# its status carries an installplan entry, which the approval task reads from
# nv_subscription_out. Polls up to 30 times, 20 seconds apart.
- name: wait for the status of the subscription to not be empty
  # default(true) mirrors the default applied to the same variable in the
  # Subscription template, so an unset variable means automatic approval.
  when: not (nvidia_gpu_operator_automatic_install_plan_approval | default(true) | bool)
  kubernetes.core.k8s_info:
    api_version: operators.coreos.com/v1alpha1
    kind: Subscription
    name: gpu-operator-certified
    namespace: nvidia-gpu-operator
  register: nv_subscription_out
  # Every condition must hold; each one guards the attribute lookup after it.
  until:
    - nv_subscription_out is defined
    - nv_subscription_out.resources is defined
    - nv_subscription_out.resources[0] is defined
    - nv_subscription_out.resources[0].status is defined
    - nv_subscription_out.resources[0].status.installplan is defined
  retries: 30
  delay: 20

# Manual-approval path only: set spec.approved on the InstallPlan whose name
# was read from the Subscription status registered in nv_subscription_out.
- name: patch the installplan to approve it
  # default(true) mirrors the default applied to the same variable in the
  # Subscription template, so an unset variable means automatic approval.
  when: not (nvidia_gpu_operator_automatic_install_plan_approval | default(true) | bool)
  kubernetes.core.k8s:
    state: present
    definition:
      apiVersion: operators.coreos.com/v1alpha1
      kind: InstallPlan
      metadata:
        namespace: nvidia-gpu-operator
        name: "{{ nv_subscription_out.resources[0].status.installplan.name }}"
      spec:
        approved: true

# Fixed delay before the tasks that follow. The task name now states the
# configured duration (it previously said 120 while pausing for 180 seconds).
- name: 180 second pause for NVIDIA GPU operator setup
  pause:
    seconds: 180
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# CatalogSource in openshift-marketplace backed by a tagged index image
# (grpc source type). Name, image and tag all come from role variables.
apiVersion: operators.coreos.com/v1alpha1
kind: CatalogSource
metadata:
  namespace: openshift-marketplace
  name: "{{ nvidia_gpu_operator_catalogsource_name }}"
spec:
  sourceType: grpc
  # Rendered as <image>:<tag>.
  image: "{{ nvidia_gpu_operator_catalog_snapshot_image }}:{{ nvidia_gpu_operator_catalog_snapshot_image_tag }}"
  publisher: redhat-cop-agnosticd
  displayName: "Certified Operators Index {{ nvidia_gpu_operator_catalog_snapshot_image_tag }}"
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ metadata:
namespace: nvidia-gpu-operator
spec:
channel: '{{ nvidia_gpu_operator_channel }}'
installPlanApproval: Automatic
installPlanApproval: "{{ ( nvidia_gpu_operator_automatic_install_plan_approval | default(true) | bool ) | ternary( 'Automatic', 'Manual') }}"
name: gpu-operator-certified
source: certified-operators
source: "{{ ( nvidia_gpu_operator_use_catalog_snapshot | default(false) | bool ) | ternary( nvidia_gpu_operator_catalogsource_name, 'certified-operators') }}"
sourceNamespace: openshift-marketplace
startingCSV: '{{ nvidia_gpu_operator_starting_csv }}'