diff --git a/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/defaults/main.yaml b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/defaults/main.yaml
new file mode 100644
index 00000000000..75ebab9bda6
--- /dev/null
+++ b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/defaults/main.yaml
@@ -0,0 +1,12 @@
+---
+ocp_username: system:admin
+become_override: false
+silent: false
+
+# Lab repository parameters
+ocp4_workload_oai_parasol_insurance_repo_url: https://github.com/rh-aiservices-bu/parasol-insurance.git
+ocp4_workload_oai_parasol_insurance_branch: main
+ocp4_workload_oai_parasol_insurance_yaml_file_path: bootstrap/applicationset/applicationset-bootstrap.yaml
+
+# Workload destroy as var
+ocp4_workload_oai_parasol_insurance_workload_destroy: "{{ False if (ACTION=='create' or ACTION=='provision') else True }}"
diff --git a/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/meta/main.yaml b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/meta/main.yaml
new file mode 100644
index 00000000000..234c8623fae
--- /dev/null
+++ b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/meta/main.yaml
@@ -0,0 +1,12 @@
+---
+galaxy_info:
+  role_name: ocp4_workload_oai_parasol_insurance
+  author:
+  - Guillaume Moutier
+  description: OpenShift AI Lab Insurance Claim workload
+  company: Red Hat
+  license: BSD
+  min_ansible_version: 2.9
+  galaxy_tags: []
+
+dependencies: []
diff --git a/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/readme.adoc b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/readme.adoc
new file mode 100644
index 00000000000..d9639ca2ba9
--- /dev/null
+++ b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/readme.adoc
@@ -0,0 +1,69 @@
+= ocp4_workload_oai_parasol_insurance - Configuration to set up the OpenShift AI Parasol Insurance Lab workload on an OpenShift cluster
+
+== Role overview
+
+* This role deploys the OpenShift AI Insurance Claim Lab workload. It uses GitOps to deploy the applications used in the Lab.
+
+* OpenShift GitOps must be enabled on the cluster for this role to work.
+
+The ocp4_workload_gitea_operator and ocp4_workload_rhods_olm roles should be set up before this role.
+
+This role consists of the following task files:
+** Tasks: link:./tasks/pre_workload.yaml[pre_workload.yaml] - Sets up an
+ environment for the workload deployment.
+*** Debug task will print out: `pre_workload Tasks completed successfully.`
+
+** Tasks: link:./tasks/workload.yaml[workload.yaml] - Used to configure the cluster
+*** Debug task will print out: `workload Tasks completed successfully.`
+
+** Tasks: link:./tasks/post_workload.yaml[post_workload.yaml] - Used to
+ configure the workload after deployment
+*** This role doesn't do anything here
+*** Debug task will print out: `post_workload Tasks completed successfully.`
+
+** Tasks: link:./tasks/remove_workload.yaml[remove_workload.yaml] - Used to
+ delete the workload
+*** Debug task will print out: `remove_workload Tasks completed successfully.`
+
+== Review the defaults variable file
+
+* This file link:./defaults/main.yaml[./defaults/main.yaml] contains all the variables you need to define to control the deployment of your workload.
+* The variable *ocp_username* is mandatory to assign the workload to the correct OpenShift user.
+* A variable *silent=True* can be passed to suppress debug messages.
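+
+The `ocp4_workload_oai_parasol_insurance_*` variables default to the upstream lab repository, branch, and ApplicationSet path, and can be overridden at deploy time, for example to test a fork or a different branch. A minimal sketch (the fork URL and branch name below are placeholders, not defaults of this role) is to add extra variables to the `ansible-playbook` commands shown in the next sections:
+
+----
+  -e"ocp4_workload_oai_parasol_insurance_repo_url=https://github.com/my-fork/parasol-insurance.git" \
+  -e"ocp4_workload_oai_parasol_insurance_branch=my-test-branch" \
+  -e"ocp4_workload_oai_parasol_insurance_yaml_file_path=bootstrap/applicationset/applicationset-bootstrap.yaml"
+----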
+
+=== Deploy a Workload with the `ocp-workload` playbook [Mostly for testing]
+
+----
+TARGET_HOST="bastion.shared-na46.openshift.opentlc.com"
+OCP_USERNAME="rshah-redhat.com"
+WORKLOAD="ocp4_workload_oai_parasol_insurance"
+GUID=1001
+
+# A TARGET_HOST is specified on the command line, without using an inventory file
+ansible-playbook -i ${TARGET_HOST}, ./configs/ocp-workloads/ocp-workload.yml \
+  -e"ansible_ssh_private_key_file=~/.ssh/keytoyourhost.pem" \
+  -e"ansible_user=ec2-user" \
+  -e"ocp_username=${OCP_USERNAME}" \
+  -e"ocp_workload=${WORKLOAD}" \
+  -e"silent=False" \
+  -e"guid=${GUID}" \
+  -e"ACTION=create"
+----
+
+=== To Delete an environment
+
+----
+TARGET_HOST="bastion.shared-na46.openshift.opentlc.com"
+OCP_USERNAME="rshah-redhat.com"
+WORKLOAD="ocp4_workload_oai_parasol_insurance"
+GUID=1002
+
+# A TARGET_HOST is specified on the command line, without using an inventory file
+ansible-playbook -i ${TARGET_HOST}, ./configs/ocp-workloads/ocp-workload.yml \
+  -e"ansible_ssh_private_key_file=~/.ssh/keytoyourhost.pem" \
+  -e"ansible_user=ec2-user" \
+  -e"ocp_username=${OCP_USERNAME}" \
+  -e"ocp_workload=${WORKLOAD}" \
+  -e"guid=${GUID}" \
+  -e"ACTION=remove"
+----
diff --git a/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/main.yaml b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/main.yaml
new file mode 100644
index 00000000000..659c5b83e7f
--- /dev/null
+++ b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/main.yaml
@@ -0,0 +1,29 @@
+---
+- name: Run workload
+  block:
+  - name: Running Pre Workload Tasks
+    include_tasks:
+      file: pre_workload.yaml
+      apply:
+        become: "{{ become_override | bool }}"
+
+  - name: Running Workload Tasks
+    include_tasks:
+      file: workload.yaml
+      apply:
+        become: "{{ become_override | bool }}"
+    when: ACTION == "create" or ACTION == "provision"
+
+  - name: Running Post Workload Tasks
+    include_tasks:
+      file: post_workload.yaml
+      apply:
+        become: "{{ become_override | bool }}"
+    when: ACTION == "create" or ACTION == "provision"
+
+  - name: Running Workload removal Tasks
+    include_tasks:
+      file: remove_workload.yaml
+      apply:
+        become: "{{ become_override | bool }}"
+    when: ACTION == "destroy" or ACTION == "remove"
diff --git a/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/post_workload.yaml b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/post_workload.yaml
new file mode 100644
index 00000000000..9a0acb0195f
--- /dev/null
+++ b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/post_workload.yaml
@@ -0,0 +1,30 @@
+---
+# Implement your Post Workload deployment tasks here
+# --------------------------------------------------
+
+#
+#
+#
+
+# Leave these as the last tasks in the playbook
+# ---------------------------------------------
+
+# For deployment onto a dedicated cluster (as part of the
+# cluster deployment) set workload_shared_deployment to False
+# This is the default so it does not have to be set explicitly
+- name: post_workload tasks complete
+  debug:
+    msg: "Post-Workload tasks completed successfully."
+  when:
+  - not silent|bool
+  - not workload_shared_deployment|default(False)
+
+# For RHPDS deployment (onto a shared cluster) set
+# workload_shared_deployment to True
+# (in the deploy script or AgnosticV configuration)
+- name: post_workload tasks complete
+  debug:
+    msg: "Post-Software checks completed successfully"
+  when:
+  - not silent|bool
+  - workload_shared_deployment|default(False)
diff --git a/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/pre_workload.yaml b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/pre_workload.yaml
new file mode 100644
index 00000000000..a70be0704c9
--- /dev/null
+++ b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/pre_workload.yaml
@@ -0,0 +1,36 @@
+---
+# Implement your Pre Workload deployment tasks here
+# -------------------------------------------------
+
+- name: Set state
+  set_fact:
+    state: present
+  when: not ocp4_workload_oai_parasol_insurance_workload_destroy|bool
+
+- name: Set state
+  set_fact:
+    state: absent
+  when: ocp4_workload_oai_parasol_insurance_workload_destroy|bool
+
+# Leave these as the last tasks in the playbook
+# ---------------------------------------------
+
+# For deployment onto a dedicated cluster (as part of the
+# cluster deployment) set workload_shared_deployment to False
+# This is the default so it does not have to be set explicitly
+- name: pre_workload tasks complete
+  debug:
+    msg: "Pre-Workload tasks completed successfully."
+  when:
+  - not silent|bool
+  - not workload_shared_deployment|default(False)
+
+# For RHPDS deployment (onto a shared cluster) set
+# workload_shared_deployment to True
+# (in the deploy script or AgnosticV configuration)
+- name: pre_workload tasks complete
+  debug:
+    msg: "Pre-Software checks completed successfully"
+  when:
+  - not silent|bool
+  - workload_shared_deployment|default(False)
diff --git a/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/remove_workload.yaml b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/remove_workload.yaml
new file mode 100644
index 00000000000..45db1e8710b
--- /dev/null
+++ b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/remove_workload.yaml
@@ -0,0 +1,16 @@
+---
+# Implement your Workload removal tasks here
+- name: "Removing workload"
+  include_tasks: "{{ item }}"
+  with_items:
+  - "./pre_workload.yaml"
+  - "./workload.yaml"
+  - "./post_workload.yaml"
+  vars:
+    ocp4_workload_oai_parasol_insurance_workload_destroy: true
+
+# Leave this as the last task in the playbook.
+- name: remove_workload tasks complete
+  debug:
+    msg: "Remove Workload tasks completed successfully."
+  when: not silent|bool
diff --git a/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/workload.yaml b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/workload.yaml
new file mode 100644
index 00000000000..873807fc092
--- /dev/null
+++ b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/tasks/workload.yaml
@@ -0,0 +1,91 @@
+---
+# Patch ArgoCD for ApplicationSet rolling updates and InferenceService health check
+- name: Get ArgoCD CR if it exists
+  k8s_info:
+    api_version: argoproj.io/v1beta1
+    kind: ArgoCD
+    namespace: openshift-gitops
+    name: openshift-gitops
+  register: argocd_info
+  ignore_errors: true
+
+- name: If ArgoCD present, patch it for ApplicationSet rolling updates
+  command: >-
+    oc patch argocd/openshift-gitops -n openshift-gitops --type merge -p='{{ lookup('template', 'rolling_updates.json') | string }}'
+  when: argocd_info.resources | length > 0
+
+- name: Set default value for health_checks_exist
+  set_fact:
+    health_checks_exist: false
+
+- name: Check if resourceHealthChecks exists in the spec
+  set_fact:
+    health_checks_exist: "{{ argocd_info.resources[0].spec.resourceHealthChecks is defined }}"
+  when: argocd_info.resources | length > 0
+
+- name: Apply patch to append to resourceHealthChecks if it already exists
+  command: >-
+    oc patch argocd/openshift-gitops -n openshift-gitops --type='json'
+    -p='[{"op": "add", "path": "/spec/resourceHealthChecks/-", "value": {{ lookup('template', 'is_health_check.json') | string }} }]'
+  when: health_checks_exist and argocd_info.resources | length > 0
+
+- name: Apply patch to create resourceHealthChecks if it does not exist
+  command: >-
+    oc patch argocd/openshift-gitops -n openshift-gitops --type='merge'
+    -p='{"spec": {"resourceHealthChecks": [ {{ lookup('template', 'is_health_check.json') | string }} ]}}'
+  when: not health_checks_exist and argocd_info.resources | length > 0
+
+# Patch ArgoCD to allow OpenShift login through restarts
+- name: Patch ArgoCD to allow OpenShift login through restarts
+  when: argocd_info.resources | length > 0
+  ignore_errors: true
+  block:
+  - name: Get the OpenShift Gitops ArgoCD server route
+    kubernetes.core.k8s_info:
+      api_version: route.openshift.io/v1
+      kind: Route
+      name: openshift-gitops-server
+      namespace: openshift-gitops
+    register: r_openshift_gitops_server_route
+
+  - name: Patch the ArgoCD server host
+    command: >-
+      oc patch argocd/openshift-gitops -n openshift-gitops --type='merge'
+      -p '{"spec": {"server": {"host": "{{ r_openshift_gitops_server_route.resources[0].spec.host }}"}}}'
+
+# Restart the DevWorkspace controller, which sometimes gets stuck and blocks ArgoCD
+- name: Get Pods in openshift-operators namespace
+  k8s_info:
+    api_version: v1
+    kind: Pod
+    namespace: openshift-operators
+  register: pod_info
+
+- name: Filter Pods that start with "devworkspace-controller-manager"
+  set_fact:
+    devworkspace_pods: "{{ pod_info.resources | selectattr('metadata.name', 'match', '^devworkspace-controller-manager') | list }}"
+
+- name: Restart the Pod
+  command: >-
+    oc delete pod {{ item.metadata.name }} -n openshift-operators
+  loop: "{{ devworkspace_pods }}"
+  when: devworkspace_pods | length > 0
+  ignore_errors: true
+
+# Clone the Lab repo and apply the ApplicationSet YAML file
+- name: Clone the Lab repo
+  git:
+    repo: "{{ ocp4_workload_oai_parasol_insurance_repo_url }}"
+    dest: "/tmp/ocp4_workload_oai_parasol_insurance_repo"
+    version: "{{ ocp4_workload_oai_parasol_insurance_branch }}"
+  register: git_clone
+
+- name: Apply the ApplicationSet YAML file
+  command: "oc apply -f /tmp/ocp4_workload_oai_parasol_insurance_repo/{{ ocp4_workload_oai_parasol_insurance_yaml_file_path }}"
+  when: git_clone.changed
+
+# Leave this as the last task in the playbook.
+- name: workload tasks complete
+  debug:
+    msg: "Workload Tasks completed successfully."
+  when: not silent|bool
diff --git a/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/templates/is_health_check.json b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/templates/is_health_check.json
new file mode 100644
index 00000000000..e365834a8f4
--- /dev/null
+++ b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/templates/is_health_check.json
@@ -0,0 +1,5 @@
+{
+  "group": "serving.kserve.io",
+  "kind": "InferenceService",
+  "check": "local health_status = {}\n\nhealth_status.status = \"Progressing\"\nhealth_status.message = \"Waiting for InferenceService to report status...\"\n\nif obj.status ~= nil then\n\n local progressing = false\n local degraded = false\n local status_false = 0\n local status_unknown = 0\n local msg = \"\"\n\n if obj.status.modelStatus ~= nil then\n if obj.status.modelStatus.transitionStatus ~= \"UpToDate\" then\n if obj.status.modelStatus.transitionStatus == \"InProgress\" then\n progressing = true\n else\n degraded = true\n end\n msg = msg .. \"0: transitionStatus | \" .. obj.status.modelStatus.transitionStatus .. \"\\n\"\n end\n end\n\n if obj.status.conditions ~= nil then\n for i, condition in pairs(obj.status.conditions) do\n\n if condition.status == \"Unknown\" then\n status_unknown = status_unknown + 1\n elseif condition.status == \"False\" then\n status_false = status_false + 1\n end\n\n if condition.status ~= \"True\" then\n msg = msg .. i .. \": \" .. condition.type .. \" | \" .. condition.status\n if condition.reason ~= nil and condition.reason ~= \"\" then\n msg = msg .. \" | \" .. condition.reason\n end\n if condition.message ~= nil and condition.message ~= \"\" then\n msg = msg .. \" | \" .. condition.message\n end\n msg = msg .. \"\\n\"\n end\n\n end\n\n if progressing == false and degraded == false and status_unknown == 0 and status_false == 0 then\n health_status.status = \"Healthy\"\n msg = \"InferenceService is healthy.\"\n elseif degraded == false and status_unknown > 0 then\n health_status.status = \"Progressing\"\n else\n health_status.status = \"Degraded\"\n end\n\n health_status.message = msg\n end\nend\n\nreturn health_status"
+}
\ No newline at end of file
diff --git a/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/templates/rolling_updates.json b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/templates/rolling_updates.json
new file mode 100644
index 00000000000..8f449660e91
--- /dev/null
+++ b/ansible/roles_ocp_workloads/ocp4_workload_oai_parasol_insurance/templates/rolling_updates.json
@@ -0,0 +1,10 @@
+{
+  "spec": {
+    "applicationSet": {
+      "enabled": true,
+      "extraCommandArgs": [
+        "--enable-progressive-syncs"
+      ]
+    }
+  }
+}
\ No newline at end of file
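
The two JSON templates are rendered into the `oc patch` commands in `tasks/workload.yaml`. For orientation, a rough, abbreviated sketch of the resulting `openshift-gitops` ArgoCD spec after the workload runs (the Lua body is rendered from `is_health_check.json` and the server host is taken from the cluster route; this is an illustration assembled from the patches above, not a verified dump):

----
spec:
  applicationSet:
    enabled: true
    extraCommandArgs:
    - --enable-progressive-syncs
  resourceHealthChecks:
  - group: serving.kserve.io
    kind: InferenceService
    check: |
      -- Lua health check rendered from is_health_check.json (abbreviated)
      local health_status = {}
      ...
      return health_status
  server:
    host: <host of the openshift-gitops-server route>
----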