diff --git a/README.md b/README.md index d9bb63b6f8b..4e49d3ceb90 100644 --- a/README.md +++ b/README.md @@ -412,7 +412,7 @@ Example commands to run test suite for the dashboard `component` only, with the make run-nowebhook ``` ```shell -make e2e-test -e OPERATOR_NAMESPACE= -e E2E_TEST_FLAGS="--test-operator-controller=false --test-webhook=false --test-component=dashboard" +make e2e-test -e OPERATOR_NAMESPACE= -e E2E_TEST_FLAGS="--test-operator-controller=false --test-webhook=false --test-component=dashboard,modelmesh" ``` diff --git a/apis/components/v1/kserve_types.go b/apis/components/v1/kserve_types.go index cc5a842201a..aba99f23473 100644 --- a/apis/components/v1/kserve_types.go +++ b/apis/components/v1/kserve_types.go @@ -21,6 +21,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +const ( + ModelMeshCtrlerComponentName = "odh-model-controller" // shared by kserve and mm +) + // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. diff --git a/apis/components/v1/modelmeshserving_types.go b/apis/components/v1/modelmeshserving_types.go index f945869bfc4..79b16985616 100644 --- a/apis/components/v1/modelmeshserving_types.go +++ b/apis/components/v1/modelmeshserving_types.go @@ -21,26 +21,21 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! -// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. - -// ModelMeshServingSpec defines the desired state of ModelMeshServing -type ModelMeshServingSpec struct { - // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster - // Important: Run "make" to regenerate code after modifying this file - - // Foo is an example field of ModelMeshServing. 
Edit modelmeshserving_types.go to remove/update - Foo string `json:"foo,omitempty"` -} +const ( + ModelMeshServingComponentName = "model-mesh" + // value should match whats set in the XValidation below + ModelMeshServingInstanceName = "default-modelmesh" + ModelMeshServingKind = "ModelMeshServing" +) -// ModelMeshServingStatus defines the observed state of ModelMeshServing -type ModelMeshServingStatus struct { - components.Status `json:",inline"` -} +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:scope=Cluster +// +kubebuilder:validation:XValidation:rule="self.metadata.name == 'default-modelmesh'",message="ModelMeshServing name must be default-modelmesh" +// +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].status`,description="Ready" +// +kubebuilder:printcolumn:name="Reason",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].reason`,description="Reason" // ModelMeshServing is the Schema for the modelmeshservings API type ModelMeshServing struct { @@ -51,16 +46,21 @@ type ModelMeshServing struct { Status ModelMeshServingStatus `json:"status,omitempty"` } -func (c *ModelMeshServing) GetDevFlags() *components.DevFlags { - return nil +// ModelMeshServingSpec defines the desired state of ModelMeshServing +type ModelMeshServingSpec struct { + ModelMeshServingCommonSpec `json:",inline"` } -func (c *ModelMeshServing) GetStatus() *components.Status { - return &c.Status.Status +type ModelMeshServingCommonSpec struct { + components.DevFlagsSpec `json:",inline"` } -// +kubebuilder:object:root=true +// ModelMeshServingStatus defines the observed state of ModelMeshServing +type ModelMeshServingStatus struct { + components.Status `json:",inline"` +} +// +kubebuilder:object:root=true // ModelMeshServingList contains a list of ModelMeshServing type ModelMeshServingList 
struct { metav1.TypeMeta `json:",inline"` @@ -71,3 +71,17 @@ type ModelMeshServingList struct { func init() { SchemeBuilder.Register(&ModelMeshServing{}, &ModelMeshServingList{}) } + +func (c *ModelMeshServing) GetDevFlags() *components.DevFlags { + return c.Spec.DevFlags +} +func (c *ModelMeshServing) GetStatus() *components.Status { + return &c.Status.Status +} + +// DSCModelMeshServing contains all the configuration exposed in DSC instance for ModelMeshServing component +type DSCModelMeshServing struct { + components.ManagementSpec `json:",inline"` + // configuration fields common across components + ModelMeshServingCommonSpec `json:",inline"` +} diff --git a/apis/components/v1/zz_generated.deepcopy.go b/apis/components/v1/zz_generated.deepcopy.go index d357c5076e6..8b08710145f 100644 --- a/apis/components/v1/zz_generated.deepcopy.go +++ b/apis/components/v1/zz_generated.deepcopy.go @@ -131,6 +131,23 @@ func (in *DSCDashboard) DeepCopy() *DSCDashboard { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DSCModelMeshServing) DeepCopyInto(out *DSCModelMeshServing) { + *out = *in + out.ManagementSpec = in.ManagementSpec + in.ModelMeshServingCommonSpec.DeepCopyInto(&out.ModelMeshServingCommonSpec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DSCModelMeshServing. +func (in *DSCModelMeshServing) DeepCopy() *DSCModelMeshServing { + if in == nil { + return nil + } + out := new(DSCModelMeshServing) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *DSCModelRegistry) DeepCopyInto(out *DSCModelRegistry) { *out = *in @@ -562,7 +579,7 @@ func (in *ModelMeshServing) DeepCopyInto(out *ModelMeshServing) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.Spec = in.Spec + in.Spec.DeepCopyInto(&out.Spec) in.Status.DeepCopyInto(&out.Status) } @@ -584,6 +601,22 @@ func (in *ModelMeshServing) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ModelMeshServingCommonSpec) DeepCopyInto(out *ModelMeshServingCommonSpec) { + *out = *in + in.DevFlagsSpec.DeepCopyInto(&out.DevFlagsSpec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelMeshServingCommonSpec. +func (in *ModelMeshServingCommonSpec) DeepCopy() *ModelMeshServingCommonSpec { + if in == nil { + return nil + } + out := new(ModelMeshServingCommonSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ModelMeshServingList) DeepCopyInto(out *ModelMeshServingList) { *out = *in @@ -619,6 +652,7 @@ func (in *ModelMeshServingList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ModelMeshServingSpec) DeepCopyInto(out *ModelMeshServingSpec) { *out = *in + in.ModelMeshServingCommonSpec.DeepCopyInto(&out.ModelMeshServingCommonSpec) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelMeshServingSpec. 
diff --git a/apis/datasciencecluster/v1/datasciencecluster_types.go b/apis/datasciencecluster/v1/datasciencecluster_types.go index cd473164f24..841f7a5ad44 100644 --- a/apis/datasciencecluster/v1/datasciencecluster_types.go +++ b/apis/datasciencecluster/v1/datasciencecluster_types.go @@ -30,7 +30,6 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/datasciencepipelines" "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" "github.com/opendatahub-io/opendatahub-operator/v2/components/kueue" - "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/trainingoperator" "github.com/opendatahub-io/opendatahub-operator/v2/components/trustyai" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" @@ -52,8 +51,7 @@ type Components struct { Workbenches workbenches.Workbenches `json:"workbenches,omitempty"` // ModelMeshServing component configuration. - // Does not support enabled Kserve at the same time - ModelMeshServing modelmeshserving.ModelMeshServing `json:"modelmeshserving,omitempty"` + ModelMeshServing componentsv1.DSCModelMeshServing `json:"modelmeshserving,omitempty"` // DataServicePipeline component configuration. 
// Require OpenShift Pipelines Operator to be installed before enable component diff --git a/components/modelmeshserving/modelmeshserving.go b/components/modelmeshserving/modelmeshserving.go deleted file mode 100644 index cb1d07b7838..00000000000 --- a/components/modelmeshserving/modelmeshserving.go +++ /dev/null @@ -1,179 +0,0 @@ -// Package modelmeshserving provides utility functions to config MoModelMesh, a general-purpose model serving management/routing layer -// +groupName=datasciencecluster.opendatahub.io -package modelmeshserving - -import ( - "context" - "fmt" - "path/filepath" - "strings" - - "github.com/go-logr/logr" - operatorv1 "github.com/openshift/api/operator/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client" - logf "sigs.k8s.io/controller-runtime/pkg/log" - - dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" - "github.com/opendatahub-io/opendatahub-operator/v2/components" - "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" - "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" -) - -var ( - ComponentName = "model-mesh" - Path = deploy.DefaultManifestPath + "/" + ComponentName + "/overlays/odh" - DependentComponentName = "odh-model-controller" - DependentPath = deploy.DefaultManifestPath + "/" + DependentComponentName + "/base" -) - -// Verifies that Dashboard implements ComponentInterface. -var _ components.ComponentInterface = (*ModelMeshServing)(nil) - -// ModelMeshServing struct holds the configuration for the ModelMeshServing component. 
-// +kubebuilder:object:generate=true -type ModelMeshServing struct { - components.Component `json:""` -} - -func (m *ModelMeshServing) Init(ctx context.Context, _ cluster.Platform) error { - log := logf.FromContext(ctx).WithName(ComponentName) - - var imageParamMap = map[string]string{ - "odh-mm-rest-proxy": "RELATED_IMAGE_ODH_MM_REST_PROXY_IMAGE", - "odh-modelmesh-runtime-adapter": "RELATED_IMAGE_ODH_MODELMESH_RUNTIME_ADAPTER_IMAGE", - "odh-modelmesh": "RELATED_IMAGE_ODH_MODELMESH_IMAGE", - "odh-modelmesh-controller": "RELATED_IMAGE_ODH_MODELMESH_CONTROLLER_IMAGE", - "odh-model-controller": "RELATED_IMAGE_ODH_MODEL_CONTROLLER_IMAGE", - } - - // odh-model-controller to use - var dependentImageParamMap = map[string]string{ - "odh-model-controller": "RELATED_IMAGE_ODH_MODEL_CONTROLLER_IMAGE", - } - - // Update image parameters - if err := deploy.ApplyParams(Path, imageParamMap); err != nil { - log.Error(err, "failed to update image", "path", Path) - } - - // Update image parameters for odh-model-controller - if err := deploy.ApplyParams(DependentPath, dependentImageParamMap); err != nil { - log.Error(err, "failed to update image", "path", DependentPath) - } - - return nil -} - -func (m *ModelMeshServing) OverrideManifests(ctx context.Context, _ cluster.Platform) error { - // Go through each manifest and set the overlays if defined - for _, subcomponent := range m.DevFlags.Manifests { - if strings.Contains(subcomponent.URI, DependentComponentName) { - // Download subcomponent - if err := deploy.DownloadManifests(ctx, DependentComponentName, subcomponent); err != nil { - return err - } - // If overlay is defined, update paths - defaultKustomizePath := "base" - if subcomponent.SourcePath != "" { - defaultKustomizePath = subcomponent.SourcePath - } - DependentPath = filepath.Join(deploy.DefaultManifestPath, DependentComponentName, defaultKustomizePath) - } - - if strings.Contains(subcomponent.URI, ComponentName) { - // Download subcomponent - if err := 
deploy.DownloadManifests(ctx, ComponentName, subcomponent); err != nil { - return err - } - // If overlay is defined, update paths - defaultKustomizePath := "overlays/odh" - if subcomponent.SourcePath != "" { - defaultKustomizePath = subcomponent.SourcePath - } - Path = filepath.Join(deploy.DefaultManifestPath, ComponentName, defaultKustomizePath) - } - } - return nil -} - -func (m *ModelMeshServing) GetComponentName() string { - return ComponentName -} - -func (m *ModelMeshServing) ReconcileComponent(ctx context.Context, - cli client.Client, - l logr.Logger, - owner metav1.Object, - dscispec *dsciv1.DSCInitializationSpec, - platform cluster.Platform, - _ bool, -) error { - enabled := m.GetManagementState() == operatorv1.Managed - monitoringEnabled := dscispec.Monitoring.ManagementState == operatorv1.Managed - - // Update Default rolebinding - if enabled { - if m.DevFlags != nil { - // Download manifests and update paths - if err := m.OverrideManifests(ctx, platform); err != nil { - return err - } - } - - if err := cluster.UpdatePodSecurityRolebinding(ctx, cli, dscispec.ApplicationsNamespace, - "modelmesh", - "modelmesh-controller", - "odh-prometheus-operator", - "prometheus-custom"); err != nil { - return err - } - } - - if err := deploy.DeployManifestsFromPath(ctx, cli, owner, Path, dscispec.ApplicationsNamespace, ComponentName, enabled); err != nil { - return fmt.Errorf("failed to apply manifests from %s : %w", Path, err) - } - l.WithValues("Path", Path).Info("apply manifests done for modelmesh") - // For odh-model-controller - if enabled { - if err := cluster.UpdatePodSecurityRolebinding(ctx, cli, dscispec.ApplicationsNamespace, - "odh-model-controller"); err != nil { - return err - } - } - if err := deploy.DeployManifestsFromPath(ctx, cli, owner, DependentPath, dscispec.ApplicationsNamespace, m.GetComponentName(), enabled); err != nil { - // explicitly ignore error if error contains keywords "spec.selector" and "field is immutable" and return all other error. 
- if !strings.Contains(err.Error(), "spec.selector") || !strings.Contains(err.Error(), "field is immutable") { - return err - } - } - - l.WithValues("Path", DependentPath).Info("apply manifests done for odh-model-controller") - - if enabled { - if err := cluster.WaitForDeploymentAvailable(ctx, cli, ComponentName, dscispec.ApplicationsNamespace, 20, 2); err != nil { - return fmt.Errorf("deployment for %s is not ready to server: %w", ComponentName, err) - } - } - - // CloudService Monitoring handling - if platform == cluster.ManagedRhods { - // first model-mesh rules - if err := m.UpdatePrometheusConfig(cli, l, enabled && monitoringEnabled, ComponentName); err != nil { - return err - } - // then odh-model-controller rules - if err := m.UpdatePrometheusConfig(cli, l, enabled && monitoringEnabled, DependentComponentName); err != nil { - return err - } - if err := deploy.DeployManifestsFromPath(ctx, cli, owner, - filepath.Join(deploy.DefaultManifestPath, "monitoring", "prometheus", "apps"), - dscispec.Monitoring.Namespace, - "prometheus", true); err != nil { - return err - } - l.Info("updating SRE monitoring done") - } - - return nil -} diff --git a/components/modelmeshserving/zz_generated.deepcopy.go b/components/modelmeshserving/zz_generated.deepcopy.go deleted file mode 100644 index fee91980836..00000000000 --- a/components/modelmeshserving/zz_generated.deepcopy.go +++ /dev/null @@ -1,39 +0,0 @@ -//go:build !ignore_autogenerated - -/* -Copyright 2023. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Code generated by controller-gen. DO NOT EDIT. - -package modelmeshserving - -import () - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ModelMeshServing) DeepCopyInto(out *ModelMeshServing) { - *out = *in - in.Component.DeepCopyInto(&out.Component) -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelMeshServing. -func (in *ModelMeshServing) DeepCopy() *ModelMeshServing { - if in == nil { - return nil - } - out := new(ModelMeshServing) - in.DeepCopyInto(out) - return out -} diff --git a/config/crd/bases/components.opendatahub.io_modelmeshservings.yaml b/config/crd/bases/components.opendatahub.io_modelmeshservings.yaml index 32d346c00e0..ee679f92e48 100644 --- a/config/crd/bases/components.opendatahub.io_modelmeshservings.yaml +++ b/config/crd/bases/components.opendatahub.io_modelmeshservings.yaml @@ -14,7 +14,16 @@ spec: singular: modelmeshserving scope: Cluster versions: - - name: v1 + - additionalPrinterColumns: + - description: Ready + jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - description: Reason + jsonPath: .status.conditions[?(@.type=="Ready")].reason + name: Reason + type: string + name: v1 schema: openAPIV3Schema: description: ModelMeshServing is the Schema for the modelmeshservings API @@ -39,10 +48,32 @@ spec: spec: description: ModelMeshServingSpec defines the desired state of ModelMeshServing properties: - foo: - description: Foo is an example field of ModelMeshServing. 
Edit modelmeshserving_types.go - to remove/update - type: string + devFlags: + description: Add developer fields + properties: + manifests: + description: List of custom manifests for the given component + items: + properties: + contextDir: + default: manifests + description: contextDir is the relative path to the folder + containing manifests in a repository, default value "manifests" + type: string + sourcePath: + default: "" + description: 'sourcePath is the subpath within contextDir + where kustomize builds start. Examples include any sub-folder + or path: `base`, `overlays/dev`, `default`, `odh` etc.' + type: string + uri: + default: "" + description: uri is the URI point to a git repo with tag/branch. + e.g. https://github.com/org/repo/tarball/ + type: string + type: object + type: array + type: object type: object status: description: ModelMeshServingStatus defines the observed state of ModelMeshServing @@ -110,6 +141,9 @@ spec: type: string type: object type: object + x-kubernetes-validations: + - message: ModelMeshServing name must be default-modelmesh + rule: self.metadata.name == 'default-modelmesh' served: true storage: true subresources: diff --git a/config/crd/bases/datasciencecluster.opendatahub.io_datascienceclusters.yaml b/config/crd/bases/datasciencecluster.opendatahub.io_datascienceclusters.yaml index 41d1c76d658..d68ea742e3c 100644 --- a/config/crd/bases/datasciencecluster.opendatahub.io_datascienceclusters.yaml +++ b/config/crd/bases/datasciencecluster.opendatahub.io_datascienceclusters.yaml @@ -351,9 +351,7 @@ spec: type: string type: object modelmeshserving: - description: |- - ModelMeshServing component configuration. - Does not support enabled Kserve at the same time + description: ModelMeshServing component configuration. 
properties: devFlags: description: Add developer fields diff --git a/controllers/components/modelmeshserving/modelmesh_support.go b/controllers/components/modelmeshserving/modelmesh_support.go new file mode 100644 index 00000000000..44e7156f535 --- /dev/null +++ b/controllers/components/modelmeshserving/modelmesh_support.go @@ -0,0 +1,87 @@ +package modelmeshserving + +import ( + "strings" + + appsv1 "k8s.io/api/apps/v1" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/predicate" +) + +// a workaround for 2.5: the odh-model-controller serviceaccount keeps getting updated with labels. +var saPredicates = predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + namespace := e.ObjectNew.GetNamespace() + if e.ObjectNew.GetName() == DependentComponentName && (namespace == "redhat-ods-applications" || namespace == "opendatahub") { + return false + } + return true + }, +} + +// a workaround for modelmesh and kserve both creating the same odh-model-controller NetworkPolicy. +var networkpolicyPredicates = predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + return e.ObjectNew.GetName() != DependentComponentName + }, +} + +var rolePredicates = predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + notAllowedNames := []string{"leader-election-role", "proxy-role", "metrics-reader", "kserve-prometheus-k8s", "odh-model-controller-role"} + for _, notallowedName := range notAllowedNames { + if e.ObjectNew.GetName() == notallowedName { + return false + } + } + return true + }, +} + +var rolebindingPredicates = predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + notAllowedNames := []string{"leader-election-rolebinding", "proxy-rolebinding", "odh-model-controller-rolebinding-opendatahub"} + for _, notallowedName := range notAllowedNames { + if e.ObjectNew.GetName() == notallowedName { + return false + } + } + return true + }, +} + +// ignore label updates if it is from application namespace. 
+var generalPredicates = predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + if strings.Contains(e.ObjectNew.GetName(), DependentComponentName) || strings.Contains(e.ObjectNew.GetName(), "kserve") { + return false + } + return true + }, +} + +// a workaround for 2.5 due to modelmesh-servingruntime.serving.kserve.io keeps updates. +var webhookPredicates = predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + return e.ObjectNew.GetName() != "modelmesh-servingruntime.serving.kserve.io" + }, +} + +// For the case of: +// +// if !strings.Contains(err.Error(), "spec.selector") || !strings.Contains(err.Error(), "field is immutable") { +// return err +// } +var skipDeploymentSelector = predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + oldDeployment, oldOk := e.ObjectOld.(*appsv1.Deployment) + newDeployment, newOk := e.ObjectNew.(*appsv1.Deployment) + if !oldOk || !newOk { + return false + } + if oldDeployment.Spec.Selector != newDeployment.Spec.Selector && e.ObjectNew.GetName() == DependentComponentName { + return false + } + return true + }, +} diff --git a/controllers/components/modelmeshserving/modelmeshserving.go b/controllers/components/modelmeshserving/modelmeshserving.go new file mode 100644 index 00000000000..5c37fdca48a --- /dev/null +++ b/controllers/components/modelmeshserving/modelmeshserving.go @@ -0,0 +1,86 @@ +package modelmeshserving + +import ( + "fmt" + + operatorv1 "github.com/openshift/api/operator/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" + odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/annotations" +) + +const ( + ComponentName = componentsv1.ModelMeshServingComponentName + 
DependentComponentName = componentsv1.ModelMeshCtrlerComponentName +) + +var ( + // modelmeshserving manifests path. + DefaultPath = odhdeploy.DefaultManifestPath + "/" + ComponentName + "/overlays/odh" + // odh-model-controller manifests path. + DependentPath = odhdeploy.DefaultManifestPath + "/" + DependentComponentName + "/base" + ServiceAccountList = []string{ + "modelmesh", + "modelmesh-controller", + "odh-model-controller", + } + serviceAccounts = map[cluster.Platform][]string{ + cluster.SelfManagedRhods: ServiceAccountList, + cluster.ManagedRhods: ServiceAccountList, + cluster.OpenDataHub: ServiceAccountList, + cluster.Unknown: ServiceAccountList, + } +) + +// GetComponentCR is for DSC to get the component ModelMeshServing's CR. +func GetComponentCR(dsc *dscv1.DataScienceCluster) *componentsv1.ModelMeshServing { + mmAnnotations := make(map[string]string) + switch dsc.Spec.Components.ModelMeshServing.ManagementState { + case operatorv1.Managed, operatorv1.Removed: + mmAnnotations[annotations.ManagementStateAnnotation] = string(dsc.Spec.Components.ModelMeshServing.ManagementState) + default: // Force, Unmanaged and unknown values are not supported yet + mmAnnotations[annotations.ManagementStateAnnotation] = "Unknown" + } + + return &componentsv1.ModelMeshServing{ + TypeMeta: metav1.TypeMeta{ + Kind: componentsv1.ModelMeshServingKind, + APIVersion: componentsv1.GroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: componentsv1.ModelMeshServingInstanceName, + Annotations: mmAnnotations, + }, + Spec: componentsv1.ModelMeshServingSpec{ + ModelMeshServingCommonSpec: dsc.Spec.Components.ModelMeshServing.ModelMeshServingCommonSpec, + }, + } +} + +// Init sets the image parameters. 
+func Init(platform cluster.Platform) error { + imageParamMap := map[string]string{ + "odh-mm-rest-proxy": "RELATED_IMAGE_ODH_MM_REST_PROXY_IMAGE", + "odh-modelmesh-runtime-adapter": "RELATED_IMAGE_ODH_MODELMESH_RUNTIME_ADAPTER_IMAGE", + "odh-modelmesh": "RELATED_IMAGE_ODH_MODELMESH_IMAGE", + "odh-modelmesh-controller": "RELATED_IMAGE_ODH_MODELMESH_CONTROLLER_IMAGE", + } + // odh-model-controller to use + var dependentImageParamMap = map[string]string{ + "odh-model-controller": "RELATED_IMAGE_ODH_MODEL_CONTROLLER_IMAGE", + } + // Update image parameters + if err := odhdeploy.ApplyParams(DefaultPath, imageParamMap); err != nil { + return fmt.Errorf("failed to update images on path %s: %w", DefaultPath, err) + } + // Update image parameters for odh-model-controller + if err := odhdeploy.ApplyParams(DependentPath, dependentImageParamMap); err != nil { + return fmt.Errorf("failed to update images on path %s: %w", DependentPath, err) + } + + return nil +} diff --git a/controllers/components/modelmeshserving/modelmeshserving_controller.go b/controllers/components/modelmeshserving/modelmeshserving_controller.go index 686d1856c10..d51845f6b3f 100644 --- a/controllers/components/modelmeshserving/modelmeshserving_controller.go +++ b/controllers/components/modelmeshserving/modelmeshserving_controller.go @@ -19,40 +19,90 @@ package modelmeshserving import ( "context" - "k8s.io/apimachinery/pkg/runtime" + templatev1 "github.com/openshift/api/template/v1" + promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + rbacv1 "k8s.io/api/rbac/v1" + extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/builder" 
componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/deploy" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/render/kustomize" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/security" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/updatestatus" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/predicates/resources" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" ) -// ModelMeshServingReconciler reconciles a ModelMeshServing object. -type ModelMeshServingReconciler struct { - client.Client - Scheme *runtime.Scheme -} +func NewComponentReconciler(ctx context.Context, mgr ctrl.Manager) error { + _, err := reconciler.ComponentReconcilerFor( + mgr, + componentsv1.ModelMeshServingInstanceName, + &componentsv1.ModelMeshServing{}, + ). + // customized Owns() for Component with new predicates + Owns(&corev1.ConfigMap{}). + Owns(&corev1.Secret{}). + Owns( + &corev1.ServiceAccount{}, + builder.WithPredicates(saPredicates), + ). + Owns(&promv1.ServiceMonitor{}). + Owns( + &networkingv1.NetworkPolicy{}, + builder.WithPredicates(networkpolicyPredicates), + ). + Owns( + &rbacv1.Role{}, + builder.WithPredicates(rolePredicates)). + Owns( + &rbacv1.ClusterRole{}, + builder.WithPredicates(rolePredicates)). + Owns( + &rbacv1.RoleBinding{}, + builder.WithPredicates(rolebindingPredicates)). + Owns( + &rbacv1.ClusterRoleBinding{}, + builder.WithPredicates(rolebindingPredicates)). + Owns( + &corev1.Service{}, + builder.WithPredicates(generalPredicates)). + Owns( + &admissionregistrationv1.ValidatingWebhookConfiguration{}, + builder.WithPredicates(webhookPredicates), + ). + Owns(&templatev1.Template{}). + Owns(&admissionregistrationv1.ValidatingWebhookConfiguration{}). 
+ Owns(&corev1.Service{}). + Owns(&appsv1.Deployment{}, builder.WithPredicates(resources.NewDeploymentPredicate(), skipDeploymentSelector)). + Watches(&extv1.CustomResourceDefinition{}). // call ForLabel() + new predicates + // Add ModelMeshServing specific actions + WithAction(initialize). + WithAction(devFlags). + WithAction(security.NewUpdatePodSecurityRoleBindingAction(serviceAccounts)). + WithAction(kustomize.NewAction( + kustomize.WithCache(kustomize.DefaultCachingKeyFn), + kustomize.WithLabel(labels.ODH.Component(ComponentName), "true"), + kustomize.WithLabel(labels.K8SCommon.PartOf, ComponentName), + )). + WithAction(deploy.NewAction( + deploy.WithFieldOwner(componentsv1.ModelMeshServingInstanceName), + deploy.WithLabel(labels.ComponentPartOf, componentsv1.ModelMeshServingInstanceName), + )). + WithAction(updatestatus.NewAction( + updatestatus.WithSelectorLabel(labels.ComponentPartOf, componentsv1.ModelMeshServingInstanceName), + )). + Build(ctx) // include GenerationChangedPredicate no need set in each Owns() above -// Reconcile is part of the main kubernetes reconciliation loop which aims to -// move the current state of the cluster closer to the desired state. -// TODO(user): Modify the Reconcile function to compare the state specified by -// the ModelMeshServing object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. -// -// For more details, check Reconcile and its Result here: -// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.12.2/pkg/reconcile -func (r *ModelMeshServingReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - _ = log.FromContext(ctx) - - // TODO(user): your logic here - - return ctrl.Result{}, nil -} + if err != nil { + return err // no need customize error, it is done in the caller main + } -// SetupWithManager sets up the controller with the Manager. 
-func (r *ModelMeshServingReconciler) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). - For(&componentsv1.ModelMeshServing{}). - Complete(r) + return nil } diff --git a/controllers/components/modelmeshserving/modelmeshserving_controller_actions.go b/controllers/components/modelmeshserving/modelmeshserving_controller_actions.go new file mode 100644 index 00000000000..9b4d4ca5441 --- /dev/null +++ b/controllers/components/modelmeshserving/modelmeshserving_controller_actions.go @@ -0,0 +1,89 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package modelmeshserving + +import ( + "context" + "fmt" + "strings" + + componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + odhtypes "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/types" + odhdeploy "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" +) + +func initialize(ctx context.Context, rr *odhtypes.ReconciliationRequest) error { + // early exist + _, ok := rr.Instance.(*componentsv1.ModelMeshServing) + if !ok { + return fmt.Errorf("resource instance %v is not a componentsv1.ModelMeshServing)", rr.Instance) + } + rr.Manifests = []odhtypes.ManifestInfo{} + // setup Manifets[0] for modelmeshserving + rr.Manifests = append(rr.Manifests, odhtypes.ManifestInfo{ + Path: odhdeploy.DefaultManifestPath, + ContextDir: ComponentName, + SourcePath: "overlays/odh", + }) + // setup Manifets[1] for odh-model-controller + rr.Manifests = append(rr.Manifests, odhtypes.ManifestInfo{ + Path: odhdeploy.DefaultManifestPath, + ContextDir: DependentComponentName, + SourcePath: "base", + }) + return nil +} + +func devFlags(ctx context.Context, rr *odhtypes.ReconciliationRequest) error { + mm, ok := rr.Instance.(*componentsv1.ModelMeshServing) + if !ok { + return fmt.Errorf("resource instance %v is not a componentsv1.ModelMeshServing)", rr.Instance) + } + + if mm.Spec.DevFlags == nil { + return nil + } + // Implement devflags support logic + // If dev flags are set, update default manifests path + if len(mm.Spec.DevFlags.Manifests) != 0 { + for _, subcomponent := range mm.Spec.DevFlags.Manifests { + if strings.Contains(subcomponent.URI, ComponentName) { + // Download modelmeshserving + if err := odhdeploy.DownloadManifests(ctx, ComponentName, subcomponent); err != nil { + return err + } + // If overlay is defined, update paths + if subcomponent.SourcePath != "" { + rr.Manifests[0].SourcePath = subcomponent.SourcePath + } + } + + if strings.Contains(subcomponent.URI, DependentComponentName) { + // Download 
odh-model-controller + if err := odhdeploy.DownloadManifests(ctx, DependentComponentName, subcomponent); err != nil { + return err + } + // If overlay is defined, update paths + if subcomponent.SourcePath != "" { + rr.Manifests[1].SourcePath = subcomponent.SourcePath + } + } + } + } + // TODO: Implement devflags logmode logic + return nil +} diff --git a/controllers/datasciencecluster/datasciencecluster_controller.go b/controllers/datasciencecluster/datasciencecluster_controller.go index 7f00a7ec38c..9abb39db9b7 100644 --- a/controllers/datasciencecluster/datasciencecluster_controller.go +++ b/controllers/datasciencecluster/datasciencecluster_controller.go @@ -21,25 +21,19 @@ import ( "context" "errors" "fmt" - "strings" "time" "github.com/go-logr/logr" "github.com/hashicorp/go-multierror" - buildv1 "github.com/openshift/api/build/v1" - imagev1 "github.com/openshift/api/image/v1" operatorv1 "github.com/openshift/api/operator/v1" admissionregistrationv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - networkingv1 "k8s.io/api/networking/v1" - rbacv1 "k8s.io/api/rbac/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" k8serr "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" - apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" @@ -54,6 +48,7 @@ import ( dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" "github.com/opendatahub-io/opendatahub-operator/v2/components/datasciencepipelines" dashboardctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/dashboard" + mmctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/modelmeshserving" modelregistryctrl 
"github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/modelregistry" rayctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/ray" "github.com/opendatahub-io/opendatahub-operator/v2/controllers/status" @@ -270,6 +265,14 @@ func (r *DataScienceClusterReconciler) Reconcile(ctx context.Context, req ctrl.R componentErrors = multierror.Append(componentErrors, err) } + // Deploy ModelMeshServing + if instance, err = r.ReconcileComponent(ctx, instance, componentsv1.ModelMeshCtrlerComponentName, func() (error, bool) { + mm := mmctrl.GetComponentCR(instance) + return r.apply(ctx, instance, mm), instance.Spec.Components.ModelMeshServing.ManagementState == operatorv1.Managed + }); err != nil { + componentErrors = multierror.Append(componentErrors, err) + } + // Process errors for components if componentErrors != nil { log.Info("DataScienceCluster Deployment Incomplete.") @@ -382,7 +385,7 @@ var configMapPredicates = predicate.Funcs{ } // Do not reconcile on kserver's inferenceservice-config CM updates, for rawdeployment namespace := e.ObjectNew.GetNamespace() - if e.ObjectNew.GetName() == "inferenceservice-config" && (namespace == "redhat-ods-applications" || namespace == "opendatahub") { //nolint:goconst + if e.ObjectNew.GetName() == "inferenceservice-config" && (namespace == "redhat-ods-applications" || namespace == "opendatahub") { return false } return true @@ -430,116 +433,31 @@ var componentDeploymentPredicates = predicate.Funcs{ }, } -// a workaround for 2.5 due to odh-model-controller serivceaccount keeps updates with label. -var saPredicates = predicate.Funcs{ - UpdateFunc: func(e event.UpdateEvent) bool { - namespace := e.ObjectNew.GetNamespace() - if e.ObjectNew.GetName() == "odh-model-controller" && (namespace == "redhat-ods-applications" || namespace == "opendatahub") { - return false - } - return true - }, -} - -// a workaround for 2.5 due to modelmesh-servingruntime.serving.kserve.io keeps updates. 
-var modelMeshwebhookPredicates = predicate.Funcs{ - UpdateFunc: func(e event.UpdateEvent) bool { - return e.ObjectNew.GetName() != "modelmesh-servingruntime.serving.kserve.io" - }, -} - -var modelMeshRolePredicates = predicate.Funcs{ - UpdateFunc: func(e event.UpdateEvent) bool { - notAllowedNames := []string{"leader-election-role", "proxy-role", "metrics-reader", "kserve-prometheus-k8s", "odh-model-controller-role"} - for _, notallowedName := range notAllowedNames { - if e.ObjectNew.GetName() == notallowedName { - return false - } - } - return true - }, -} - -// a workaround for modelmesh and kserve both create same odh-model-controller NWP. -var networkpolicyPredicates = predicate.Funcs{ - UpdateFunc: func(e event.UpdateEvent) bool { - return e.ObjectNew.GetName() != "odh-model-controller" - }, -} - -var modelMeshRBPredicates = predicate.Funcs{ - UpdateFunc: func(e event.UpdateEvent) bool { - notAllowedNames := []string{"leader-election-rolebinding", "proxy-rolebinding", "odh-model-controller-rolebinding-opendatahub"} - for _, notallowedName := range notAllowedNames { - if e.ObjectNew.GetName() == notallowedName { - return false - } - } - return true - }, -} - -// ignore label updates if it is from application namespace. -var modelMeshGeneralPredicates = predicate.Funcs{ - UpdateFunc: func(e event.UpdateEvent) bool { - if strings.Contains(e.ObjectNew.GetName(), "odh-model-controller") || strings.Contains(e.ObjectNew.GetName(), "kserve") { - return false - } - return true - }, -} - // SetupWithManager sets up the controller with the Manager. func (r *DataScienceClusterReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&dscv1.DataScienceCluster{}). - Owns(&corev1.Namespace{}). - Owns(&corev1.Secret{}). + // Owns(&corev1.Namespace{}). + // Owns(&corev1.Secret{}). Owns( &corev1.ConfigMap{}, builder.WithPredicates(configMapPredicates), ). 
- Owns( - &networkingv1.NetworkPolicy{}, - builder.WithPredicates(networkpolicyPredicates), - ). - Owns( - &rbacv1.Role{}, - builder.WithPredicates(predicate.Or(predicate.GenerationChangedPredicate{}, modelMeshRolePredicates))). - Owns( - &rbacv1.RoleBinding{}, - builder.WithPredicates(predicate.Or(predicate.GenerationChangedPredicate{}, modelMeshRBPredicates))). - Owns( - &rbacv1.ClusterRole{}, - builder.WithPredicates(predicate.Or(predicate.GenerationChangedPredicate{}, modelMeshRolePredicates))). - Owns( - &rbacv1.ClusterRoleBinding{}, - builder.WithPredicates(predicate.Or(predicate.GenerationChangedPredicate{}, modelMeshRBPredicates))). Owns( &appsv1.Deployment{}, builder.WithPredicates(componentDeploymentPredicates)). - Owns(&corev1.PersistentVolumeClaim{}). - Owns( - &corev1.Service{}, - builder.WithPredicates(predicate.Or(predicate.GenerationChangedPredicate{}, modelMeshGeneralPredicates))). - Owns(&appsv1.StatefulSet{}). - Owns(&imagev1.ImageStream{}). - Owns(&buildv1.BuildConfig{}). - Owns(&apiregistrationv1.APIService{}). - Owns(&networkingv1.Ingress{}). + // Owns(&corev1.PersistentVolumeClaim{}). + // Owns(&appsv1.StatefulSet{}). + // Owns(&imagev1.ImageStream{}). + // Owns(&buildv1.BuildConfig{}). + // Owns(&apiregistrationv1.APIService{}). + // Owns(&networkingv1.Ingress{}). Owns(&admissionregistrationv1.MutatingWebhookConfiguration{}). - Owns( - &admissionregistrationv1.ValidatingWebhookConfiguration{}, - builder.WithPredicates(modelMeshwebhookPredicates), - ). // components CRs Owns(&componentsv1.Dashboard{}). Owns(&componentsv1.Ray{}). Owns(&componentsv1.ModelRegistry{}). - Owns( - &corev1.ServiceAccount{}, - builder.WithPredicates(saPredicates), - ). + Owns(&componentsv1.ModelMeshServing{}). 
Watches( &dsciv1.DSCInitialization{}, handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, a client.Object) []reconcile.Request { diff --git a/controllers/datasciencecluster/kubebuilder_rbac.go b/controllers/datasciencecluster/kubebuilder_rbac.go index 6a91f3a588c..5743ab9ee5e 100644 --- a/controllers/datasciencecluster/kubebuilder_rbac.go +++ b/controllers/datasciencecluster/kubebuilder_rbac.go @@ -215,7 +215,7 @@ package datasciencecluster // +kubebuilder:rbac:groups=components.opendatahub.io,resources=trainingoperators/status,verbs=get;update;patch // +kubebuilder:rbac:groups=components.opendatahub.io,resources=trainingoperators/finalizers,verbs=update -// TODO: ModelMesh +// ModelMeshServing // +kubebuilder:rbac:groups=components.opendatahub.io,resources=modelmeshservings,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=components.opendatahub.io,resources=modelmeshservings/status,verbs=get;update;patch // +kubebuilder:rbac:groups=components.opendatahub.io,resources=modelmeshservings/finalizers,verbs=update diff --git a/controllers/dscinitialization/suite_test.go b/controllers/dscinitialization/suite_test.go index 618b9a62e91..4f86de593ea 100644 --- a/controllers/dscinitialization/suite_test.go +++ b/controllers/dscinitialization/suite_test.go @@ -23,6 +23,7 @@ import ( "time" routev1 "github.com/openshift/api/route/v1" + templatev1 "github.com/openshift/api/template/v1" userv1 "github.com/openshift/api/user/v1" ofapi "github.com/operator-framework/api/pkg/operators/v1alpha1" ofapiv2 "github.com/operator-framework/api/pkg/operators/v2" @@ -117,6 +118,7 @@ var _ = BeforeSuite(func() { utilruntime.Must(routev1.Install(testScheme)) utilruntime.Must(userv1.Install(testScheme)) utilruntime.Must(monitoringv1.AddToScheme(testScheme)) + utilruntime.Must(templatev1.Install(testScheme)) // +kubebuilder:scaffold:scheme k8sClient, err = client.New(cfg, client.Options{Scheme: testScheme}) diff --git 
a/controllers/webhook/webhook_suite_test.go b/controllers/webhook/webhook_suite_test.go index cb116af69ba..18735900fcb 100644 --- a/controllers/webhook/webhook_suite_test.go +++ b/controllers/webhook/webhook_suite_test.go @@ -48,7 +48,6 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/codeflare" "github.com/opendatahub-io/opendatahub-operator/v2/components/datasciencepipelines" "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" - "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/trustyai" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" modelregistry2 "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/modelregistry" @@ -271,8 +270,8 @@ func newDSC(name string, namespace string) *dscv1.DataScienceCluster { ManagementState: operatorv1.Removed, }, }, - ModelMeshServing: modelmeshserving.ModelMeshServing{ - Component: componentsold.Component{ + ModelMeshServing: componentsv1.DSCModelMeshServing{ + ManagementSpec: components.ManagementSpec{ ManagementState: operatorv1.Removed, }, }, diff --git a/docs/api-overview.md b/docs/api-overview.md index 7e16979b22a..a36142d9e98 100644 --- a/docs/api-overview.md +++ b/docs/api-overview.md @@ -120,6 +120,23 @@ DSCDashboard contains all the configuration exposed in DSC instance for Dashboar +_Appears in:_ +- [Components](#components) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `managementState` _[ManagementState](#managementstate)_ | Set to one of the following values:

- "Managed" : the operator is actively managing the component and trying to keep it active.
It will only upgrade the component if it is safe to do so

- "Removed" : the operator is actively managing the component and will not install it,
or if it is installed, the operator will try to remove it | | Enum: [Managed Removed]
| +| `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | + + +#### DSCModelMeshServing + + + +DSCModelMeshServing contains all the configuration exposed in DSC instance for ModelMeshServing component + + + _Appears in:_ - [Components](#components) @@ -525,6 +542,23 @@ _Appears in:_ | `status` _[ModelMeshServingStatus](#modelmeshservingstatus)_ | | | | +#### ModelMeshServingCommonSpec + + + + + + + +_Appears in:_ +- [DSCModelMeshServing](#dscmodelmeshserving) +- [ModelMeshServingSpec](#modelmeshservingspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | + + #### ModelMeshServingList @@ -558,7 +592,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `foo` _string_ | Foo is an example field of ModelMeshServing. Edit modelmeshserving_types.go to remove/update | | | +| `devFlags` _[DevFlags](#devflags)_ | Add developer fields | | | #### ModelMeshServingStatus @@ -1039,7 +1073,6 @@ _Appears in:_ - [DataSciencePipelines](#datasciencepipelines) - [Kserve](#kserve) - [Kueue](#kueue) -- [ModelMeshServing](#modelmeshserving) - [TrainingOperator](#trainingoperator) - [TrustyAI](#trustyai) - [Workbenches](#workbenches) @@ -1082,10 +1115,13 @@ DevFlagsSpec struct defines the component's dev flags configuration. _Appears in:_ - [Component](#component) - [DSCDashboard](#dscdashboard) +- [DSCModelMeshServing](#dscmodelmeshserving) - [DSCModelRegistry](#dscmodelregistry) - [DSCRay](#dscray) - [DashboardCommonSpec](#dashboardcommonspec) - [DashboardSpec](#dashboardspec) +- [ModelMeshServingCommonSpec](#modelmeshservingcommonspec) +- [ModelMeshServingSpec](#modelmeshservingspec) - [ModelRegistryCommonSpec](#modelregistrycommonspec) - [ModelRegistrySpec](#modelregistryspec) - [RayCommonSpec](#raycommonspec) @@ -1107,6 +1143,7 @@ ManagementSpec struct defines the component's management configuration. 
_Appears in:_ - [Component](#component) - [DSCDashboard](#dscdashboard) +- [DSCModelMeshServing](#dscmodelmeshserving) - [DSCModelRegistry](#dscmodelregistry) - [DSCRay](#dscray) @@ -1235,29 +1272,6 @@ _Appears in:_ -## datasciencecluster.opendatahub.io/modelmeshserving - -Package modelmeshserving provides utility functions to config MoModelMesh, a general-purpose model serving management/routing layer - - - -#### ModelMeshServing - - - -ModelMeshServing struct holds the configuration for the ModelMeshServing component. - - - -_Appears in:_ -- [Components](#components) - -| Field | Description | Default | Validation | -| --- | --- | --- | --- | -| `Component` _[Component](#component)_ | | | | - - - ## datasciencecluster.opendatahub.io/trainingoperator Package trainingoperator provides utility functions to config trainingoperator as part of the stack @@ -1381,7 +1395,7 @@ _Appears in:_ | --- | --- | --- | --- | | `dashboard` _[DSCDashboard](#dscdashboard)_ | Dashboard component configuration. | | | | `workbenches` _[Workbenches](#workbenches)_ | Workbenches component configuration. | | | -| `modelmeshserving` _[ModelMeshServing](#modelmeshserving)_ | ModelMeshServing component configuration.
Does not support enabled Kserve at the same time | | | +| `modelmeshserving` _[DSCModelMeshServing](#dscmodelmeshserving)_ | ModelMeshServing component configuration. | | | | `datasciencepipelines` _[DataSciencePipelines](#datasciencepipelines)_ | DataServicePipeline component configuration.
Require OpenShift Pipelines Operator to be installed before enable component | | | | `kserve` _[Kserve](#kserve)_ | Kserve component configuration.
Require OpenShift Serverless and OpenShift Service Mesh Operators to be installed before enable component
Does not support enabled ModelMeshServing at the same time | | | | `kueue` _[Kueue](#kueue)_ | Kueue component configuration. | | | diff --git a/main.go b/main.go index 1c49ead8c82..2a0e0c6bebc 100644 --- a/main.go +++ b/main.go @@ -31,6 +31,7 @@ import ( operatorv1 "github.com/openshift/api/operator/v1" routev1 "github.com/openshift/api/route/v1" securityv1 "github.com/openshift/api/security/v1" + templatev1 "github.com/openshift/api/template/v1" userv1 "github.com/openshift/api/user/v1" ofapiv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" ofapiv2 "github.com/operator-framework/api/pkg/operators/v2" @@ -64,6 +65,7 @@ import ( featurev1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/features/v1" "github.com/opendatahub-io/opendatahub-operator/v2/controllers/certconfigmapgenerator" dashboardctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/dashboard" + mmctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/modelmeshserving" modelregistryctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/modelregistry" rayctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/ray" dscctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/datasciencecluster" @@ -110,6 +112,7 @@ func init() { //nolint:gochecknoinits utilruntime.Must(operatorv1.Install(scheme)) utilruntime.Must(consolev1.AddToScheme(scheme)) utilruntime.Must(securityv1.Install(scheme)) + utilruntime.Must(templatev1.Install(scheme)) } func initComponents(_ context.Context, p cluster.Platform) error { @@ -124,6 +127,9 @@ func initComponents(_ context.Context, p cluster.Platform) error { if err := modelregistryctrl.Init(p); err != nil { return err } + if err := mmctrl.Init(p); err != nil { + return err + } return multiErr.ErrorOrNil() } @@ -417,6 +423,10 @@ func CreateComponentReconcilers(ctx context.Context, mgr manager.Manager) error } if err := 
modelregistryctrl.NewComponentReconciler(ctx, mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "ModelRegistryReconciler") + } + + if err := mmctrl.NewComponentReconciler(ctx, mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "ModelMeshServingReconciler") return err } diff --git a/pkg/upgrade/upgrade.go b/pkg/upgrade/upgrade.go index 2a6963bdf00..a1823df7f8a 100644 --- a/pkg/upgrade/upgrade.go +++ b/pkg/upgrade/upgrade.go @@ -36,7 +36,6 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/datasciencepipelines" "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" "github.com/opendatahub-io/opendatahub-operator/v2/components/kueue" - "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/trainingoperator" "github.com/opendatahub-io/opendatahub-operator/v2/components/trustyai" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" @@ -73,8 +72,8 @@ func CreateDefaultDSC(ctx context.Context, cli client.Client) error { Workbenches: workbenches.Workbenches{ Component: componentsold.Component{ManagementState: operatorv1.Managed}, }, - ModelMeshServing: modelmeshserving.ModelMeshServing{ - Component: componentsold.Component{ManagementState: operatorv1.Managed}, + ModelMeshServing: componentsv1.DSCModelMeshServing{ + ManagementSpec: components.ManagementSpec{ManagementState: operatorv1.Managed}, }, DataSciencePipelines: datasciencepipelines.DataSciencePipelines{ Component: componentsold.Component{ManagementState: operatorv1.Managed}, diff --git a/tests/e2e/controller_test.go b/tests/e2e/controller_test.go index cc8f940bb34..086af00fbc9 100644 --- a/tests/e2e/controller_test.go +++ b/tests/e2e/controller_test.go @@ -42,6 +42,7 @@ var ( "dashboard": dashboardTestSuite, "ray": rayTestSuite, "modelregistry": modelRegistryTestSuite, + "modelmesh": 
modelMeshServingTestSuite, } ) diff --git a/tests/e2e/dashboard_test.go b/tests/e2e/dashboard_test.go index eb67286c308..cde11a36859 100644 --- a/tests/e2e/dashboard_test.go +++ b/tests/e2e/dashboard_test.go @@ -121,7 +121,7 @@ func (tc *DashboardTestCtx) testOwnerReferences() error { } // Test Dashboard CR ownerref - if tc.testDashboardInstance.OwnerReferences[0].Kind != "DataScienceCluster" { + if tc.testDashboardInstance.OwnerReferences[0].Kind != dscKind { return fmt.Errorf("expected ownerreference DataScienceCluster not found. Got ownereferrence: %v", tc.testDashboardInstance.OwnerReferences[0].Kind) } diff --git a/tests/e2e/helper_test.go b/tests/e2e/helper_test.go index ef82d7ad6db..12a0fe7e302 100644 --- a/tests/e2e/helper_test.go +++ b/tests/e2e/helper_test.go @@ -31,7 +31,6 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/datasciencepipelines" "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" "github.com/opendatahub-io/opendatahub-operator/v2/components/kueue" - "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" "github.com/opendatahub-io/opendatahub-operator/v2/components/trainingoperator" "github.com/opendatahub-io/opendatahub-operator/v2/components/trustyai" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" @@ -129,9 +128,9 @@ func setupDSCInstance(name string) *dscv1.DataScienceCluster { ManagementState: operatorv1.Removed, }, }, - ModelMeshServing: modelmeshserving.ModelMeshServing{ - Component: componentsold.Component{ - ManagementState: operatorv1.Removed, + ModelMeshServing: componentsv1.DSCModelMeshServing{ + ManagementSpec: components.ManagementSpec{ + ManagementState: operatorv1.Managed, }, }, DataSciencePipelines: datasciencepipelines.DataSciencePipelines{ diff --git a/tests/e2e/modelmeshserving_test.go b/tests/e2e/modelmeshserving_test.go new file mode 100644 index 00000000000..404123adb49 --- /dev/null +++ b/tests/e2e/modelmeshserving_test.go 
@@ -0,0 +1,280 @@ +package e2e_test + +import ( + "context" + "errors" + "fmt" + "reflect" + "testing" + "time" + + operatorv1 "github.com/openshift/api/operator/v1" + "github.com/stretchr/testify/require" + autoscalingv1 "k8s.io/api/autoscaling/v1" + k8serr "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" + + componentsv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/components/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" +) + +type ModelMeshServingTestCtx struct { + testCtx *testContext + testModelMeshServingInstance componentsv1.ModelMeshServing +} + +func modelMeshServingTestSuite(t *testing.T) { + t.Helper() + + mmCtx := ModelMeshServingTestCtx{} + var err error + mmCtx.testCtx, err = NewTestContext() + require.NoError(t, err) + + testCtx := mmCtx.testCtx + + t.Run(testCtx.testDsc.Name, func(t *testing.T) { + // creation + t.Run("Creation of ModelMeshServing CR", func(t *testing.T) { + err = mmCtx.testModelMeshServingCreation() + require.NoError(t, err, "error creating ModelMeshServing CR") + }) + + t.Run("Validate ModelMeshServing instance", func(t *testing.T) { + err = mmCtx.validateModelMeshServing() + require.NoError(t, err, "error validating ModelMeshServing instance") + }) + + t.Run("Validate Ownerrefrences exist", func(t *testing.T) { + err = mmCtx.testOwnerReferences() + require.NoError(t, err, "error getting all ModelMeshServing's Ownerrefrences") + }) + + t.Run("Validate ModelMeshServing Ready", func(t *testing.T) { + err = mmCtx.validateModelMeshServingReady() + require.NoError(t, err, "ModelMeshServing instance is not Ready") + }) + + // reconcile + t.Run("Validate Controller reconcile", func(t *testing.T) { + err = mmCtx.testUpdateOnModelMeshServingResources() + require.NoError(t, err, "error testing updates for 
ModelMeshServing's managed resources") + }) + + t.Run("Validate Disabling ModelMeshServing Component", func(t *testing.T) { + err = mmCtx.testUpdateModelMeshServingComponentDisabled() + require.NoError(t, err, "error testing modemeshserving component enabled field") + }) + }) +} + +func (tc *ModelMeshServingTestCtx) testModelMeshServingCreation() error { + if tc.testCtx.testDsc.Spec.Components.ModelMeshServing.ManagementState != operatorv1.Managed { + return nil + } + + err := tc.testCtx.wait(func(ctx context.Context) (bool, error) { + existingModelMeshServingList := &componentsv1.ModelMeshServingList{} + + if err := tc.testCtx.customClient.List(ctx, existingModelMeshServingList); err != nil { + return false, err + } + + switch { + case len(existingModelMeshServingList.Items) == 1: + tc.testModelMeshServingInstance = existingModelMeshServingList.Items[0] + return true, nil + case len(existingModelMeshServingList.Items) > 1: + return false, fmt.Errorf( + "unexpected ModelMeshServing CR instances. 
Expected 1 , Found %v instance", len(existingModelMeshServingList.Items)) + default: + return false, nil + } + }) + + if err != nil { + return fmt.Errorf("unable to find ModelMeshServing CR instance: %w", err) + } + + return nil +} + +func (tc *ModelMeshServingTestCtx) validateModelMeshServing() error { + // ModelMeshServing spec should match the spec of ModelMeshServing component in DSC + if !reflect.DeepEqual(tc.testCtx.testDsc.Spec.Components.ModelMeshServing.ModelMeshServingCommonSpec, tc.testModelMeshServingInstance.Spec.ModelMeshServingCommonSpec) { + err := fmt.Errorf("expected .spec for ModelMeshServing %v, got %v", + tc.testCtx.testDsc.Spec.Components.ModelMeshServing.ModelMeshServingCommonSpec, tc.testModelMeshServingInstance.Spec.ModelMeshServingCommonSpec) + return err + } + return nil +} + +func (tc *ModelMeshServingTestCtx) testOwnerReferences() error { + if len(tc.testModelMeshServingInstance.OwnerReferences) != 1 { + return errors.New("expect CR has ownerreferences set") + } + + // Test ModelMeshServing CR ownerref + if tc.testModelMeshServingInstance.OwnerReferences[0].Kind != dscKind { + return fmt.Errorf("expected ownerreference DataScienceCluster not found. Got ownereferrence: %v", + tc.testModelMeshServingInstance.OwnerReferences[0].Kind) + } + + // Test ModelMeshServing resources + appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{ + LabelSelector: labels.ODH.Component(componentsv1.ModelMeshServingComponentName), + }) + if err != nil { + return fmt.Errorf("error listing component deployments %w", err) + } + // test any one deployment for ownerreference + if len(appDeployments.Items) != 0 && appDeployments.Items[0].OwnerReferences[0].Kind != componentsv1.ModelMeshServingKind { + return fmt.Errorf("expected ownerreference not found. 
Got ownereferrence: %v", + appDeployments.Items[0].OwnerReferences) + } + + return nil +} + +// Verify ModelMeshServing instance is in Ready phase when modelmeshserving deployments are up and running. +func (tc *ModelMeshServingTestCtx) validateModelMeshServingReady() error { + err := wait.PollUntilContextTimeout(tc.testCtx.ctx, generalRetryInterval, componentReadyTimeout, true, func(ctx context.Context) (bool, error) { + key := types.NamespacedName{Name: tc.testModelMeshServingInstance.Name} + mm := &componentsv1.ModelMeshServing{} + + err := tc.testCtx.customClient.Get(ctx, key, mm) + if err != nil { + return false, err + } + return mm.Status.Phase == readyStatus, nil + }) + + if err != nil { + return fmt.Errorf("error waiting Ready state for ModelMeshServing %v: %w", tc.testModelMeshServingInstance.Name, err) + } + + return nil +} + +func (tc *ModelMeshServingTestCtx) testUpdateOnModelMeshServingResources() error { + // Test Updating ModelMeshServing Replicas + + appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{ + LabelSelector: labels.ComponentPartOf + "=" + tc.testModelMeshServingInstance.Name, + }) + if err != nil { + return err + } + + if len(appDeployments.Items) != 1 { + return fmt.Errorf("error getting deployment for component %s", tc.testModelMeshServingInstance.Name) + } + + const expectedReplica int32 = 2 // from 1 to 2 + + testDeployment := appDeployments.Items[0] + patchedReplica := &autoscalingv1.Scale{ + ObjectMeta: metav1.ObjectMeta{ + Name: testDeployment.Name, + Namespace: testDeployment.Namespace, + }, + Spec: autoscalingv1.ScaleSpec{ + Replicas: expectedReplica, + }, + Status: autoscalingv1.ScaleStatus{}, + } + updatedDep, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).UpdateScale(tc.testCtx.ctx, + testDeployment.Name, patchedReplica, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("error patching 
component resources : %w", err) + } + if updatedDep.Spec.Replicas != patchedReplica.Spec.Replicas { + return fmt.Errorf("failed to patch replicas : expect to be %v but got %v", patchedReplica.Spec.Replicas, updatedDep.Spec.Replicas) + } + + // Sleep for 20 seconds to allow the operator to reconcile + // we expect it should not revert back to original value because of AllowList + time.Sleep(2 * generalRetryInterval) + reconciledDep, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).Get(tc.testCtx.ctx, testDeployment.Name, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("error getting component resource after reconcile: %w", err) + } + if *reconciledDep.Spec.Replicas != expectedReplica { + return fmt.Errorf("failed to revert back replicas : expect to be %v but got %v", expectedReplica, *reconciledDep.Spec.Replicas) + } + + return nil +} + +func (tc *ModelMeshServingTestCtx) testUpdateModelMeshServingComponentDisabled() error { + // Test Updating ModelMeshServing to be disabled + var mmDeploymentName string + + if tc.testCtx.testDsc.Spec.Components.ModelMeshServing.ManagementState == operatorv1.Managed { + appDeployments, err := tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).List(tc.testCtx.ctx, metav1.ListOptions{ + LabelSelector: labels.ODH.Component(componentsv1.ModelMeshServingComponentName), + }) + if err != nil { + return fmt.Errorf("error getting enabled component %v", componentsv1.ModelMeshServingComponentName) + } + if len(appDeployments.Items) > 0 { + mmDeploymentName = appDeployments.Items[0].Name + if appDeployments.Items[0].Status.ReadyReplicas == 0 { + return fmt.Errorf("error getting enabled component: %s its deployment 'ReadyReplicas'", mmDeploymentName) + } + } + } else { + return errors.New("modelmeshserving spec should be in 'enabled: true' state in order to perform test") + } + + // Disable component ModelMeshServing + err := retry.RetryOnConflict(retry.DefaultRetry, func() 
error { + // refresh DSC instance in case it was updated during the reconcile + err := tc.testCtx.customClient.Get(tc.testCtx.ctx, types.NamespacedName{Name: tc.testCtx.testDsc.Name}, tc.testCtx.testDsc) + if err != nil { + return fmt.Errorf("error getting resource %w", err) + } + // Disable the Component + tc.testCtx.testDsc.Spec.Components.ModelMeshServing.ManagementState = operatorv1.Removed + + // Try to update + err = tc.testCtx.customClient.Update(tc.testCtx.ctx, tc.testCtx.testDsc) + // Return err itself here (not wrapped inside another error) + // so that RetryOnConflict can identify it correctly. + if err != nil { + return fmt.Errorf("error updating component from 'enabled: true' to 'enabled: false': %w", err) + } + + return nil + }) + if err != nil { + return fmt.Errorf("error after retry %w", err) + } + + if err = tc.testCtx.wait(func(ctx context.Context) (bool, error) { + // Verify ModeMeshServing CR is deleted + mm := &componentsv1.ModelMeshServing{} + err = tc.testCtx.customClient.Get(ctx, client.ObjectKey{Name: tc.testModelMeshServingInstance.Name}, mm) + return k8serr.IsNotFound(err), nil + }); err != nil { + return fmt.Errorf("component modemeshserving is disabled, should not get the ModelMeshServing CR %v", tc.testModelMeshServingInstance.Name) + } + + // Sleep for 20 seconds to allow the operator to reconcile + time.Sleep(2 * generalRetryInterval) + _, err = tc.testCtx.kubeClient.AppsV1().Deployments(tc.testCtx.applicationsNamespace).Get(tc.testCtx.ctx, mmDeploymentName, metav1.GetOptions{}) + if err != nil { + if k8serr.IsNotFound(err) { + return nil // correct result: should not find deployment after we disable it already + } + return fmt.Errorf("error getting component resource after reconcile: %w", err) + } + return fmt.Errorf("component %v is disabled, should not get its deployment %v from NS %v any more", + componentsv1.ModelMeshServingKind, + mmDeploymentName, + tc.testCtx.applicationsNamespace) +} diff --git 
a/tests/e2e/odh_manager_test.go b/tests/e2e/odh_manager_test.go index 90fbba36943..1fd726698a4 100644 --- a/tests/e2e/odh_manager_test.go +++ b/tests/e2e/odh_manager_test.go @@ -56,4 +56,10 @@ func (tc *testContext) validateOwnedCRDs(t *testing.T) { require.NoErrorf(t, tc.validateCRD("modelregistries.components.opendatahub.io"), "error in validating CRD : modelregistries.components.opendatahub.io") }) + + t.Run("Validate ModelMeshServing CRD", func(t *testing.T) { + t.Parallel() + require.NoErrorf(t, tc.validateCRD("modelmeshservings.components.opendatahub.io"), + "error in validating CRD : modelmeshservings.components.opendatahub.io") + }) } diff --git a/tests/e2e/ray_test.go b/tests/e2e/ray_test.go index d867be6e0a6..1183c3ac35f 100644 --- a/tests/e2e/ray_test.go +++ b/tests/e2e/ray_test.go @@ -119,7 +119,7 @@ func (tc *RayTestCtx) testOwnerReferences() error { } // Test Ray CR ownerref - if tc.testRayInstance.OwnerReferences[0].Kind != "DataScienceCluster" { + if tc.testRayInstance.OwnerReferences[0].Kind != dscKind { return fmt.Errorf("expected ownerreference DataScienceCluster not found. Got ownereferrence: %v", tc.testRayInstance.OwnerReferences[0].Kind) }