feat: Configure e2e-wine UAT to run behind proxy (#115)
Configure the pipeline steps in the test to run behind a proxy, following the approach used for the kfp-v2 UAT.

Closes #107
Fixes #116
orfeas-k authored Sep 5, 2024
1 parent c33117b commit 109a69f
Showing 3 changed files with 88 additions and 13 deletions.
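
For context, the change mirrors the kfp-v2 UAT: each pipeline step (a KFP v2 `PipelineTask`) gets the proxy settings injected as container environment variables. A minimal sketch of that pattern, assuming the kfp v2 SDK (the component and pipeline names here are illustrative, not from this repo):

```python
import os
from kfp import dsl

@dsl.component
def fetch_data(url: str) -> str:
    # Outbound HTTP calls inside this step honour the proxy env vars.
    return url

@dsl.pipeline(name='proxy-demo')
def proxy_demo(url: str):
    task = fetch_data(url=url)
    # Inject the proxy configuration into the step's container.
    for name in ('http_proxy', 'https_proxy', 'no_proxy'):
        task.set_env_variable(name=name, value=os.environ.get(name.upper(), ''))
```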
7 changes: 5 additions & 2 deletions README.md
@@ -143,7 +143,7 @@ tox -e kubeflow-local

#### Prerequisites for KServe UATs

To be able to run the KServe UATs behind proxy, first you need to configure `kserve-controller`
To be able to run UATs requiring KServe (e2e-wine, kserve, mlflow-kserve) behind proxy, first you need to configure `kserve-controller`
and `knative-serving` charms to function behind proxy.

> [!NOTE]
@@ -224,9 +224,12 @@ To run the tests behind proxy using Notebook:
and run the Notebook.

Currently, the following tests are supported to run behind proxy:
* e2e-wine
* katib
* kfp_v2
* kserve
* mlflow-kserve
* mlflow-minio
* training

#### Running using `driver`
92 changes: 82 additions & 10 deletions tests/notebooks/e2e-wine/e2e-wine-kfp-mlflow-kserve.ipynb
@@ -47,6 +47,38 @@
"from tenacity import retry, stop_after_attempt, wait_exponential"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "16ffa4b2",
"metadata": {},
"outputs": [],
"source": [
"HTTP_PROXY = HTTPS_PROXY = NO_PROXY = None\n",
"\n",
"if os.environ.get('HTTP_PROXY') and os.environ.get('HTTPS_PROXY') and os.environ.get('NO_PROXY'):\n",
"    HTTP_PROXY = os.environ['HTTP_PROXY']\n",
"    HTTPS_PROXY = os.environ['HTTPS_PROXY']\n",
"    # NO_PROXY is expected to already include `.kubeflow`, which the pipelines need\n",
"    NO_PROXY = os.environ['NO_PROXY']\n",
"\n",
"def add_proxy(obj, http_proxy=HTTP_PROXY, https_proxy=HTTPS_PROXY, no_proxy=NO_PROXY):\n",
"    \"\"\"Adds the proxy env vars to the PipelineTask object.\"\"\"\n",
"    return (\n",
"        obj.set_env_variable(name='http_proxy', value=http_proxy)\n",
"        .set_env_variable(name='https_proxy', value=https_proxy)\n",
"        .set_env_variable(name='HTTP_PROXY', value=http_proxy)\n",
"        .set_env_variable(name='HTTPS_PROXY', value=https_proxy)\n",
"        .set_env_variable(name='no_proxy', value=no_proxy)\n",
"        .set_env_variable(name='NO_PROXY', value=no_proxy)\n",
"    )\n",
"\n",
"def proxy_envs_set() -> bool:\n",
"    \"\"\"Return True if all proxy env vars are set.\"\"\"\n",
"    return bool(HTTP_PROXY and HTTPS_PROXY and NO_PROXY)"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -189,7 +221,8 @@
"    return isvc_url\n",
"\n",
"# Fetch environment variables for MLflow tracking and AWS credentials.\n",
"# These are guaranteed to be present because of MLflow's PodDefault; see [this guide](https://documentation.ubuntu.com/charmed-mlflow/en/latest/tutorial/mlflow-kubeflow/).\n",
"\n",
"mlflow_tracking_uri = os.getenv('MLFLOW_TRACKING_URI')\n",
"mlflow_s3_endpoint_url = os.getenv('MLFLOW_S3_ENDPOINT_URL')\n",
"aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')\n",
@@ -216,7 +249,37 @@
"    # Step 4: Deploy the trained model with KServe\n",
"    deploy_task = deploy_model_with_kserve(\n",
"        model_uri=train_task.output, isvc_name=ISVC_NAME\n",
"    ).set_env_variable(name='AWS_SECRET_ACCESS_KEY', value=aws_secret_access_key)"
"    ).set_env_variable(name='AWS_SECRET_ACCESS_KEY', value=aws_secret_access_key)\n",
"\n",
"# This pipeline definition is identical to the one above, except that proxy environment\n",
"# variables are added to each step so that the steps can run behind a proxy. The next\n",
"# cell decides which pipeline to use, based on whether those variables are set.\n",
"@pipeline(name='download-preprocess-train-deploy-pipeline')\n",
"def download_preprocess_train_deploy_pipeline_proxy(url: str):\n",
"    # Step 1: Download the dataset from the URL\n",
"    download_task = add_proxy(download_dataset(url=url))\n",
"\n",
"    # Step 2: Preprocess the downloaded dataset\n",
"    preprocess_task = add_proxy(\n",
"        preprocess_dataset(dataset=download_task.outputs['dataset_path'])\n",
"    )\n",
"\n",
"    # Step 3: Train the model on the preprocessed dataset\n",
"    train_task = add_proxy(\n",
"        train_model(\n",
"            dataset=preprocess_task.outputs['output_file'], run_name=MLFLOW_RUN_NAME, model_name=MLFLOW_MODEL_NAME\n",
"        )\n",
"        .set_env_variable(name='MLFLOW_TRACKING_URI', value=mlflow_tracking_uri)\n",
"        .set_env_variable(name='MLFLOW_S3_ENDPOINT_URL', value=mlflow_s3_endpoint_url)\n",
"        .set_env_variable(name='AWS_ACCESS_KEY_ID', value=aws_access_key_id)\n",
"        .set_env_variable(name='AWS_SECRET_ACCESS_KEY', value=aws_secret_access_key)\n",
"    )\n",
"\n",
"    # Step 4: Deploy the trained model with KServe\n",
"    deploy_task = add_proxy(\n",
"        deploy_model_with_kserve(\n",
"            model_uri=train_task.output, isvc_name=ISVC_NAME\n",
"        ).set_env_variable(name='AWS_SECRET_ACCESS_KEY', value=aws_secret_access_key)\n",
"    )"
]
},
{
@@ -226,11 +289,6 @@
"metadata": {},
"outputs": [],
"source": [
"# Compile the pipeline to a YAML file\n",
"# This step translates the Python-based pipeline definition into a YAML file \n",
"# that can be used to run the pipeline in Kubeflow Pipelines.\n",
"kfp.compiler.Compiler().compile(download_preprocess_train_deploy_pipeline, 'download_preprocess_train_deploy_pipeline.yaml')\n",
"\n",
"# Initialize a KFP client\n",
"# This client is used to interact with the Kubeflow Pipelines API.\n",
"client = kfp.Client()\n",
@@ -239,9 +297,23 @@
"# This URL points to the dataset that will be downloaded and processed in the pipeline.\n",
"url = 'https://raw.githubusercontent.com/canonical/kubeflow-examples/main/e2e-wine-kfp-mlflow/winequality-red.csv'\n",
"\n",
"# Run the pipeline\n",
"# This command starts a new run of the compiled pipeline, passing in the dataset URL as an argument.\n",
"run = client.create_run_from_pipeline_func(download_preprocess_train_deploy_pipeline, arguments={'url': url})"
"# If proxy environment variables are set, use the `_proxy` pipeline definition.\n",
"if proxy_envs_set():\n",
"    # Compile the pipeline to a YAML file\n",
"    # This step translates the Python-based pipeline definition into a YAML file\n",
"    # that can be used to run the pipeline in Kubeflow Pipelines.\n",
"    kfp.compiler.Compiler().compile(download_preprocess_train_deploy_pipeline_proxy, 'download_preprocess_train_deploy_pipeline_proxy.yaml')\n",
"    # Run the pipeline\n",
"    # This command starts a new run of the compiled pipeline, passing in the dataset URL as an argument.\n",
"    run = client.create_run_from_pipeline_func(download_preprocess_train_deploy_pipeline_proxy, arguments={'url': url})\n",
"else:\n",
"    # Compile the pipeline to a YAML file\n",
"    # This step translates the Python-based pipeline definition into a YAML file\n",
"    # that can be used to run the pipeline in Kubeflow Pipelines.\n",
"    kfp.compiler.Compiler().compile(download_preprocess_train_deploy_pipeline, 'download_preprocess_train_deploy_pipeline.yaml')\n",
"    # Run the pipeline\n",
"    # This command starts a new run of the compiled pipeline, passing in the dataset URL as an argument.\n",
"    run = client.create_run_from_pipeline_func(download_preprocess_train_deploy_pipeline, arguments={'url': url})"
]
},
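The if/else in the last cell above compiles and runs whichever pipeline definition applies. An equivalent, slightly tighter variant (a sketch, not part of this commit) would select the pipeline function once and share the compile/run calls:

```python
# Sketch: choose the definition up front, then compile and run it once.
pipeline_func = (
    download_preprocess_train_deploy_pipeline_proxy
    if proxy_envs_set()
    else download_preprocess_train_deploy_pipeline
)
kfp.compiler.Compiler().compile(pipeline_func, 'download_preprocess_train_deploy_pipeline.yaml')
run = client.create_run_from_pipeline_func(pipeline_func, arguments={'url': url})
```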
2 changes: 1 addition & 1 deletion tests/notebooks/kfp_v2/kfp-v2-integration.ipynb
@@ -197,7 +197,7 @@
},
"outputs": [],
"source": [
"if proxy_envs_set:\n",
"if proxy_envs_set():\n",
"    run = client.create_run_from_pipeline_func(\n",
"        condition_pipeline_proxy,\n",
"        experiment_name=EXPERIMENT_NAME,\n",
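The one-line kfp_v2 fix above is worth spelling out: in Python, a bare function reference is always truthy, so `if proxy_envs_set:` unconditionally took the proxy branch; adding the parentheses makes the check use the function's actual return value. A standalone illustration:

```python
def proxy_envs_set() -> bool:
    return False  # pretend no proxy env vars are set

if proxy_envs_set:       # function object: always truthy, so this always runs
    print('proxy branch taken (bug)')

if proxy_envs_set():     # call result: respects the boolean returned
    print('proxy branch taken (correct check)')
```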
