test(robot): add test case Test Longhorn components recovery
longhorn/longhorn#9536

Signed-off-by: Chris <chris.chien@suse.com>
chriscchien committed Nov 11, 2024
1 parent 460c7b0 commit 5aa1885
Showing 19 changed files with 546 additions and 13 deletions.
6 changes: 6 additions & 0 deletions e2e/keywords/backing_image.resource
@@ -22,3 +22,9 @@ Clean up backing image ${backing_image_name} from a disk

Delete backing image ${backing_image_name}
delete_backing_image ${backing_image_name}

Delete backing image managers and wait for recreation
delete_all_backing_image_managers_and_wait_for_recreation

Wait backing image managers running
wait_all_backing_image_managers_running
16 changes: 16 additions & 0 deletions e2e/keywords/longhorn.resource
@@ -66,3 +66,19 @@ Check all Longhorn CRD removed

Install Longhorn
install_longhorn_system

Delete instance-manager of volume ${volume_id}
${volume_name} = generate_name_with_suffix volume ${volume_id}
${node_name} = get_volume_node ${volume_name}
${pod_name} = get_instance_manager_on_node ${node_name}
delete_pod ${pod_name} longhorn-system

Delete instance-manager of deployment ${deployment_id} volume
${deployment_name} = generate_name_with_suffix deployment ${deployment_id}
${volume_name} = get_workload_volume_name ${deployment_name}
${node_name} = get_volume_node ${volume_name}
${pod_name} = get_instance_manager_on_node ${node_name}
delete_pod ${pod_name} longhorn-system

Wait for Longhorn components all running
wait_for_namespace_pods_running longhorn-system
10 changes: 10 additions & 0 deletions e2e/keywords/sharemanager.resource
@@ -20,3 +20,13 @@ Check sharemanager ${condition} using headless service

Wait for all sharemanager to be deleted
wait_for_sharemanagers_deleted

Delete sharemanager of deployment ${deployment_id} and wait for recreation
${deployment_name} = generate_name_with_suffix deployment ${deployment_id}
${volume_name} = get_workload_volume_name ${deployment_name}
delete_sharemanager_and_wait_for_recreation ${volume_name}

Wait for sharemanager of deployment ${deployment_id} running
${deployment_name} = generate_name_with_suffix deployment ${deployment_id}
${volume_name} = get_workload_volume_name ${deployment_name}
wait_for_share_manager_running ${volume_name}
16 changes: 15 additions & 1 deletion e2e/keywords/workload.resource
@@ -187,5 +187,19 @@ Check ${workload_kind} ${workload_id} pod is ${expect_state} on another node
Should Not Be Equal ${node_name} ${last_volume_node}

Delete Longhorn ${workload_kind} ${workload_name} pod on node ${node_id}
${node_name} = get_node_by_index ${node_id}

IF '${workload_name}' == 'engine-image'
${label_selector} = Set Variable longhorn.io/component=engine-image
ELSE IF '${workload_name}' == 'instance-manager'
${label_selector} = Set Variable longhorn.io/component=instance-manager
ELSE
${label_selector} = Set Variable ${EMPTY}
END
delete_workload_pod_on_node ${workload_name} ${node_name} longhorn-system ${label_selector}

Delete Longhorn ${workload_kind} ${workload_name} pod
${pod_name} = get_workload_pod_name ${workload_name} longhorn-system
Log ${pod_name}
delete_pod ${pod_name} longhorn-system
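
The conditional above is needed because Longhorn system components such as engine-image and instance-manager pods are located via their `longhorn.io/component` label rather than by a workload-name prefix. A hypothetical sketch of how a helper like `get_workload_pods` (called with this selector in `workload_keywords.py` below) could apply it; the repo's actual helper may differ:

```python
from kubernetes import client

def get_workload_pods(workload_name, namespace="default", label_selector=""):
    # Hypothetical sketch: prefer the caller-supplied component label
    # (e.g. longhorn.io/component=instance-manager); otherwise fall back
    # to matching pods whose names start with the workload name.
    api = client.CoreV1Api()
    pods = api.list_namespaced_pod(namespace, label_selector=label_selector).items
    if not label_selector:
        pods = [p for p in pods if p.metadata.name.startswith(workload_name)]
    return pods
```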
18 changes: 17 additions & 1 deletion e2e/libs/backing_image/backing_image.py
@@ -1,6 +1,6 @@
from backing_image.base import Base
from backing_image.rest import Rest

from backing_image.crd import CRD
from strategy import LonghornOperationStrategy


@@ -30,3 +30,19 @@ def delete(self, bi_name):

def cleanup_backing_images(self):
return self.backing_image.cleanup_backing_images()

def delete_backing_image_manager(self, name):
self.backing_image = CRD()
return self.backing_image.delete_backing_image_manager(name)

def wait_all_backing_image_managers_running(self):
self.backing_image = CRD()
return self.backing_image.wait_all_backing_image_managers_running()

def wait_backing_image_manager_restart(self, name, last_creation_time):
self.backing_image = CRD()
self.backing_image.wait_backing_image_manager_restart(name, last_creation_time)

def list_backing_image_manager(self):
self.backing_image = CRD()
return self.backing_image.list_backing_image_manager()
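
Each new wrapper method above rebinds `self.backing_image = CRD()` before delegating, since backing image managers are internal custom resources with no REST counterpart. A hedged sketch of the strategy dispatch the imports imply; the real `__init__` in the repo may differ:

```python
# Hypothetical sketch only; the repo's actual constructor may differ.
class BackingImage(Base):
    _strategy = LonghornOperationStrategy.REST

    def __init__(self):
        # Volume-facing operations go through the configured strategy,
        # while the manager operations above force CRD() because
        # backingimagemanagers are only exposed as custom resources.
        if self._strategy == LonghornOperationStrategy.CRD:
            self.backing_image = CRD()
        else:
            self.backing_image = Rest()
```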
16 changes: 16 additions & 0 deletions e2e/libs/backing_image/base.py
@@ -30,3 +30,19 @@ def delete(self, bi_name):
@abstractmethod
def cleanup_backing_images(self):
return NotImplemented

@abstractmethod
def wait_all_backing_image_managers_running(self):
return NotImplemented

@abstractmethod
def list_backing_image_manager(self):
return NotImplemented

@abstractmethod
def delete_backing_image_manager(self, name):
return NotImplemented

@abstractmethod
def wait_backing_image_manager_restart(self, name, last_creation_time):
return NotImplemented
91 changes: 91 additions & 0 deletions e2e/libs/backing_image/crd.py
@@ -0,0 +1,91 @@
from kubernetes import client
from datetime import datetime
from backing_image.base import Base

from utility.utility import logging
from utility.utility import get_retry_count_and_interval
import time

class CRD(Base):
def __init__(self):
self.obj_api = client.CustomObjectsApi()
self.retry_count, self.retry_interval = get_retry_count_and_interval()

def create(self, bi_name, source_type, url, expected_checksum):
return NotImplemented

def get(self, bi_name):
return NotImplemented

def all_disk_file_status_are_ready(self, bi_name):
return NotImplemented

def clean_up_backing_image_from_a_random_disk(self, bi_name):
return NotImplemented

def delete(self, bi_name):
return NotImplemented

def wait_for_backing_image_disk_cleanup(self, bi_name, disk_id):
return NotImplemented

def wait_for_backing_image_delete(self, bi_name):
return NotImplemented

def cleanup_backing_images(self):
return NotImplemented

def list_backing_image_manager(self):
label_selector = 'longhorn.io/component=backing-image-manager'
return self.obj_api.list_namespaced_custom_object(
group="longhorn.io",
version="v1beta2",
namespace="longhorn-system",
plural="backingimagemanagers",
label_selector=label_selector)

def delete_backing_image_manager(self, name):
logging(f"deleting backing image manager {name} ...")
self.obj_api.delete_namespaced_custom_object(
group="longhorn.io",
version="v1beta2",
namespace="longhorn-system",
plural="backingimagemanagers",
name=name
)

def wait_all_backing_image_managers_running(self):
for i in range(self.retry_count):
all_running = True
backing_image_managers = self.list_backing_image_manager()
for backing_image_manager in backing_image_managers["items"]:
current_state = backing_image_manager["status"]["currentState"]
name = backing_image_manager["metadata"]["name"]
logging(f"backing image manager {name} currently in {current_state} state")
if current_state != "running":
all_running = False
if all_running is True:
return
time.sleep(self.retry_interval)
assert False, f"Waiting all backing image manager in running state timeout"

def wait_backing_image_manager_restart(self, name, last_creation_time):
for i in range(self.retry_count):
time.sleep(self.retry_interval)
try:
backing_image_manager = self.obj_api.get_namespaced_custom_object(
group="longhorn.io",
version="v1beta2",
namespace="longhorn-system",
plural="backingimagemanagers",
name=name
)
except Exception as e:
logging(f"Finding backing image manager {name} failed with error {e}")
continue

creation_time = backing_image_manager["metadata"]["creationTimestamp"]
fmt = "%Y-%m-%dT%H:%M:%SZ"
if datetime.strptime(creation_time, fmt) > datetime.strptime(last_creation_time, fmt):
return

assert False, f"Wait backing image manager {name} restart failed ..."
12 changes: 12 additions & 0 deletions e2e/libs/backing_image/rest.py
@@ -110,3 +110,15 @@ def cleanup_backing_images(self):
break
time.sleep(self.retry_interval)
assert len(get_longhorn_client().list_backing_image()) == 0

def delete_backing_image_manager(self, name):
return NotImplemented

def wait_all_backing_image_managers_running(self):
return NotImplemented

def wait_backing_image_manager_restart(self, name, last_creation_time):
return NotImplemented

def list_backing_image_manager(self):
return NotImplemented
27 changes: 25 additions & 2 deletions e2e/libs/k8s/k8s.py
@@ -1,12 +1,12 @@
import time
import subprocess
import asyncio
import os
from kubernetes import client
from kubernetes.client.rest import ApiException
from workload.pod import create_pod
from workload.pod import delete_pod
from workload.pod import new_pod_manifest
from workload.pod import wait_for_pod_status
from workload.pod import get_pod
from workload.constant import IMAGE_UBUNTU
from utility.utility import subprocess_exec_cmd
from utility.utility import logging
@@ -95,6 +95,7 @@ def check_instance_manager_pdb_not_exist(instance_manager):
exec_cmd = ["kubectl", "get", "pdb", "-n", "longhorn-system"]
res = subprocess_exec_cmd(exec_cmd)
assert instance_manager not in res.decode('utf-8')

def wait_namespaced_job_complete(job_label, namespace):
retry_count, retry_interval = get_retry_count_and_interval()
api = client.BatchV1Api()
@@ -170,3 +171,25 @@ def delete_namespace(namespace):
api.delete_namespace(name=namespace)
except ApiException as e:
assert e.status == 404

def wait_for_namespace_pods_running(namespace):
retry_count, retry_interval = get_retry_count_and_interval()

for i in range(retry_count):
time.sleep(retry_interval)
pod_list = list_namespace_pods(namespace)
all_running = True

for pod in pod_list.items:
pod_name = pod.metadata.name
pod_status = pod.status.phase

if pod_status != "Running":
logging(f"Pod {pod_name} is in {pod_status} state, waiting...")
all_running = False

if all_running:
logging(f"All pods in namespace {namespace} are in Running state!")
return

assert False, f"wait all pod in namespace {namespace} running failed"
20 changes: 20 additions & 0 deletions e2e/libs/keywords/backing_image_keywords.py
@@ -20,3 +20,23 @@ def delete_backing_image(self, bi_name):

def cleanup_backing_images(self):
self.backing_image.cleanup_backing_images()

def delete_backing_image_manager(self, name):
self.backing_image.delete_backing_image_manager(name)

def wait_all_backing_image_managers_running(self):
self.backing_image.wait_all_backing_image_managers_running()

def wait_backing_image_manager_restart(self, name, last_creation_time):
self.backing_image.wait_backing_image_manager_restart(name, last_creation_time)

def list_backing_image_manager(self):
return self.backing_image.list_backing_image_manager()

def delete_all_backing_image_managers_and_wait_for_recreation(self):
backing_image_managers = self.backing_image.list_backing_image_manager()
for backing_image_manager in backing_image_managers["items"]:
name = backing_image_manager["metadata"]["name"]
last_creation_time = backing_image_manager["metadata"]["creationTimestamp"]
self.backing_image.delete_backing_image_manager(name)
self.backing_image.wait_backing_image_manager_restart(name, last_creation_time)
4 changes: 4 additions & 0 deletions e2e/libs/keywords/k8s_keywords.py
@@ -9,6 +9,7 @@
from k8s.k8s import check_node_cordoned
from k8s.k8s import get_instance_manager_on_node
from k8s.k8s import check_instance_manager_pdb_not_exist
from k8s.k8s import wait_for_namespace_pods_running
from utility.utility import logging
from node import Node

@@ -78,3 +79,6 @@ def get_instance_manager_on_node(self, node_name):

def check_instance_manager_pdb_not_exist(self, instance_manager):
return check_instance_manager_pdb_not_exist(instance_manager)

def wait_for_namespace_pods_running(self, namespace):
return wait_for_namespace_pods_running(namespace)
12 changes: 12 additions & 0 deletions e2e/libs/keywords/sharemanager_keywords.py
@@ -47,3 +47,15 @@ def wait_for_sharemanagers_deleted(self, name=[]):
time.sleep(retry_interval)

assert False, "Timed out waiting for all sharemanagers to be deleted"

def delete_sharemanager(self, name):
return self.sharemanager.delete(name)

def delete_sharemanager_and_wait_for_recreation(self, name):
sharemanager = self.sharemanager.get(name)
last_creation_time = sharemanager["metadata"]["creationTimestamp"]
self.sharemanager.delete(name)
self.sharemanager.wait_for_restart(name, last_creation_time)

def wait_for_share_manager_running(self, name):
return self.sharemanager.wait_for_running(name)
12 changes: 6 additions & 6 deletions e2e/libs/keywords/workload_keywords.py
@@ -46,9 +46,9 @@ def create_pod(self, pod_name, claim_name):
logging(f'Creating pod {pod_name} using pvc {claim_name}')
create_pod(new_busybox_manifest(pod_name, claim_name))

def delete_pod(self, pod_name):
def delete_pod(self, pod_name, namespace='default'):
logging(f'Deleting pod {pod_name}')
delete_pod(pod_name)
delete_pod(pod_name, namespace)

def cleanup_pods(self):
cleanup_pods()
@@ -61,15 +61,15 @@ def check_pod_data_checksum(self, expected_checksum, pod_name, file_name):
logging(f'Checking checksum for file {file_name} in pod {pod_name}')
check_pod_data_checksum(expected_checksum, pod_name, file_name)

def delete_workload_pod_on_node(self, workload_name, node_name, namespace="default"):
pods = get_workload_pods(workload_name, namespace=namespace)
def delete_workload_pod_on_node(self, workload_name, node_name, namespace="default", label_selector=""):
pods = get_workload_pods(workload_name, namespace=namespace, label_selector=label_selector)
for pod in pods:
if pod.spec.node_name == node_name:
logging(f'Deleting pod {pod.metadata.name} on node {node_name}')
delete_pod(pod.metadata.name, namespace=namespace)

def get_workload_pod_name(self, workload_name):
return get_workload_pod_names(workload_name)[0]
def get_workload_pod_name(self, workload_name, namespace="default"):
return get_workload_pod_names(workload_name, namespace)[0]

def get_workload_persistent_volume_claim_name(self, workload_name):
return get_workload_persistent_volume_claim_name(workload_name)
16 changes: 16 additions & 0 deletions e2e/libs/sharemanager/base.py
@@ -5,3 +5,19 @@ class Base(ABC):
@abstractmethod
def list(self):
return NotImplemented

@abstractmethod
def get(self, name):
return NotImplemented

@abstractmethod
def delete(self, name):
return NotImplemented

@abstractmethod
def wait_for_running(self, name):
return NotImplemented

@abstractmethod
def wait_for_restart(self, name, last_creation_time):
return NotImplemented
(remaining 5 changed files not shown)
