diff --git a/suites/squid/rbd/tier-3_rbd_migration.yaml b/suites/squid/rbd/tier-3_rbd_migration.yaml
index c14fdd9ce64..dc432a30596 100644
--- a/suites/squid/rbd/tier-3_rbd_migration.yaml
+++ b/suites/squid/rbd/tier-3_rbd_migration.yaml
@@ -137,3 +137,22 @@ tests:
       module: test_rbd_live_migrate_encryption.py
       name: Live migration of qcow2 format images with encryption
       polarion-id: CEPH-83596588
+
+  - test:
+      desc: Rollback after Failed Migration
+      config:
+        rep_pool_config:
+          num_pools: 1
+          do_not_create_image: true
+        ec_pool_config:
+          num_pools: 1
+          do_not_create_image: true
+        fio:
+          size: 1G
+          fs: ext4
+          io: true
+      module: test_rbd_abort_migration.py
+      name: >
+        Rollback after Failed Migration using migration
+        abort in single ceph cluster
+      polarion-id: CEPH-83584090
diff --git a/tests/rbd/test_rbd_abort_migration.py b/tests/rbd/test_rbd_abort_migration.py
new file mode 100644
index 00000000000..834f3be4230
--- /dev/null
+++ b/tests/rbd/test_rbd_abort_migration.py
@@ -0,0 +1,312 @@
+"""Module to verify Rollback after Failed Migration
+
+Test case covered -
+CEPH-83584090 - Rollback after Failed Migration using migration abort in single ceph cluster
+
+Pre-requisites:
+1. Cluster must be up and running with capacity to create pools
+2. We need at least one client node with the ceph-common package,
+   conf and keyring files
+
+Test Case Flow:
+1. Deploy a single ceph cluster along with mon, mgr and osd daemons
+2. Create two pools with the rbd application enabled, to act as the source and destination pools for migration
+3. Store the external qcow2 data in a JSON spec
+   E.g.: testspec.json
+   {
+       "type": "qcow",
+       "stream": {
+           "type": "file",
+           "file_path": "/mnt/image.qcow"
+       }
+   }
+4. Keep two files on the mounted path of the image, one of them filled with data and its checksum noted.
+5. Execute migration prepare with the import-only option for the RBD image from an
+   external source of qcow data (specified by the JSON string in the echo command)
+   E.g.:
+   echo '{"type":"qcow","stream":{"type":"http","url":"http://download.ceph.com/qa/ubuntu-12.04.raw"}}'
+   | rbd migration prepare --import-only --source-spec-path - POOL_NAME/IMAGE_NAME --cluster CLUSTER_NAME
+6. Initiate the migration using the migration execute command
+   E.g.:
+   rbd migration execute TARGET_POOL_NAME/SOURCE_IMAGE_NAME
+7. Intentionally cause a failure in the cluster, either an osd node reboot or a network failure
+8. Check the progress of the migration using rbd status
+9. Attempt to roll back the failed migration using the migration abort command
+   E.g.:
+   rbd migration abort TARGET_POOL_NAME/SOURCE_IMAGE_NAME
+10. Check the disk usage of the source and destination images for data transfer using the rbd du command
+11. Verify data integrity by comparing the MD5 sums of the source RBD image before the migration and after the abort
+"""
+
+import tempfile
+from copy import deepcopy
+
+from ceph.rbd.initial_config import initial_rbd_config
+from ceph.rbd.utils import get_md5sum_rbd_image, getdict, random_string
+from ceph.rbd.workflows.cleanup import cleanup
+from ceph.rbd.workflows.krbd_io_handler import krbd_io_handler
+from ceph.rbd.workflows.migration import verify_migration_state
+from ceph.rbd.workflows.rbd import create_single_pool_and_images
+from ceph.utils import get_node_by_id
+from cli.rbd.rbd import Rbd
+from tests.rbd.rbd_utils import Rbd as rbdutils
+from utility.log import Log
+
+log = Log(__name__)
+
+
+def rollback_migration(rbd_obj, client, **kw):
+    """
+    Test to verify rollback using migration abort
+    Args:
+        rbd_obj: RBD object
+        client: client node object
+        **kw: any other arguments
+    """
+
+    kw["client"] = client
+    rbd = rbd_obj.get("rbd")
+
+    for pool_type in rbd_obj.get("pool_types"):
+        rbd_config = kw.get("config", {}).get(pool_type, {})
+        multi_pool_config = deepcopy(getdict(rbd_config))
+
+        for pool, pool_config in multi_pool_config.items():
+            kw["pool-name"] = pool
+
+            # Create an RBD image in the pool
+            image = "image_" + random_string(len=4)
+            out, err = rbd.create(**{"image-spec": f"{pool}/{image}", "size": 1024})
+            if err:
+                log.error(f"Create image {pool}/{image} failed with error {err}")
+                return 1
+            else:
+                log.info(f"Successfully created image {pool}/{image}")
+
+            # Map, mount and run IOs
+            fio = kw.get("config", {}).get("fio", {})
+            io_config = {
+                "rbd_obj": rbd,
+                "client": client,
+                "size": fio["size"],
+                "do_not_create_image": True,
+                "config": {
+                    "file_size": fio["size"],
+                    "file_path": [f"/mnt/mnt_{random_string(len=5)}/file"],
+                    "get_time_taken": True,
+                    "image_spec": [f"{pool}/{image}"],
+                    "operations": {
+                        "fs": "ext4",
+                        "io": True,
+                        "mount": True,
+                        "map": True,
+                        "nounmap": True,
+                    },
+                    "cmd_timeout": 2400,
+                    "io_type": "write",
+                },
+            }
+
+            out, err = krbd_io_handler(**io_config)
+            if err:
+                log.error(f"Map, mount and run IOs failed for {pool}/{image}")
+                return 1
+            else:
+                log.info(f"Map, mount and IOs successful for {pool}/{image}")
+
+            # Convert the mapped device to a qcow2 file, which acts as the
+            # external source for the migration. tempfile.mktemp only generates
+            # a path string here; the file itself is created on the client node.
+            dev = io_config["config"]["device_names"][0][0].strip()
+            qcow_file = tempfile.mktemp(prefix=f"{image}_", suffix=".qcow2", dir="/tmp")
+
+            out, err = client.exec_command(
+                sudo=True,
+                cmd=f"qemu-img convert -f raw -O qcow2 {dev} {qcow_file}",
+            )
+            if err:
+                log.error(f"Image convert to qcow2 failed with err {err}")
+                return 1
+            else:
+                log.info("Successfully converted image to qcow2")
+
+            md5_before_migration = get_md5sum_rbd_image(
+                image_spec=f"{pool}/{image}",
+                rbd=rbd,
+                client=client,
+                file_path="file" + random_string(len=5),
+            )
+            log.info(
+                f"md5sum of source image before migration is {md5_before_migration}"
+            )
+
+            kw["cleanup_files"].append(qcow_file)
+            qcow_spec = {
+                "type": "qcow",
+                "stream": {"type": "file", "file_path": f"{qcow_file}"},
+            }
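+            # For reference (illustrative only; the file path below is an
+            # example), piping this spec on stdin is equivalent to the manual
+            # CLI flow described in the module docstring:
+            #   echo '{"type":"qcow","stream":{"type":"file","file_path":"/tmp/image.qcow2"}}' \
+            #       | rbd migration prepare --import-only --source-spec-path - TARGET_POOL/TARGET_IMAGE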
+            # Create a target pool to which the image will be migrated
+            is_ec_pool = True if "ec" in pool_type else False
+            config = kw.get("config", {})
+            target_pool = "target_pool_" + random_string()
+            target_pool_config = {}
+            if is_ec_pool:
+                data_pool_target = "data_pool_new_" + random_string()
+                target_pool_config["data_pool"] = data_pool_target
+            rc = create_single_pool_and_images(
+                config=config,
+                pool=target_pool,
+                pool_config=target_pool_config,
+                client=client,
+                cluster="ceph",
+                rbd=rbd,
+                ceph_version=int(config.get("rhbuild")[0]),
+                is_ec_pool=is_ec_pool,
+                is_secondary=False,
+                do_not_create_image=True,
+            )
+            if rc:
+                log.error(f"Creation of target pool {target_pool} failed")
+                return rc
+
+            # Add the new pool details to config so that they are handled in cleanup
+            if pool_type == "rep_pool_config":
+                kw["config"]["rep_pool_config"][target_pool] = {}
+            elif pool_type == "ec_pool_config":
+                kw["config"]["ec_pool_config"][target_pool] = {
+                    "data_pool": data_pool_target
+                }
+
+            # Prepare the migration
+            target_image = "target_image_" + random_string()
+            rbd.migration.prepare(
+                source_spec=qcow_spec,
+                dest_spec=f"{target_pool}/{target_image}",
+                client_node=client,
+            )
+
+            # Verify migration prepare status
+            if verify_migration_state(
+                action="prepare",
+                image_spec=f"{target_pool}/{target_image}",
+                **kw,
+            ):
+                log.error("Failed to prepare migration")
+                return 1
+            else:
+                log.info("Migration prepare status verified successfully")
+
+            # Execute the migration
+            rbd.migration.action(
+                action="execute",
+                dest_spec=f"{target_pool}/{target_image}",
+                client_node=client,
+            )
+
+            # Verify migration execute status
+            if verify_migration_state(
+                action="execute",
+                image_spec=f"{target_pool}/{target_image}",
+                **kw,
+            ):
+                log.error("Failed to execute migration")
+                return 1
+            else:
+                log.info("Migration executed successfully")
+
+            # Migration does not actually fail here -- it stays "in progress"
+            # indefinitely even when a failure is simulated via network
+            # disruption. When network delay or packet loss was introduced with
+            # 'tc qdisc', the execute operation merely slowed down or waited for
+            # the network to recover rather than failing. Hence the abort is
+            # issued directly after 'execute'.
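+            # The failure injection that was attempted looks roughly like the
+            # following (illustrative only; 'eth0' is an example interface and
+            # these commands are not executed by this test):
+            #   tc qdisc add dev eth0 root netem delay 200ms loss 20%
+            #   tc qdisc del dev eth0 root netem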
+            # Abort the migration; a successful abort reverts the prepare/execute
+            # steps and removes the partially migrated target image
+            rbd.migration.action(
+                action="abort",
+                dest_spec=f"{target_pool}/{target_image}",
+                client_node=client,
+            )
+            log.info("Migration abort executed successfully")
+
+            # Verify that the target image does not exist after the abort
+            rbdutil = rbdutils(**kw)
+            if rbdutil.image_exists(target_pool, target_image):
+                log.error(
+                    f"Image still exists after aborting the migration in pool {target_pool}"
+                )
+                return 1
+            else:
+                log.info(
+                    f"Image {target_image} is not found in pool {target_pool} after aborting migration"
+                )
+
+            md5_after_abort = get_md5sum_rbd_image(
+                image_spec=f"{pool}/{image}",
+                rbd=rbd,
+                client=client,
+                file_path="file" + random_string(len=5),
+            )
+            log.info(
+                f"md5sum of source image after aborting migration is {md5_after_abort}"
+            )
+
+            if md5_before_migration == md5_after_abort:
+                log.info(
+                    "md5sum of source image remains the same before and after aborting migration"
+                )
+            else:
+                log.error(
+                    "md5sum of source image is not the same before and after aborting migration"
+                )
+                return 1
+    return 0
+
+
+def run(**kw):
+    """
+    This test verifies rollback using migration abort
+    Args:
+        kw: test data
+    Returns:
+        int: The return value. 0 for success, 1 otherwise
+    """
+    pool_types = []
+    try:
+        log.info(
+            "CEPH-83584090 - Rollback after Failed Migration using migration abort in single ceph cluster"
+        )
+
+        if kw.get("client_node"):
+            client = get_node_by_id(kw.get("ceph_cluster"), kw.get("client_node"))
+        else:
+            client = kw.get("ceph_cluster").get_nodes(role="client")[0]
+        rbd_obj = initial_rbd_config(**kw)
+        pool_types = rbd_obj.get("pool_types")
+        kw.update({"cleanup_files": []})
+        if rbd_obj:
+            log.info("Executing test on Replicated and EC pool")
+            if rollback_migration(rbd_obj, client, **kw):
+                return 1
+            log.info(
+                "Test Rollback after Failed Migration using migration abort passed"
+            )
+
+    except Exception as e:
+        log.error(
+            f"Test Rollback after Failed Migration using migration abort failed: {str(e)}"
+        )
+        return 1
+
+    finally:
+        try:
+            for file in kw["cleanup_files"]:
+                out, err = client.exec_command(sudo=True, cmd=f"rm -f {file}")
+                if err:
+                    log.error(f"Failed to delete file {file}")
+        except Exception as e:
+            log.error(f"Failed to cleanup temp files with err {e}")
+        cluster_name = kw.get("ceph_cluster", {}).name
+        # Fall back to a bare Rbd object if setup failed before rbd_obj was created
+        if "rbd_obj" not in locals():
+            rbd_obj = Rbd(client)
+        obj = {cluster_name: rbd_obj}
+        cleanup(pool_types=pool_types, multi_cluster_obj=obj, **kw)
+
+    return 0