Test Automation: CEPH-83584090
Signed-off-by: Chaitanya Dommeti <[email protected]>
Chaitanya Dommeti authored and Chaitanya Dommeti committed Jan 24, 2025
1 parent 339d916 commit c59fa43
Showing 2 changed files with 331 additions and 0 deletions.
19 changes: 19 additions & 0 deletions suites/squid/rbd/tier-3_rbd_migration.yaml
@@ -137,3 +137,22 @@ tests:
      module: test_rbd_live_migrate_encryption.py
      name: Live migration of qcow2 format images with encryption
      polarion-id: CEPH-83596588

  - test:
      desc: Rollback after Failed Migration
      config:
        rep_pool_config:
          num_pools: 1
          do_not_create_image: true
        ec_pool_config:
          num_pools: 1
          do_not_create_image: true
        fio:
          size: 1G
          fs: ext4
          io: true
      module: test_rbd_abort_migration.py
      name: >
        Rollback after Failed Migration using migration
        abort in single ceph cluster
      polarion-id: CEPH-83584090
312 changes: 312 additions & 0 deletions tests/rbd/test_rbd_abort_migration.py
@@ -0,0 +1,312 @@
"""Module to verify Rollback after Failed Migration
Test case covered -
CEPH-83584090 - Rollback after Failed Migration using migration abort in single ceph cluster
Pre-requisites:
1. Cluster must be up and running with capacity to create pools
2. We need at least one client node with the ceph-common package,
conf and keyring files
Test Case Flow:
1. Deploy a single ceph cluster along with mon, mgr and osd daemons
2. Create two pools with the rbd application enabled, to serve as the source and destination pools for migration
3. Store the external qcow2 data in a json spec
E.g: testspec.json
{
"type": "qcow",
"stream": {
"type": "file",
"file_path": "/mnt/image.qcow"
}
}
4. Keep two files on the mounted image path, one of them filled with data and its checksum noted.
5. Execute prepare migration with the import-only option for the RBD image from an
external source of qcow data (specified by the JSON string in the echo command)
E.g:
echo '{"type":"qcow","stream":{"type":"http","url":"http://download.ceph.com/qa/ubuntu-12.04.raw"}}'
| rbd migration prepare --import-only --source-spec-path - <pool_name>/<image_name> --cluster <cluster-source>
6. Initiate the migration using the migration execute command
E.g:
rbd migration execute TARGET_POOL_NAME/SOURCE_IMAGE_NAME
7. Intentionally cause a failure in the cluster, such as an osd node reboot or a network failure
8. Check the progress of the migration using rbd status
9. Attempt to roll back the failed migration using the migration abort command
10. Check the disk usage of the source and destination images for data transfer using the rbd du command (see example commands below)
11. Verify data integrity by comparing MD5 sums of the source RBD image before and after the migration abort
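E.g (illustrative commands for steps 8-10; image specs are placeholders):
rbd status TARGET_POOL_NAME/TARGET_IMAGE_NAME
rbd migration abort TARGET_POOL_NAME/TARGET_IMAGE_NAME
rbd du SOURCE_POOL_NAME/SOURCE_IMAGE_NAME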
"""

import tempfile
from copy import deepcopy

from ceph.rbd.initial_config import initial_rbd_config
from ceph.rbd.utils import get_md5sum_rbd_image, getdict, random_string
from ceph.rbd.workflows.cleanup import cleanup
from ceph.rbd.workflows.krbd_io_handler import krbd_io_handler
from ceph.rbd.workflows.migration import verify_migration_state
from ceph.rbd.workflows.rbd import create_single_pool_and_images
from ceph.utils import get_node_by_id
from cli.rbd.rbd import Rbd
from tests.rbd.rbd_utils import Rbd as rbdutils
from utility.log import Log

log = Log(__name__)


def rollback_migration_encrypted(rbd_obj, client, **kw):
"""
Test to verify Rollback using Migration abort
Args:
rbd_obj: RBD object
        client: client node object
**kw: any other arguments
"""

kw["client"] = client
rbd = rbd_obj.get("rbd")

for pool_type in rbd_obj.get("pool_types"):
rbd_config = kw.get("config", {}).get(pool_type, {})
multi_pool_config = deepcopy(getdict(rbd_config))

for pool, pool_config in multi_pool_config.items():
kw["pool-name"] = pool

            # Create an RBD image in the pool
image = "image_" + random_string(len=4)
out, err = rbd.create(**{"image-spec": f"{pool}/{image}", "size": 1024})
if err:
log.error(f"Create image {pool}/{image} failed with error {err}")
return 1
else:
log.info(f"Successfully created image {pool}/{image}")

# Map, mount and run IOs
fio = kw.get("config", {}).get("fio", {})
io_config = {
"rbd_obj": rbd,
"client": client,
"size": fio["size"],
"do_not_create_image": True,
"config": {
"file_size": fio["size"],
"file_path": [f"/mnt/mnt_{random_string(len=5)}/file"],
"get_time_taken": True,
"image_spec": [f"{pool}/{image}"],
"operations": {
"fs": "ext4",
"io": True,
"mount": True,
"map": True,
"nounmap": True,
},
"cmd_timeout": 2400,
"io_type": "write",
},
}

out, err = krbd_io_handler(**io_config)
if err:
log.error(f"Map, mount and run IOs failed for encrypted {pool}/{image}")
return 1
else:
log.info(f"Map, mount and IOs successful for encrypted {pool}/{image}")

dev = io_config["config"]["device_names"][0][0].strip()
qcow_file = tempfile.mktemp(prefix=f"{image}_", suffix=".qcow2", dir="/tmp")
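            # Export the mapped RBD device to a temporary qcow2 file; it will act as the external migration source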

out, err = client.exec_command(
sudo=True,
cmd=f"qemu-img convert -f raw -O qcow2 {dev} {qcow_file}",
)
if err:
log.error(f"Image convert to qcow2 failed with err {err}")
return 1
else:
log.info("Successfully converted image to qcow2")

md5_before_migration = get_md5sum_rbd_image(
image_spec=f"{pool}/{image}",
rbd=rbd,
client=client,
file_path="file" + random_string(len=5),
)
log.info(
f"md5sum of source image before migration is {md5_before_migration}"
)

kw["cleanup_files"].append(qcow_file)
qcow_spec = {
"type": "qcow",
"stream": {"type": "file", "file_path": f"{qcow_file}"},
}

            # Create a target pool to which the image will be migrated
            is_ec_pool = "ec" in pool_type
config = kw.get("config", {})
target_pool = "target_pool_" + random_string()
target_pool_config = {}
if is_ec_pool:
data_pool_target = "data_pool_new_" + random_string()
target_pool_config["data_pool"] = data_pool_target
rc = create_single_pool_and_images(
config=config,
pool=target_pool,
pool_config=target_pool_config,
client=client,
cluster="ceph",
rbd=rbd,
ceph_version=int(config.get("rhbuild")[0]),
is_ec_pool=is_ec_pool,
is_secondary=False,
do_not_create_image=True,
)
if rc:
log.error(f"Creation of target pool {target_pool} failed")
return rc

# Adding the new pool details to config so that they are handled in cleanup
if pool_type == "rep_pool_config":
kw["config"]["rep_pool_config"][target_pool] = {}
elif pool_type == "ec_pool_config":
kw["config"]["ec_pool_config"][target_pool] = {
"data_pool": data_pool_target
}

# Prepare Migration
target_image = "target_image_" + random_string()
rbd.migration.prepare(
source_spec=qcow_spec,
dest_spec=f"{target_pool}/{target_image}",
client_node=client,
)

# Verify prepare migration status
if verify_migration_state(
action="prepare",
image_spec=f"{target_pool}/{target_image}",
**kw,
):
log.error("Failed to prepare migration")
return 1
else:
log.info("Migration prepare status verfied successfully")

            # Execute migration
rbd.migration.action(
action="execute",
dest_spec=f"{target_pool}/{target_image}",
client_node=client,
)

# verify execute migration status
if verify_migration_state(
action="execute",
image_spec=f"{target_pool}/{target_image}",
**kw,
):
log.error("Failed to execute migration")
return 1
else:
log.info("Migration executed successfully")

            # Note: migration execute could not be made to fail reliably -- it stays
            # "in progress" indefinitely even when a failure is simulated at the network level.
            # When network delay or packet loss was introduced using 'tc qdisc', the execute
            # operation only stalled or waited for the network to recover; it did not fail.
            # Hence, the abort operation is issued right after 'execute'.
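            # (illustrative netem command for simulating such delay: 'tc qdisc add dev <interface> root netem delay 200ms')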

# Abort the migration
rbd.migration.action(
action="abort",
dest_spec=f"{target_pool}/{target_image}",
client_node=client,
)
log.info("Migration abort executed successfully")

# verify target image does not exist after abort
rbdutil = rbdutils(**kw)
if rbdutil.image_exists(target_pool, target_image):
log.error(
f"Image still exist after aborting the image migration in pool {target_pool}"
)
return 1
else:
log.info(
f"Image {target_image} is not found in pool {target_pool} after aborting migration"
)

md5_after_abort = get_md5sum_rbd_image(
image_spec=f"{pool}/{image}",
rbd=rbd,
client=client,
file_path="file" + random_string(len=5),
)
log.info(
f"md5sum of source image after aborting migration is {md5_after_abort}"
)

if md5_before_migration == md5_after_abort:
log.info(
"md5sum of source image remains same before and after aborting migration"
)
else:
log.error(
"md5sum of source image is not same before and after aborting migration"
)
return 1
return 0


def run(**kw):
"""
This test verifies Rollback using migration abort
Args:
kw: test data
Returns:
int: The return value. 0 for success, 1 otherwise
"""
try:
log.info(
"CEPH-83584090 - Rollback after Failed Migration using migration abort in single ceph cluster"
)

if kw.get("client_node"):
client = get_node_by_id(kw.get("ceph_cluster"), kw.get("client_node"))
else:
client = kw.get("ceph_cluster").get_nodes(role="client")[0]
rbd_obj = initial_rbd_config(**kw)
pool_types = rbd_obj.get("pool_types")
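        # Track temporary files created by the test so they can be removed during cleanup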
kw.update({"cleanup_files": []})
if rbd_obj:
log.info("Executing test on Replicated and EC pool")
if rollback_migration_encrypted(rbd_obj, client, **kw):
return 1
log.info(
"Test Rollback after Failed Migration using migration abort passed"
)

except Exception as e:
log.error(
f"Test Rollback after Failed Migration using migration abort failed: {str(e)}"
)
return 1

finally:
try:
for file in kw["cleanup_files"]:
out, err = client.exec_command(sudo=True, cmd=f"rm -f {file}")
if err:
log.error(f"Failed to delete file {file}")
except Exception as e:
log.error(f"Failed to cleanup temp files with err {e}")
cluster_name = kw.get("ceph_cluster", {}).name
if "rbd_obj" not in locals():
rbd_obj = Rbd(client)
obj = {cluster_name: rbd_obj}
cleanup(pool_types=pool_types, multi_cluster_obj=obj, **kw)

return 0
