Test Automation: CEPH-83584090
Signed-off-by: Chaitanya Dommeti <[email protected]>
Chaitanya Dommeti authored and Chaitanya Dommeti committed Jan 24, 2025
1 parent 339d916 commit c59fa43
Showing 2 changed files with 331 additions and 0 deletions.
19 changes: 19 additions & 0 deletions suites/squid/rbd/tier-3_rbd_migration.yaml
@@ -137,3 +137,22 @@ tests:
      module: test_rbd_live_migrate_encryption.py
      name: Live migration of qcow2 format images with encryption
      polarion-id: CEPH-83596588

  - test:
      desc: Rollback after Failed Migration
      config:
        rep_pool_config:
          num_pools: 1
          do_not_create_image: true
        ec_pool_config:
          num_pools: 1
          do_not_create_image: true
        fio:
          size: 1G
          fs: ext4
          io: true
      module: test_rbd_abort_migration.py
      name: >
        Rollback after Failed Migration using migration
        abort in single ceph cluster
      polarion-id: CEPH-83584090
312 changes: 312 additions & 0 deletions tests/rbd/test_rbd_abort_migration.py
@@ -0,0 +1,312 @@
"""Module to verify Rollback after Failed Migration
Test case covered -
CEPH-83584090 - Rollback after Failed Migration using migration abort in single ceph cluster
Pre-requisites:
1. Cluster must be up and running with capacity to create pools
2. We need at least one client node with the ceph-common package,
conf and keyring files
Test Case Flow:
1. Deploy a single ceph cluster along with mon, mgr and osd daemons
2. Create two pools with the rbd application enabled, to serve as the source and destination pools for migration
3. Store the external qcow2 data in a json spec
E.g: testspec.json
{
"type": "qcow",
"stream": {
"type": "file",
"file_path": "/mnt/image.qcow"
}
}
4. Keep two files on the mounted image path, one of them filled with data and its checksum noted.
5. Execute prepare migration with the import-only option for the RBD image from an
external source of qcow data (specified by the JSON string in the echo command)
E.g:
echo '{"type":"qcow","stream":{"type":"http","url":"http://download.ceph.com/qa/ubuntu-12.04.raw"}}'
| rbd migration prepare --import-only --source-spec-path - <pool_name>/<image_name> --cluster <cluster-source>
6. Initiate the migration using the migration execute command
E.g:
rbd migration execute TARGET_POOL_NAME/SOURCE_IMAGE_NAME
7. Intentionally cause a failure in the cluster, such as an osd node reboot or a network failure
8. Check the progress of the migration using rbd status
9. Attempt to roll back the failed migration using the migration abort command
10. Check the disk usage of the source and destination images for data transfer using the rbd du command (see example commands below)
11. Verify data integrity by comparing MD5 sums of the source RBD image before and after the migration abort
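E.g (illustrative commands for steps 8-10; image specs are placeholders):
rbd status TARGET_POOL_NAME/TARGET_IMAGE_NAME
rbd migration abort TARGET_POOL_NAME/TARGET_IMAGE_NAME
rbd du SOURCE_POOL_NAME/SOURCE_IMAGE_NAME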
"""

import tempfile
from copy import deepcopy

from ceph.rbd.initial_config import initial_rbd_config
from ceph.rbd.utils import get_md5sum_rbd_image, getdict, random_string
from ceph.rbd.workflows.cleanup import cleanup
from ceph.rbd.workflows.krbd_io_handler import krbd_io_handler
from ceph.rbd.workflows.migration import verify_migration_state
from ceph.rbd.workflows.rbd import create_single_pool_and_images
from ceph.utils import get_node_by_id
from cli.rbd.rbd import Rbd
from tests.rbd.rbd_utils import Rbd as rbdutils
from utility.log import Log

log = Log(__name__)


def rollback_migration_encrypted(rbd_obj, client, **kw):
"""
Test to verify Rollback using Migration abort
Args:
rbd_obj: RBD object
        client: client node object
**kw: any other arguments
"""

kw["client"] = client
rbd = rbd_obj.get("rbd")

for pool_type in rbd_obj.get("pool_types"):
rbd_config = kw.get("config", {}).get(pool_type, {})
multi_pool_config = deepcopy(getdict(rbd_config))

for pool, pool_config in multi_pool_config.items():
kw["pool-name"] = pool

            # Create an RBD image in the pool
image = "image_" + random_string(len=4)
out, err = rbd.create(**{"image-spec": f"{pool}/{image}", "size": 1024})
if err:
log.error(f"Create image {pool}/{image} failed with error {err}")
return 1
else:
log.info(f"Successfully created image {pool}/{image}")

# Map, mount and run IOs
fio = kw.get("config", {}).get("fio", {})
io_config = {
"rbd_obj": rbd,
"client": client,
"size": fio["size"],
"do_not_create_image": True,
"config": {
"file_size": fio["size"],
"file_path": [f"/mnt/mnt_{random_string(len=5)}/file"],
"get_time_taken": True,
"image_spec": [f"{pool}/{image}"],
"operations": {
"fs": "ext4",
"io": True,
"mount": True,
"map": True,
"nounmap": True,
},
"cmd_timeout": 2400,
"io_type": "write",
},
}

out, err = krbd_io_handler(**io_config)
if err:
log.error(f"Map, mount and run IOs failed for encrypted {pool}/{image}")
return 1
else:
log.info(f"Map, mount and IOs successful for encrypted {pool}/{image}")

dev = io_config["config"]["device_names"][0][0].strip()
qcow_file = tempfile.mktemp(prefix=f"{image}_", suffix=".qcow2", dir="/tmp")
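            # Export the mapped RBD device to a temporary qcow2 file; it will act as the external migration source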

out, err = client.exec_command(
sudo=True,
cmd=f"qemu-img convert -f raw -O qcow2 {dev} {qcow_file}",
)
if err:
log.error(f"Image convert to qcow2 failed with err {err}")
return 1
else:
log.info("Successfully converted image to qcow2")

md5_before_migration = get_md5sum_rbd_image(
image_spec=f"{pool}/{image}",
rbd=rbd,
client=client,
file_path="file" + random_string(len=5),
)
log.info(
f"md5sum of source image before migration is {md5_before_migration}"
)

kw["cleanup_files"].append(qcow_file)
qcow_spec = {
"type": "qcow",
"stream": {"type": "file", "file_path": f"{qcow_file}"},
}

            # Create a target pool to which the image will be migrated
            is_ec_pool = "ec" in pool_type
config = kw.get("config", {})
target_pool = "target_pool_" + random_string()
target_pool_config = {}
if is_ec_pool:
data_pool_target = "data_pool_new_" + random_string()
target_pool_config["data_pool"] = data_pool_target
rc = create_single_pool_and_images(
config=config,
pool=target_pool,
pool_config=target_pool_config,
client=client,
cluster="ceph",
rbd=rbd,
ceph_version=int(config.get("rhbuild")[0]),
is_ec_pool=is_ec_pool,
is_secondary=False,
do_not_create_image=True,
)
if rc:
log.error(f"Creation of target pool {target_pool} failed")
return rc

# Adding the new pool details to config so that they are handled in cleanup
if pool_type == "rep_pool_config":
kw["config"]["rep_pool_config"][target_pool] = {}
elif pool_type == "ec_pool_config":
kw["config"]["ec_pool_config"][target_pool] = {
"data_pool": data_pool_target
}

# Prepare Migration
target_image = "target_image_" + random_string()
rbd.migration.prepare(
source_spec=qcow_spec,
dest_spec=f"{target_pool}/{target_image}",
client_node=client,
)

# Verify prepare migration status
if verify_migration_state(
action="prepare",
image_spec=f"{target_pool}/{target_image}",
**kw,
):
log.error("Failed to prepare migration")
return 1
else:
log.info("Migration prepare status verfied successfully")

            # Execute migration
rbd.migration.action(
action="execute",
dest_spec=f"{target_pool}/{target_image}",
client_node=client,
)

# verify execute migration status
if verify_migration_state(
action="execute",
image_spec=f"{target_pool}/{target_image}",
**kw,
):
log.error("Failed to execute migration")
return 1
else:
log.info("Migration executed successfully")

            # Note: migration execute could not be made to fail reliably -- it stays
            # "in progress" indefinitely even when a failure is simulated at the network level.
            # When network delay or packet loss was introduced using 'tc qdisc', the execute
            # operation only stalled or waited for the network to recover; it did not fail.
            # Hence, the abort operation is issued right after 'execute'.
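            # (illustrative netem command for simulating such delay: 'tc qdisc add dev <interface> root netem delay 200ms')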

# Abort the migration
rbd.migration.action(
action="abort",
dest_spec=f"{target_pool}/{target_image}",
client_node=client,
)
log.info("Migration abort executed successfully")

# verify target image does not exist after abort
rbdutil = rbdutils(**kw)
if rbdutil.image_exists(target_pool, target_image):
log.error(
f"Image still exist after aborting the image migration in pool {target_pool}"
)
return 1
else:
log.info(
f"Image {target_image} is not found in pool {target_pool} after aborting migration"
)

md5_after_abort = get_md5sum_rbd_image(
image_spec=f"{pool}/{image}",
rbd=rbd,
client=client,
file_path="file" + random_string(len=5),
)
log.info(
f"md5sum of source image after aborting migration is {md5_after_abort}"
)

if md5_before_migration == md5_after_abort:
log.info(
"md5sum of source image remains same before and after aborting migration"
)
else:
log.error(
"md5sum of source image is not same before and after aborting migration"
)
return 1
return 0


def run(**kw):
"""
This test verifies Rollback using migration abort
Args:
kw: test data
Returns:
int: The return value. 0 for success, 1 otherwise
"""
try:
log.info(
"CEPH-83584090 - Rollback after Failed Migration using migration abort in single ceph cluster"
)

if kw.get("client_node"):
client = get_node_by_id(kw.get("ceph_cluster"), kw.get("client_node"))
else:
client = kw.get("ceph_cluster").get_nodes(role="client")[0]
rbd_obj = initial_rbd_config(**kw)
pool_types = rbd_obj.get("pool_types")
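        # Track temporary files created by the test so they can be removed during cleanup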
kw.update({"cleanup_files": []})
if rbd_obj:
log.info("Executing test on Replicated and EC pool")
if rollback_migration_encrypted(rbd_obj, client, **kw):
return 1
log.info(
"Test Rollback after Failed Migration using migration abort passed"
)

except Exception as e:
log.error(
f"Test Rollback after Failed Migration using migration abort failed: {str(e)}"
)
return 1

finally:
try:
for file in kw["cleanup_files"]:
out, err = client.exec_command(sudo=True, cmd=f"rm -f {file}")
if err:
log.error(f"Failed to delete file {file}")
except Exception as e:
log.error(f"Failed to cleanup temp files with err {e}")
cluster_name = kw.get("ceph_cluster", {}).name
if "rbd_obj" not in locals():
rbd_obj = Rbd(client)
obj = {cluster_name: rbd_obj}
cleanup(pool_types=pool_types, multi_cluster_obj=obj, **kw)

return 0
