Skip to content

Commit

Permalink
mdadm: Do not start reshape before switchroot
Browse files Browse the repository at this point in the history
There are numerous issues with --grow --continue in the switchroot phase;
they include:
* Events being missed for restarting grow-continue service. This is
  apparent mostly on OS on RAID scenarios. When a checkpoint (next step)
  is committed, we have no reliable way to gracefully stop reshape until
  it reaches that checkpoint. During boot, there's heavy I/O utilisation,
  which causes sync speed drop, and naturally checkpoint takes longer to
  reach. This further causes systemd to forcefully kill grow-continue
  service due to timeouts, which results in udev event being missed for
  grow-continue service restart.
* Grow-continue (seemingly) was not designed to be restarted without
  reassembly, some things like stopping chunksize (to lower) migration
  were straight up not working until recently.
This patch makes grow-continue (the actual reshape) start after the
switchroot phase. This way we should not encounter issues related to
restarting the service.

Add checks to not start a reshape if in initrd; let it initialise only.
Change grow-continue udev rule to be triggered whenever there's a
reshape happening in metadata, rely on udev event to kick reshape after
switchroot. Add handle_forking helper function for reshapes to avoid
duplicating code.

Signed-off-by: Mateusz Kusiak <[email protected]>
  • Loading branch information
dancesWithMachines authored and mtkaczyk committed Jan 13, 2025
1 parent 25267bc commit 8a0d3fe
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 27 deletions.
81 changes: 57 additions & 24 deletions Grow.c
Original file line number Diff line number Diff line change
Expand Up @@ -2995,6 +2995,34 @@ static void catch_term(int sig)
sigterm = 1;
}


/**
* handle_forking() - Handle reshape forking.
*
* @forked: if already forked.
* @devname: device name.
* Returns: -1 if fork() failed,
* 0 if child process,
* 1 if job delegated to forked process or systemd.
*
* This function is a helper function for reshapes for fork handling.
*/
static mdadm_status_t handle_forking(bool forked, char *devname)
{
if (forked)
return MDADM_STATUS_FORKED;

if (devname && continue_via_systemd(devname, GROW_SERVICE, NULL))
return MDADM_STATUS_SUCCESS;

switch (fork()) {
case -1: return MDADM_STATUS_ERROR; /* error */
case 0: return MDADM_STATUS_FORKED; /* child */
default: return MDADM_STATUS_SUCCESS; /* parent */
}

}

static int reshape_array(char *container, int fd, char *devname,
struct supertype *st, struct mdinfo *info,
int force, struct mddev_dev *devlist,
Expand Down Expand Up @@ -3485,33 +3513,35 @@ static int reshape_array(char *container, int fd, char *devname,
if (restart)
sysfs_set_str(sra, NULL, "array_state", "active");

if (!forked)
if (continue_via_systemd(container ?: sra->sys_name,
GROW_SERVICE, NULL)) {
free(fdlist);
free(offsets);
sysfs_free(sra);
return 0;
}
/* Do not run in initrd */
if (in_initrd()) {
free(fdlist);
free(offsets);
sysfs_free(sra);
pr_info("Reshape has to be continued from location %llu when root filesystem has been mounted.\n",
sra->reshape_progress);
return 1;
}

/* Now we just need to kick off the reshape and watch, while
* handling backups of the data...
* This is all done by a forked background process.
*/
switch(forked ? 0 : fork()) {
case -1:
switch (handle_forking(forked, container ? container : sra->sys_name)) {
default: /* Unused, only to satisfy compiler. */
case MDADM_STATUS_ERROR: /* error */
pr_err("Cannot run child to monitor reshape: %s\n",
strerror(errno));
abort_reshape(sra);
goto release;
default:
case MDADM_STATUS_FORKED: /* child */
map_fork();
break;
case MDADM_STATUS_SUCCESS: /* parent */
free(fdlist);
free(offsets);
sysfs_free(sra);
return 0;
case 0:
map_fork();
break;
}

/* Close unused file descriptor in the forked process */
Expand Down Expand Up @@ -3680,22 +3710,19 @@ int reshape_container(char *container, char *devname,
*/
ping_monitor(container);

if (!forked)
if (continue_via_systemd(container, GROW_SERVICE, NULL))
return 0;

switch (forked ? 0 : fork()) {
case -1: /* error */
switch (handle_forking(forked, container)) {
default: /* Unused, only to satisfy compiler. */
case MDADM_STATUS_ERROR: /* error */
perror("Cannot fork to complete reshape\n");
unfreeze(st);
return 1;
default: /* parent */
printf("%s: multi-array reshape continues in background\n", Name);
return 0;
case 0: /* child */
case MDADM_STATUS_FORKED: /* child */
manage_fork_fds(0);
map_fork();
break;
case MDADM_STATUS_SUCCESS: /* parent */
printf("%s: multi-array reshape continues in background\n", Name);
return 0;
}

/* close unused handle in child process
Expand Down Expand Up @@ -3791,6 +3818,12 @@ int reshape_container(char *container, char *devname,
c->backup_file, c->verbose, 1, restart);
close(fd);

/* Do not run reshape in initrd but let it initialize.*/
if (in_initrd()) {
sysfs_free(cc);
exit(0);
}

restart = 0;
if (rv)
break;
Expand Down
3 changes: 2 additions & 1 deletion mdadm_status.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ typedef enum mdadm_status {
MDADM_STATUS_SUCCESS = 0,
MDADM_STATUS_ERROR,
MDADM_STATUS_UNDEF,
MDADM_STATUS_MEM_FAIL
MDADM_STATUS_MEM_FAIL,
MDADM_STATUS_FORKED
} mdadm_status_t;

#endif
3 changes: 1 addition & 2 deletions udev-md-raid-arrays.rules
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ ENV{DEVTYPE}=="partition", GOTO="md_ignore_state"
ATTR{md/metadata_version}=="external:[A-Za-z]*", ATTR{md/array_state}=="inactive", GOTO="md_ignore_state"
TEST!="md/array_state", ENV{SYSTEMD_READY}="0", GOTO="md_end"
ATTR{md/array_state}=="clear*|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end"
ATTR{md/sync_action}=="reshape", ENV{RESHAPE_ACTIVE}="yes"
LABEL="md_ignore_state"

IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode"
Expand All @@ -40,6 +39,6 @@ ENV{MD_LEVEL}=="raid[1-9]*", ENV{SYSTEMD_WANTS}+="mdmonitor.service"
ENV{MD_LEVEL}=="raid[1-9]*", ENV{MD_CONTAINER}=="?*", PROGRAM="/usr/bin/readlink $env{MD_CONTAINER}", ENV{MD_MON_THIS}="%c"
ENV{MD_MON_THIS}=="?*", TEST=="/etc/initrd-release", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdmon@initrd-%c.service"
ENV{MD_MON_THIS}=="?*", TEST!="/etc/initrd-release", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdmon@%c.service"
ENV{RESHAPE_ACTIVE}=="yes", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdadm-grow-continue@%c.service"
ENV{MD_RESHAPE_ACTIVE}=="True", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdadm-grow-continue@%c.service"

LABEL="md_end"
1 change: 1 addition & 0 deletions util.c
Original file line number Diff line number Diff line change
Expand Up @@ -2307,6 +2307,7 @@ int continue_via_systemd(char *devnm, char *service_name, char *prefix)
int pid, status;
char pathbuf[1024];

dprintf("Start %s service\n", service_name);
/* Simply return that service cannot be started */
if (check_env("MDADM_NO_SYSTEMCTL"))
return 0;
Expand Down

0 comments on commit 8a0d3fe

Please sign in to comment.