Skip to content

Commit

Permalink
nvmf: Subsystem pauses only pause admin queues
Browse files Browse the repository at this point in the history
Additionally, the user can specify a namespace to also pause during the
operation.

This allows for the management of hosts, listeners, and the addition of
namespaces all while I/O to other namespaces is occurring. Pausing a
specific namespace also allows for the removal of that namespace without
impacting I/O to other namespaces in the subsystem.

Change-Id: I364336df16df92fe2069114674cb7a68076de6fb
Signed-off-by: Ben Walker <[email protected]>
Signed-off-by: Jim Harris <[email protected]>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4997
Community-CI: Broadcom CI
Tested-by: SPDK CI Jenkins <[email protected]>
Reviewed-by: Tomasz Zawadzki <[email protected]>
  • Loading branch information
Ben Walker authored and tomzawadzki committed Jan 26, 2021
1 parent 7665710 commit 312a9d6
Show file tree
Hide file tree
Showing 13 changed files with 244 additions and 79 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,13 @@ that allowed transport specific options within subsystem.

The `trsvcid` in `nvmf_subsystem_add_listener` RPC is now optional.

Pausing a subsystem now only pauses all admin queues. This allows for the
management of hosts and listeners, plus the addition of namespaces without a
full subsystem pause. Additionally, the target now allows for pausing
individual namespaces within a subsystem. To remove a namespace from a
subsystem, only the specific namespace must be paused. I/O will continue to
other namespaces while these operations execute.

### rpc

An new optional parameter `wait` was added to the RPC `iscsi_create_portal_group`,
Expand Down
11 changes: 11 additions & 0 deletions include/spdk/nvmf.h
Original file line number Diff line number Diff line change
Expand Up @@ -447,20 +447,28 @@ int spdk_nvmf_subsystem_stop(struct spdk_nvmf_subsystem *subsystem,
/**
* Transition an NVMe-oF subsystem from Active to Paused state.
*
* In a paused state, all admin queues are frozen across the whole subsystem. If
* a namespace ID is provided, all commands to that namespace are quiesced and incoming
* commands for that namespace are queued until the subsystem is resumed.
*
* \param subsystem The NVMe-oF subsystem.
* \param nsid The namespace to pause. If 0, pause no namespaces.
* \param cb_fn A function that will be called once the subsystem has changed state.
* \param cb_arg Argument passed to cb_fn.
*
* \return 0 on success, or negated errno on failure. The callback provided will only
* be called on success.
*/
int spdk_nvmf_subsystem_pause(struct spdk_nvmf_subsystem *subsystem,
uint32_t nsid,
spdk_nvmf_subsystem_state_change_done cb_fn,
void *cb_arg);

/**
* Transition an NVMe-oF subsystem from Paused to Active state.
*
* This resumes the entire subsystem, including any paused namespaces.
*
* \param subsystem The NVMe-oF subsystem.
* \param cb_fn A function that will be called once the subsystem has changed state.
* \param cb_arg Argument passed to cb_fn.
Expand Down Expand Up @@ -613,6 +621,7 @@ const char *spdk_nvmf_host_get_nqn(const struct spdk_nvmf_host *host);
* This does not start the listener. Use spdk_nvmf_tgt_listen() for that.
*
* May only be performed on subsystems in the PAUSED or INACTIVE states.
* No namespaces are required to be paused.
*
* \param subsystem Subsystem to add listener to.
* \param trid The address to accept connections from.
Expand All @@ -632,6 +641,7 @@ void spdk_nvmf_subsystem_add_listener(struct spdk_nvmf_subsystem *subsystem,
* spdk_nvmf_tgt_stop_listen().
*
* May only be performed on subsystems in the PAUSED or INACTIVE states.
* No namespaces are required to be paused.
*
* \param subsystem Subsystem to remove listener from.
* \param trid The address to no longer accept connections from.
Expand Down Expand Up @@ -801,6 +811,7 @@ uint32_t spdk_nvmf_subsystem_add_ns_ext(struct spdk_nvmf_subsystem *subsystem,
* Remove a namespace from a subsytem.
*
* May only be performed on subsystems in the PAUSED or INACTIVE states.
* Additionally, the namespace must be paused.
*
* \param subsystem Subsystem the namespace belong to.
* \param nsid Namespace ID to be removed.
Expand Down
107 changes: 83 additions & 24 deletions lib/nvmf/ctrlr.c
Original file line number Diff line number Diff line change
Expand Up @@ -771,7 +771,7 @@ spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
goto out;
}

sgroup->io_outstanding++;
sgroup->mgmt_io_outstanding++;
TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);

status = _nvmf_ctrlr_connect(req);
Expand Down Expand Up @@ -1672,7 +1672,7 @@ nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req)
/* AER cmd is an exception */
sgroup = &req->qpair->group->sgroups[ctrlr->subsys->id];
assert(sgroup != NULL);
sgroup->io_outstanding--;
sgroup->mgmt_io_outstanding--;

/* Four asynchronous events are supported for now */
if (ctrlr->nr_aer_reqs >= NVMF_MAX_ASYNC_EVENTS) {
Expand Down Expand Up @@ -3463,7 +3463,10 @@ _nvmf_request_complete(void *ctx)
struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
struct spdk_nvmf_qpair *qpair;
struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
bool is_aer = false;
uint32_t nsid;
bool paused;

rsp->sqid = 0;
rsp->status.p = 0;
Expand All @@ -3489,15 +3492,40 @@ _nvmf_request_complete(void *ctx)

/* AER cmd is an exception */
if (sgroup && !is_aer) {
assert(sgroup->io_outstanding > 0);
sgroup->io_outstanding--;
if (sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
sgroup->io_outstanding == 0) {
sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
sgroup->cb_fn(sgroup->cb_arg, 0);
sgroup->cb_fn = NULL;
sgroup->cb_arg = NULL;
if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC ||
nvmf_qpair_is_admin_queue(qpair))) {
assert(sgroup->mgmt_io_outstanding > 0);
sgroup->mgmt_io_outstanding--;
} else {
nsid = req->cmd->nvme_cmd.nsid;

/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
if (spdk_likely(nsid - 1 < sgroup->num_ns)) {
sgroup->ns_info[nsid - 1].io_outstanding--;
}
}

if (spdk_unlikely(sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
sgroup->mgmt_io_outstanding == 0)) {
paused = true;
for (nsid = 0; nsid < sgroup->num_ns; nsid++) {
ns_info = &sgroup->ns_info[nsid];

if (ns_info->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
ns_info->io_outstanding > 0) {
paused = false;
break;
}
}

if (paused) {
sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
sgroup->cb_fn(sgroup->cb_arg, 0);
sgroup->cb_fn = NULL;
sgroup->cb_arg = NULL;
}
}

}

nvmf_qpair_request_cleanup(qpair);
Expand Down Expand Up @@ -3532,7 +3560,7 @@ spdk_nvmf_request_exec_fabrics(struct spdk_nvmf_request *req)
}

assert(sgroup != NULL);
sgroup->io_outstanding++;
sgroup->mgmt_io_outstanding++;

/* Place the request on the outstanding list so we can keep track of it */
TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
Expand All @@ -3550,7 +3578,9 @@ spdk_nvmf_request_exec(struct spdk_nvmf_request *req)
{
struct spdk_nvmf_qpair *qpair = req->qpair;
struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
enum spdk_nvmf_request_exec_status status;
uint32_t nsid;

if (qpair->ctrlr) {
sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
Expand All @@ -3561,22 +3591,55 @@ spdk_nvmf_request_exec(struct spdk_nvmf_request *req)

/* Check if the subsystem is paused (if there is a subsystem) */
if (sgroup != NULL) {
if (sgroup->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
/* The subsystem is not currently active. Queue this request. */
TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
return;
if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC ||
nvmf_qpair_is_admin_queue(qpair))) {
if (sgroup->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
/* The subsystem is not currently active. Queue this request. */
TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
return;
}
sgroup->mgmt_io_outstanding++;
} else {
nsid = req->cmd->nvme_cmd.nsid;

/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
if (spdk_unlikely(nsid - 1 >= sgroup->num_ns)) {
req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
req->rsp->nvme_cpl.status.dnr = 1;
TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
_nvmf_request_complete(req);
return;
}

ns_info = &sgroup->ns_info[nsid - 1];
if (ns_info->channel == NULL) {
/* This can can happen if host sends I/O to a namespace that is
* in the process of being added, but before the full addition
* process is complete. Report invalid namespace in that case.
*/
req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
req->rsp->nvme_cpl.status.dnr = 1;
TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
ns_info->io_outstanding++;
_nvmf_request_complete(req);
return;
}

if (ns_info->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
/* The namespace is not currently active. Queue this request. */
TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
return;
}
ns_info->io_outstanding++;
}
}

if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) {
req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
/* Place the request on the outstanding list so we can keep track of it */
TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
/* Still increment io_outstanding because request_complete decrements it */
if (sgroup != NULL) {
sgroup->io_outstanding++;
}
_nvmf_request_complete(req);
return;
}
Expand All @@ -3585,10 +3648,6 @@ spdk_nvmf_request_exec(struct spdk_nvmf_request *req)
spdk_nvme_print_command(qpair->qid, &req->cmd->nvme_cmd);
}

if (sgroup) {
sgroup->io_outstanding++;
}

/* Place the request on the outstanding list so we can keep track of it */
TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);

Expand Down
1 change: 1 addition & 0 deletions lib/nvmf/fc.c
Original file line number Diff line number Diff line change
Expand Up @@ -3151,6 +3151,7 @@ nvmf_fc_adm_add_rem_nport_listener(struct spdk_nvmf_fc_nport *nport, bool add)
ctx->trid.traddr);
free(ctx);
} else if (spdk_nvmf_subsystem_pause(subsystem,
0,
nvmf_fc_adm_subsystem_paused_cb,
ctx)) {
SPDK_ERRLOG("Failed to pause subsystem: %s\n",
Expand Down
36 changes: 34 additions & 2 deletions lib/nvmf/nvmf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1308,6 +1308,7 @@ nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
{
int rc = 0;
struct spdk_nvmf_subsystem_poll_group *sgroup = &group->sgroups[subsystem->id];
uint32_t i;

TAILQ_INIT(&sgroup->queued);

Expand All @@ -1318,6 +1319,11 @@ nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
}

sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE;

for (i = 0; i < sgroup->num_ns; i++) {
sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
}

fini:
if (cb_fn) {
cb_fn(cb_arg, rc);
Expand Down Expand Up @@ -1401,6 +1407,7 @@ nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
struct spdk_nvmf_subsystem_poll_group *sgroup;
struct nvmf_qpair_disconnect_many_ctx *ctx;
int rc = 0;
uint32_t i;

ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_many_ctx));

Expand All @@ -1417,6 +1424,10 @@ nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
sgroup = &group->sgroups[subsystem->id];
sgroup->state = SPDK_NVMF_SUBSYSTEM_INACTIVE;

for (i = 0; i < sgroup->num_ns; i++) {
sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
}

TAILQ_FOREACH(qpair, &group->qpairs, link) {
if ((qpair->ctrlr != NULL) && (qpair->ctrlr->subsys == subsystem)) {
break;
Expand Down Expand Up @@ -1445,9 +1456,11 @@ nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
void
nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
struct spdk_nvmf_subsystem *subsystem,
uint32_t nsid,
spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
{
struct spdk_nvmf_subsystem_poll_group *sgroup;
struct spdk_nvmf_subsystem_pg_ns_info *ns_info = NULL;
int rc = 0;

if (subsystem->id >= group->num_sgroups) {
Expand All @@ -1466,15 +1479,29 @@ nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
}
sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSING;

if (sgroup->io_outstanding > 0) {
/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
if (nsid - 1 < sgroup->num_ns) {
ns_info = &sgroup->ns_info[nsid - 1];
ns_info->state = SPDK_NVMF_SUBSYSTEM_PAUSING;
}

if (sgroup->mgmt_io_outstanding > 0) {
assert(sgroup->cb_fn == NULL);
sgroup->cb_fn = cb_fn;
assert(sgroup->cb_arg == NULL);
sgroup->cb_arg = cb_arg;
return;
}

assert(sgroup->io_outstanding == 0);
if (ns_info != NULL && ns_info->io_outstanding > 0) {
assert(sgroup->cb_fn == NULL);
sgroup->cb_fn = cb_fn;
assert(sgroup->cb_arg == NULL);
sgroup->cb_arg = cb_arg;
return;
}

assert(sgroup->mgmt_io_outstanding == 0);
sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
fini:
if (cb_fn) {
Expand All @@ -1490,6 +1517,7 @@ nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
struct spdk_nvmf_request *req, *tmp;
struct spdk_nvmf_subsystem_poll_group *sgroup;
int rc = 0;
uint32_t i;

if (subsystem->id >= group->num_sgroups) {
rc = -1;
Expand All @@ -1507,6 +1535,10 @@ nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
goto fini;
}

for (i = 0; i < sgroup->num_ns; i++) {
sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
}

sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE;

/* Release all queued requests */
Expand Down
11 changes: 9 additions & 2 deletions lib/nvmf/nvmf_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ struct spdk_nvmf_subsystem_pg_ns_info {
/* Host ID for the registrants with the namespace */
struct spdk_uuid reg_hostid[SPDK_NVMF_MAX_NUM_REGISTRANTS];
uint64_t num_blocks;

/* I/O outstanding to this namespace */
uint64_t io_outstanding;
enum spdk_nvmf_subsystem_state state;
};

typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status);
Expand All @@ -141,7 +145,8 @@ struct spdk_nvmf_subsystem_poll_group {
struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
uint32_t num_ns;

uint64_t io_outstanding;
/* Number of ADMIN and FABRICS requests outstanding */
uint64_t mgmt_io_outstanding;
spdk_nvmf_poll_group_mod_done cb_fn;
void *cb_arg;

Expand Down Expand Up @@ -309,7 +314,9 @@ int nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
void nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
void nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
struct spdk_nvmf_subsystem *subsystem,
uint32_t nsid,
spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
void nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);

Expand Down
Loading

0 comments on commit 312a9d6

Please sign in to comment.