From 121ed7234f80f74bc43d721acecaf6737f7b9d40 Mon Sep 17 00:00:00 2001 From: Ankit Sharma Date: Tue, 16 Feb 2021 18:27:42 -0600 Subject: [PATCH] ture/Problem Description: Effective checking on the available credit Root Cause: As of now credit availability is checked only after the pacer period expiry. Hence in case if there is left our credit available will not be consumed till the next pacer period. This will cause unnecessary holding of the request till the next pacer period. Fix info: Added the Function is_credit_available() to check the credit available before expiry of the pacer period. This is to consume available credit if not consumed during the previous pacer period based on the threshold limit. (1) Added below macros to configure the credit checking /* * Configures credit availability to be calculated along with the * bytes in flight within in the allowed margin offset */ #define SPDK_NVMF_RDMA_IO_PACER_CHECK_CREDITS_ONLY_AT_PACER_PERIOD 1 #define SPDK_NVMF_RDMA_IO_PACER_ALLOW_WITHIN_CREDIT_ONLY 1 #define SPDK_NVMF_RDMA_IO_PACER_DEFAULT_MARGIN_ABOVE_CREDIT 1 (2) Added the below macros to configure the tuner instead of hardcoded values: #define SPDK_NVMF_RDMA_IO_PACER_TUNER_TYPE_01 01 #define SPDK_NVMF_RDMA_IO_PACER_TUNER_TYPE_02 02 [Features/Functional areas and/or component interfaces affected] Request submission within the pacer period [Whether the change affects how hardware is programmed] Yes - Will not hold the request if the credit available within the pacer period. Local testing Performed the testing with SPDK_NVMF_RDMA_IO_PACER_CHECK_CREDITS_ONLY_AT_PACER_PERIOD turned to '0' and below are the results. gobinaths@clx-ssp-025 io_pacing]$ SETUP=5 ./test.sh test_14 test_14 CPU mask , num cores 4, IO pacer period 5600, adjusted period 22400 ./test.sh: line 339: 43714 Terminated tail -f > rpc_pipe | QD | BW | BW Max | WIRE BW | AVG LAT, us | BW STDDEV | L3 Hit Rate | Bufs in-flight (MiB) | Pacer period, us | 256 | 177.9 | 191.0 | 189.3288 | 3017.4 | 7.6 | 99.3 | 32.6 (4.0) | 22.9 | 2048 | 171.5 | 190.9 | 157.3464 | 25042.2 | 8.3 | 97.0 | 36.0 (4.5) | 23.2 CPU mask , num cores 4, IO pacer period 5650, adjusted period 22600 ./test.sh: line 339: 44445 Terminated tail -f > rpc_pipe | QD | BW | BW Max | WIRE BW | AVG LAT, us | BW STDDEV | L3 Hit Rate | Bufs in-flight (MiB) | Pacer period, us | 256 | 179.6 | 190.3 | 146.3115 | 2989.0 | 6.0 | 99.4 | 37.0 (4.6) | 22.9 | 2048 | 169.3 | 190.1 | 135.8801 | 25358.7 | 8.5 | 97.1 | 65.0 (8.1) | 23.2 CPU mask , num cores 4, IO pacer period 5700, adjusted period 22800 ./test.sh: line 339: 45177 Terminated tail -f > rpc_pipe | QD | BW | BW Max | WIRE BW | AVG LAT, us | BW STDDEV | L3 Hit Rate | Bufs in-flight (MiB) | Pacer period, us | 256 | 177.9 | 189.5 | 186.3718 | 3016.9 | 6.0 | 98.8 | 32.0 (4.0) | 23.2 | 2048 | 175.6 | 189.5 | 186.3559 | 24469.5 | 5.3 | 98.7 | 53.3 (6.6) | 23.4 CPU mask , num cores 4, IO pacer period 5750, adjusted period 23000 ./test.sh: line 339: 45909 Terminated tail -f > rpc_pipe | QD | BW | BW Max | WIRE BW | AVG LAT, us | BW STDDEV | L3 Hit Rate | Bufs in-flight (MiB) | Pacer period, us | 256 | 170.3 | 188.8 | 184.2591 | 3152.5 | 8.2 | 98.9 | 37.0 (4.6) | 23.5 | 2048 | 176.5 | 188.6 | 184.7163 | 24332.3 | 5.3 | 99.0 | 39.3 (4.9) | 23.6 CPU mask , num cores 4, IO pacer period 5800, adjusted period 23200 ./test.sh: line 339: 47221 Terminated tail -f > rpc_pipe | QD | BW | BW Max | WIRE BW | AVG LAT, us | BW STDDEV | L3 Hit Rate | Bufs in-flight (MiB) | Pacer period, us | 256 | 164.5 | 187.9 | 145.8121 | 3262.9 | 8.3 | 99.2 | 30.3 (3.7) | 23.8 | 2048 | 176.4 | 187.9 | 183.0275 | 24350.6 | 4.3 | 99.1 | 55.6 (6.9) | 23.8 CPU mask , num cores 4, IO pacer period 6000, adjusted period 24000 ./test.sh: line 339: 48341 Terminated tail -f > rpc_pipe | QD | BW | BW Max | WIRE BW | AVG LAT, us | BW STDDEV | L3 Hit Rate | Bufs in-flight (MiB) | Pacer period, us | 256 | 163.5 | 184.8 | 154.4534 | 3282.1 | 7.2 | 99.4 | 59.0 (7.3) | 24.5 | 2048 | 170.2 | 194.7 | 175.3624 | 25237.4 | 5.6 | 99.1 | 41.0 (5.1) | 24.9 --- include/spdk/nvmf.h | 1 + lib/nvmf/io_pacer.c | 85 +++++++++++++++++++++++++++++++++++---------- lib/nvmf/io_pacer.h | 18 +++++++++- lib/nvmf/rdma.c | 13 +++---- 4 files changed, 91 insertions(+), 26 deletions(-) diff --git a/include/spdk/nvmf.h b/include/spdk/nvmf.h index dfc0e066911..4efec8eec2b 100644 --- a/include/spdk/nvmf.h +++ b/include/spdk/nvmf.h @@ -125,6 +125,7 @@ struct spdk_nvmf_io_pacer_stat { uint64_t calls; uint64_t no_ios; uint64_t period_ticks; + uint64_t nos_credit_unavailable; }; struct spdk_nvmf_transport_poll_group_stat { diff --git a/lib/nvmf/io_pacer.c b/lib/nvmf/io_pacer.c index f27448ed2ce..d3622cc6d68 100644 --- a/lib/nvmf/io_pacer.c +++ b/lib/nvmf/io_pacer.c @@ -57,6 +57,8 @@ struct io_pacer_queue { STAILQ_HEAD(, io_pacer_queue_entry) queue; }; +struct spdk_io_pacer_tuner; +struct spdk_io_pacer_tuner2; struct spdk_io_pacer { uint64_t period_ticks; int64_t credit; @@ -72,6 +74,11 @@ struct spdk_io_pacer { struct io_pacer_queue *queues; struct spdk_poller *poller; uint32_t disk_credit; + uint32_t pacer_tuner_type; + union { + struct spdk_io_pacer_tuner *pacer_tuner; + struct spdk_io_pacer_tuner2 *pacer_tuner2; + }; }; struct spdk_io_pacer_tuner { @@ -127,22 +134,24 @@ io_pacer_poll(void *arg) #endif /* SPDK_CONFIG_VTUNE */ pacer->stat.calls++; - if (ticks_diff < pacer->period_ticks) { - return 0; + if (ticks_diff >= pacer->period_ticks){ + pacer->stat.total_ticks = cur_tick - pacer->first_tick; + pacer->last_tick = cur_tick - ticks_diff % pacer->period_ticks; + pacer->stat.polls++; + pacer->remaining_credit = spdk_min(pacer->remaining_credit + pacer->credit, + pacer->credit); + if (pacer->num_ios == 0) { + pacer->stat.no_ios++; + } } - pacer->stat.total_ticks = cur_tick - pacer->first_tick; - pacer->last_tick = cur_tick - ticks_diff % pacer->period_ticks; - pacer->stat.polls++; - - pacer->remaining_credit = spdk_min(pacer->remaining_credit + pacer->credit, - pacer->credit); - - if (pacer->num_ios == 0) { - pacer->stat.no_ios++; +#if SPDK_NVMF_RDMA_IO_PACER_CHECK_CREDITS_ONLY_AT_PACER_PERIOD + else { + return 0; } +#endif /* SPDK_NVMF_RDMA_IO_PACER_CHECK_CREDITS_ONLY_AT_PACER_PERIOD */ while ((pacer->num_ios > 0) && - (pacer->remaining_credit > 0) && + (is_credit_available(pacer) == true) && (attempts_cnt < pacer->num_queues)) { next_queue %= pacer->num_queues; attempts_cnt++; @@ -182,7 +191,8 @@ struct spdk_io_pacer * spdk_io_pacer_create(uint32_t period_ns, uint32_t credit, uint32_t disk_credit, - spdk_io_pacer_pop_cb pop_cb) + spdk_io_pacer_pop_cb pop_cb, + uint32_t io_pacer_tuner_type) { struct spdk_io_pacer *pacer; @@ -201,6 +211,7 @@ spdk_io_pacer_create(uint32_t period_ns, pacer->pop_cb = pop_cb; pacer->first_tick = spdk_get_ticks(); pacer->last_tick = spdk_get_ticks(); + pacer->stat.nos_credit_unavailable = 0; pacer->poller = SPDK_POLLER_REGISTER(io_pacer_poll, (void *)pacer, 0); if (!pacer->poller) { SPDK_ERRLOG("Failed to create poller for IO pacer\n"); @@ -208,14 +219,15 @@ spdk_io_pacer_create(uint32_t period_ns, return NULL; } - SPDK_NOTICELOG("Created IO pacer %p: period_ns %u, period_ticks %lu, max_queues %u, credit %ld, disk_credit %u, core %u\n", + SPDK_NOTICELOG("Created IO pacer %p: period_ns %u, period_ticks %lu, max_queues %u, credit %ld, disk_credit %u, core %u, Tuner type: %u\n", pacer, period_ns, pacer->period_ticks, pacer->max_queues, pacer->credit, pacer->disk_credit, - spdk_env_get_current_core()); + spdk_env_get_current_core(), + io_pacer_tuner_type); return pacer; } @@ -435,6 +447,8 @@ spdk_io_pacer_tuner_create(struct spdk_io_pacer *pacer, tuner->step_ns = step_ns; tuner->min_pacer_period_ticks = pacer->period_ticks; tuner->max_pacer_period_ticks = 2 * tuner->min_pacer_period_ticks; + pacer->pacer_tuner_type = SPDK_NVMF_RDMA_IO_PACER_TUNER_TYPE_01; + pacer->pacer_tuner = tuner; if (0 != period_us) { tuner->poller = SPDK_POLLER_REGISTER(io_pacer_tune, (void *)tuner, period_us); @@ -475,6 +489,7 @@ struct spdk_io_pacer_tuner2 { uint64_t min_pacer_period_ticks; uint64_t max_pacer_period_ticks; struct spdk_poller *poller; + uint64_t min_threshold_offset; }; static int @@ -492,14 +507,15 @@ io_pacer_tune2(void *arg) static __thread uint32_t log_counter = 0; /* Try to log once per second, else it would be too much log */ if (log_counter % (SPDK_SEC_TO_NSEC / tuner->period_ns) == 0) { - SPDK_NOTICELOG("IO pacer tuner %p: pacer %p, value %u, new period %lu ticks, min %lu, polls %u. ios %u\n", + SPDK_NOTICELOG("IO pacer tuner %p: pacer %p, value %u, new period %lu ticks, min %lu, polls %lu. ios %lu, no credit %lu", tuner, pacer, v, new_period_ticks, tuner->min_pacer_period_ticks, pacer->stat.polls, - pacer->stat.ios); + pacer->stat.ios, + pacer->stat.nos_credit_unavailable); } log_counter++; @@ -515,6 +531,7 @@ spdk_io_pacer_tuner2_create(struct spdk_io_pacer *pacer, uint64_t factor) { struct spdk_io_pacer_tuner2 *tuner; + uint32_t min_threshold_margin; assert(pacer != NULL); @@ -531,7 +548,11 @@ spdk_io_pacer_tuner2_create(struct spdk_io_pacer *pacer, tuner->factor = factor; tuner->min_pacer_period_ticks = pacer->period_ticks; tuner->max_pacer_period_ticks = 4 * tuner->min_pacer_period_ticks; + pacer->pacer_tuner_type = SPDK_NVMF_RDMA_IO_PACER_TUNER_TYPE_02; + pacer->pacer_tuner2 = tuner; + min_threshold_margin = 100 / SPDK_NVMF_RDMA_IO_PACER_DEFAULT_MARGIN_ABOVE_CREDIT; /* variable parameter */ + tuner->min_threshold_offset = (tuner->min_threshold / min_threshold_margin); if (0 != period_us) { tuner->poller = SPDK_POLLER_REGISTER(io_pacer_tune2, (void *)tuner, period_us); if (!tuner->poller) { @@ -541,12 +562,13 @@ spdk_io_pacer_tuner2_create(struct spdk_io_pacer *pacer, } } - SPDK_NOTICELOG("Created IO pacer tuner %p: pacer %p, period_ns %lu, threshold %u, factor %lu\n", + SPDK_NOTICELOG("Created IO pacer tuner %p: pacer %p, period_ns %lu, threshold %u, factor %lu, allowed offset %lu\n", tuner, pacer, tuner->period_ns, tuner->min_threshold, - tuner->factor); + tuner->factor, + tuner->min_threshold_offset); return tuner; } @@ -574,3 +596,28 @@ spdk_io_pacer_tuner2_sub(struct spdk_io_pacer_tuner2 *tuner, uint32_t value) assert(tuner != NULL); tuner->value -= value; } + +bool +is_credit_available(struct spdk_io_pacer *pacer){ +#if SPDK_NVMF_RDMA_IO_PACER_ALLOW_WITHIN_CREDIT_ONLY + /* + * Check the bytes in fight within allowed offset above the minimum allowed + * threshold value + */ + if (pacer->pacer_tuner_type == SPDK_NVMF_RDMA_IO_PACER_TUNER_TYPE_02){ + struct spdk_io_pacer_tuner2 *pacer_tuner = pacer->pacer_tuner2; + if ((pacer->remaining_credit > 0)&& + (pacer_tuner->value <= pacer_tuner->min_threshold + pacer_tuner->min_threshold_offset)){ + return true; + } + } else if (pacer->remaining_credit > 0){ + return true; + } +#else /* SPDK_NVMF_RDMA_IO_PACER_ALLOW_WITHIN_CREDIT_ONLY */ + else if (pacer->remaining_credit > 0){ + return true; + } +#endif /* SPDK_NVMF_RDMA_IO_PACER_ALLOW_WITHIN_CREDIT_ONLY */ + pacer->stat.nos_credit_unavailable++; + return false; +} diff --git a/lib/nvmf/io_pacer.h b/lib/nvmf/io_pacer.h index d3ad21e1ebb..beaccb4f735 100644 --- a/lib/nvmf/io_pacer.h +++ b/lib/nvmf/io_pacer.h @@ -42,6 +42,19 @@ #include "spdk_internal/log.h" #include "spdk/nvmf.h" +/* Tuner types +*/ +#define SPDK_NVMF_RDMA_IO_PACER_TUNER_TYPE_01 01 +#define SPDK_NVMF_RDMA_IO_PACER_TUNER_TYPE_02 02 + +/* + * Configures credit availability to be calculated along with the + * bytes in flight within in the allowed margin offset + */ +#define SPDK_NVMF_RDMA_IO_PACER_CHECK_CREDITS_ONLY_AT_PACER_PERIOD 1 +#define SPDK_NVMF_RDMA_IO_PACER_ALLOW_WITHIN_CREDIT_ONLY 1 +#define SPDK_NVMF_RDMA_IO_PACER_DEFAULT_MARGIN_ABOVE_CREDIT 1 + struct spdk_io_pacer; struct spdk_io_pacer_tuner; struct spdk_io_pacer_tuner2; @@ -68,7 +81,8 @@ typedef void (*spdk_io_pacer_pop_cb)(void *io); struct spdk_io_pacer *spdk_io_pacer_create(uint32_t period_ns, uint32_t credit, uint32_t disk_credit, - spdk_io_pacer_pop_cb pop_cb); + spdk_io_pacer_pop_cb pop_cb, + uint32_t io_pacer_tuner_type); void spdk_io_pacer_destroy(struct spdk_io_pacer *pacer); int spdk_io_pacer_create_queue(struct spdk_io_pacer *pacer, uint64_t key); int spdk_io_pacer_destroy_queue(struct spdk_io_pacer *pacer, uint64_t key); @@ -89,6 +103,8 @@ void spdk_io_pacer_tuner2_destroy(struct spdk_io_pacer_tuner2 *tuner); void spdk_io_pacer_tuner2_add(struct spdk_io_pacer_tuner2 *tuner, uint32_t value); void spdk_io_pacer_tuner2_sub(struct spdk_io_pacer_tuner2 *tuner, uint32_t value); +bool is_credit_available(struct spdk_io_pacer *pacer); + static inline void drive_stats_lock(struct spdk_io_pacer_drives_stats *stats) { rte_spinlock_lock(&stats->lock); } diff --git a/lib/nvmf/rdma.c b/lib/nvmf/rdma.c index 0b8e03a6178..bba214fe2aa 100644 --- a/lib/nvmf/rdma.c +++ b/lib/nvmf/rdma.c @@ -2227,7 +2227,7 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport, STAILQ_REMOVE_HEAD(&rgroup->group.pending_buf_queue, buf_link); - if (rgroup->pacer_tuner2 && (rtransport->transport.opts.io_pacer_tuner_type == 1)) { + if (rgroup->pacer_tuner2 && (rtransport->transport.opts.io_pacer_tuner_type == SPDK_NVMF_RDMA_IO_PACER_TUNER_TYPE_02)) { spdk_io_pacer_tuner2_add(rgroup->pacer_tuner2, rdma_req->req.length); } @@ -2406,7 +2406,7 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport, } if (rgroup->pacer_tuner2 && - (rtransport->transport.opts.io_pacer_tuner_type == 1)) { + (rtransport->transport.opts.io_pacer_tuner_type == SPDK_NVMF_RDMA_IO_PACER_TUNER_TYPE_02)) { spdk_io_pacer_tuner2_sub(rgroup->pacer_tuner2, rdma_req->req.length); } @@ -2446,7 +2446,7 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport, #define SPDK_NVMF_RDMA_DIF_INSERT_OR_STRIP false #define SPDK_NVMF_RDMA_DEFAULT_IO_PACER_PERIOD 0 #define SPDK_NVMF_RDMA_DEFAULT_IO_PACER_THRESHOLD 0 -#define SPDK_NVMF_RDMA_DEFAULT_IO_PACER_TUNER_TYPE 1 /* Buffers based tuner */ +#define SPDK_NVMF_RDMA_DEFAULT_IO_PACER_TUNER_TYPE SPDK_NVMF_RDMA_IO_PACER_TUNER_TYPE_02 /* Buffers based tuner */ #define SPDK_NVMF_RDMA_DEFAULT_IO_PACER_TUNER_PERIOD 10000 /* us */ #define SPDK_NVMF_RDMA_DEFAULT_IO_PACER_TUNER_STEP 1000 /* ns */ #define SPDK_NVMF_RDMA_DEFAULT_IO_PACER_TUNER_THRESHOLD 12*1024*1024 @@ -3561,7 +3561,8 @@ spdk_nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport) rgroup->pacer = spdk_io_pacer_create(transport->opts.io_pacer_period, transport->opts.io_pacer_credit, transport->opts.io_pacer_disk_credit, - nvmf_rdma_io_pacer_pop_cb); + nvmf_rdma_io_pacer_pop_cb, + transport->opts.io_pacer_tuner_type); if (!rgroup->pacer) { SPDK_ERRLOG("Failed to create IO pacer\n"); spdk_nvmf_rdma_poll_group_destroy(&rgroup->group); @@ -3569,7 +3570,7 @@ spdk_nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport) return NULL; } - if (transport->opts.io_pacer_tuner_type == 0) { + if (transport->opts.io_pacer_tuner_type == SPDK_NVMF_RDMA_IO_PACER_TUNER_TYPE_01) { rgroup->pacer_tuner = spdk_io_pacer_tuner_create(rgroup->pacer, transport->opts.io_pacer_tuner_period, transport->opts.io_pacer_tuner_step); @@ -3718,7 +3719,7 @@ spdk_nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) rtransport = SPDK_CONTAINEROF(rgroup->group.transport, struct spdk_nvmf_rdma_transport, transport); if (rgroup->pacer) { - if (rtransport->transport.opts.io_pacer_tuner_type == 0) { + if (rtransport->transport.opts.io_pacer_tuner_type == SPDK_NVMF_RDMA_IO_PACER_TUNER_TYPE_01) { spdk_io_pacer_tuner_destroy(rgroup->pacer_tuner); } else { spdk_io_pacer_tuner2_destroy(rgroup->pacer_tuner2);