Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

common/swdptap: make SWD timing more consistent #1714

Merged
merged 4 commits into from
Jul 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 92 additions & 21 deletions src/platforms/common/swdptap.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,18 @@ static bool swdptap_seq_in_parity(uint32_t *ret, size_t clock_cycles) __attribut
/*
 * Forward declarations for the two sequence-out routines. Both take the bits to
 * send in `tms_states` and the number of clock cycles to emit, and both carry the
 * GCC `optimize(3)` function attribute.
 */
static void swdptap_seq_out(uint32_t tms_states, size_t clock_cycles) __attribute__((optimize(3)));
static void swdptap_seq_out_parity(uint32_t tms_states, size_t clock_cycles) __attribute__((optimize(3)));

/*
* Overall strategy for timing consistency:
*
* - Each primitive ends with a falling clock edge
* - Output is driven after the falling clock edge
* - Input is read immediately before the rising clock edge
* - Each primitive assumes it was immediately preceded by a falling clock edge
*
* This increases the chances of meeting setup and hold times when the target
* connection is lower bandwidth (with adequately slower clocks configured).
*/

void swdptap_init(void)
{
swd_proc.seq_in = swdptap_seq_in;
Expand All @@ -68,18 +80,16 @@ static void swdptap_turnaround(const swdio_status_t dir)

if (dir == SWDIO_STATUS_FLOAT) {
SWDIO_MODE_FLOAT();
} else
gpio_clear(SWCLK_PORT, SWCLK_PIN);

}
for (volatile uint32_t counter = target_clk_divider + 1; counter > 0; --counter)
continue;

gpio_set(SWCLK_PORT, SWCLK_PIN);
for (volatile uint32_t counter = target_clk_divider + 1; counter > 0; --counter)
continue;

gpio_clear(SWCLK_PORT, SWCLK_PIN);
if (dir == SWDIO_STATUS_DRIVE) {
gpio_clear(SWCLK_PORT, SWCLK_PIN);
SWDIO_MODE_DRIVE();
}
}
Expand All @@ -89,31 +99,59 @@ static uint32_t swdptap_seq_in_clk_delay(size_t clock_cycles) __attribute__((opt
/*
 * Clock in bits from SWDIO, busy-waiting `target_clk_divider` iterations before
 * each clock edge.
 *
 * Per cycle: wait, sample SWDIO, raise SWCLK, wait, lower SWCLK. The data line is
 * therefore sampled just before the rising edge, and the routine returns with
 * SWCLK having just been driven low.
 *
 * Bits are accumulated LSb-first: every sample is inserted at bit 31 after the
 * accumulator is shifted right by one, and the final right-shift by
 * (32 - clock_cycles) leaves the first sampled bit in bit 0.
 *
 * clock_cycles: number of bits to read. 0 returns 0 without touching the pins.
 *               Values above 32 are not handled here (the final shift count
 *               would be out of range), so callers must stay within 1..32.
 * Returns the sampled bits, right-aligned.
 */
static uint32_t swdptap_seq_in_clk_delay(const size_t clock_cycles)
{
	uint32_t value = 0;
	/* Nothing to clock in; also avoids the 32-bit shift at the end */
	if (!clock_cycles)
		return 0;
	/*
	 * Count down instead of up, because with an up-count, some ARM-GCC
	 * versions use an explicit CMP, missing the optimization of converting
	 * to a faster down-count that uses SUBS followed by BCS/BCC.
	 */
	for (size_t cycle = clock_cycles; cycle--;) {
		/* Low phase of the clock */
		for (volatile uint32_t counter = target_clk_divider; counter > 0; --counter)
			continue;
		/* Sample immediately before the rising edge */
		const bool bit = gpio_get(SWDIO_IN_PORT, SWDIO_IN_PIN);
		gpio_set(SWCLK_PORT, SWCLK_PIN);
		/* High phase of the clock */
		for (volatile uint32_t counter = target_clk_divider; counter > 0; --counter)
			continue;
		/* Fold the sample into the accumulator while the clock is high */
		value >>= 1U;
		value |= (uint32_t)bit << 31U;
		/* Reordering barrier */
		__asm__("" ::: "memory");
		gpio_clear(SWCLK_PORT, SWCLK_PIN);
		/* Reordering barrier */
		__asm__("" ::: "memory");
	}
	/* Right-align: the first sampled bit ends up in bit 0 */
	value >>= (32U - clock_cycles);
	return value;
}

static uint32_t swdptap_seq_in_no_delay(size_t clock_cycles) __attribute__((optimize(3)));

/*
 * Clock in bits from SWDIO with no busy-wait loops between clock edges.
 *
 * Per cycle: sample SWDIO, raise SWCLK, execute a single NOP, fold the sample
 * into the accumulator, lower SWCLK. The compiler barriers keep the GPIO
 * accesses from being reordered around the bookkeeping, and the routine
 * returns with SWCLK having just been driven low.
 *
 * Bits are accumulated LSb-first: every sample is inserted at bit 31 after the
 * accumulator is shifted right by one, and the final right-shift by
 * (32 - clock_cycles) leaves the first sampled bit in bit 0.
 *
 * clock_cycles: number of bits to read. 0 returns 0 without touching the pins.
 *               Values above 32 are not handled here (the final shift count
 *               would be out of range), so callers must stay within 1..32.
 * Returns the sampled bits, right-aligned.
 */
static uint32_t swdptap_seq_in_no_delay(const size_t clock_cycles)
{
	/* Nothing to clock in; also avoids the 32-bit shift at the end */
	if (!clock_cycles)
		return 0;
	uint32_t value = 0;
	/*
	 * Count down instead of up, because with an up-count, some ARM-GCC
	 * versions use an explicit CMP, missing the optimization of converting
	 * to a faster down-count that uses SUBS followed by BCS/BCC.
	 */
	for (size_t cycle = clock_cycles; cycle--;) {
		/* Reordering barrier */
		__asm__("" ::: "memory");
		/* Sample immediately before the rising edge */
		bool bit = gpio_get(SWDIO_IN_PORT, SWDIO_IN_PIN);
		gpio_set(SWCLK_PORT, SWCLK_PIN);
		__asm__("nop" ::: "memory");
		/* Fold the sample into the accumulator while the clock is high */
		value >>= 1U;
		value |= (uint32_t)bit << 31U;
		/* Reordering barrier */
		__asm__("" ::: "memory");
		gpio_clear(SWCLK_PORT, SWCLK_PIN);
		/* Reordering barrier */
		__asm__("" ::: "memory");
	}
	/* Right-align: the first sampled bit ends up in bit 0 */
	value >>= (32U - clock_cycles);
	return value;
}

Expand All @@ -132,43 +170,76 @@ static bool swdptap_seq_in_parity(uint32_t *ret, size_t clock_cycles)
for (volatile uint32_t counter = target_clk_divider + 1; counter > 0; --counter)
continue;

const bool parity = calculate_odd_parity(result);
const bool bit = gpio_get(SWDIO_IN_PORT, SWDIO_IN_PIN);

gpio_set(SWCLK_PORT, SWCLK_PIN);
for (volatile uint32_t counter = target_clk_divider + 1; counter > 0; --counter)
continue;

*ret = result;
gpio_clear(SWCLK_PORT, SWCLK_PIN);
/* Terminate the read cycle now */
swdptap_turnaround(SWDIO_STATUS_DRIVE);

const bool parity = calculate_odd_parity(result);
*ret = result;
return parity == bit;
}

static void swdptap_seq_out_clk_delay(uint32_t tms_states, size_t clock_cycles) __attribute__((optimize(3)));

/*
 * Clock out bits on SWDIO, busy-waiting `target_clk_divider` iterations before
 * each clock edge.
 *
 * Per cycle: drive SWDIO with the current bit, wait, raise SWCLK, wait, prepare
 * the next bit, lower SWCLK. The next bit is computed while the clock is high so
 * that it can be driven right after the falling edge on the following
 * iteration. The routine returns with SWCLK having just been driven low.
 *
 * tms_states:   bits to send, LSb first (bit 0 goes out on the first cycle).
 * clock_cycles: number of bits to send. 0 returns without touching the pins.
 */
static void swdptap_seq_out_clk_delay(const uint32_t tms_states, const size_t clock_cycles)
{
	uint32_t value = tms_states;
	bool bit = value & 1U;
	if (!clock_cycles)
		return;
	/*
	 * Count down instead of up, because with an up-count, some ARM-GCC
	 * versions use an explicit CMP, missing the optimization of converting
	 * to a faster down-count that uses SUBS followed by BCS/BCC.
	 */
	for (size_t cycle = clock_cycles; cycle--;) {
		/* Reordering barrier */
		__asm__("" ::: "memory");
		gpio_set_val(SWDIO_PORT, SWDIO_PIN, bit);
		/* Low phase of the clock */
		for (volatile uint32_t counter = target_clk_divider; counter > 0; --counter)
			continue;
		gpio_set(SWCLK_PORT, SWCLK_PIN);
		/* High phase of the clock */
		for (volatile uint32_t counter = target_clk_divider; counter > 0; --counter)
			continue;
		__asm__("nop" ::: "memory");
		/* Prepare the next bit before the falling edge */
		value >>= 1U;
		bit = value & 1U;
		/* Reordering barrier */
		__asm__("" ::: "memory");
		gpio_clear(SWCLK_PORT, SWCLK_PIN);
	}
}

static void swdptap_seq_out_no_delay(uint32_t tms_states, size_t clock_cycles) __attribute__((optimize(3)));

/*
 * Clock out bits on SWDIO with no busy-wait loops between clock edges.
 *
 * Per cycle: lower SWCLK, drive SWDIO with the current bit, raise SWCLK, execute
 * a single NOP, then prepare the next bit. After the last cycle SWCLK is lowered
 * once more, so the routine returns with SWCLK having just been driven low.
 *
 * tms_states:   bits to send, LSb first (bit 0 goes out on the first cycle).
 * clock_cycles: number of bits to send. 0 returns without touching the pins.
 */
static void swdptap_seq_out_no_delay(const uint32_t tms_states, const size_t clock_cycles)
{
	uint32_t value = tms_states;
	bool bit = value & 1U;
	if (!clock_cycles)
		return;
	/*
	 * Count down instead of up, because with an up-count, some ARM-GCC
	 * versions use an explicit CMP, missing the optimization of converting
	 * to a faster down-count that uses SUBS followed by BCS/BCC.
	 */
	for (size_t cycle = clock_cycles; cycle--;) {
		/* Reordering barrier */
		__asm__("" ::: "memory");
		gpio_clear(SWCLK_PORT, SWCLK_PIN);
		gpio_set_val(SWDIO_PORT, SWDIO_PIN, bit);
		gpio_set(SWCLK_PORT, SWCLK_PIN);
		__asm__("nop" ::: "memory");
		/* Prepare the next bit while the clock is high */
		value >>= 1U;
		bit = value & 1U;
		/* Reordering barrier */
		__asm__("" ::: "memory");
	}
	/* Finish on a falling edge */
	gpio_clear(SWCLK_PORT, SWCLK_PIN);
}
Expand Down
Loading